-# -*-pyrex-*-
-#
-# $Id$
-#
-# Line buffering
-#
-# (c) 2005 Straylight/Edgeware
-#
-
-#----- Licensing notice -----------------------------------------------------
-#
-# This file is part of the Python interface to mLib.
-#
-# mLib/Python is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# mLib/Python is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with mLib/Python; if not, write to the Free Software Foundation,
-# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+### -*-pyrex-*-
+###
+### Line buffering
+###
+### (c) 2005 Straylight/Edgeware
+###
+
+###----- Licensing notice ---------------------------------------------------
+###
+### This file is part of the Python interface to mLib.
+###
+### mLib/Python is free software; you can redistribute it and/or modify
+### it under the terms of the GNU General Public License as published by
+### the Free Software Foundation; either version 2 of the License, or
+### (at your option) any later version.
+###
+### mLib/Python is distributed in the hope that it will be useful,
+### but WITHOUT ANY WARRANTY; without even the implied warranty of
+### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+### GNU General Public License for more details.
+###
+### You should have received a copy of the GNU General Public License
+### along with mLib/Python; if not, write to the Free Software Foundation,
+### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+## Public names for the two special CRLF delimiter codes. The `delim'
+## property of `LineBuffer' accepts and returns these in place of a single
+## delimiter character.
LBUF_CRLF = _LBUF_CRLF
LBUF_STRICTCRLF = _LBUF_STRICTCRLF
cdef class LineBuffer:
+ """
+ LineBuffer([lineproc = None], [eofproc = None])
+
+ Split an incoming stream into lines.
+
+ Text is fed in using `flush'. Each line found is passed to the `line'
+ method, and end-of-file is reported by calling the `eof' method; the
+ default versions of these methods hand off to LINEPROC and EOFPROC
+ respectively, by way of `_maybecall'.
+ """
+
cdef lbuf b
- cdef _line
- cdef _eof
- def __cinit__(me, lineproc = None, eofproc = None, *hunoz, **hukairz):
+ ## The Python-level callbacks, or `None' if there aren't any.
+ cdef object _line
+ cdef object _eof
+
+ def __cinit__(me):
+ ## Set up the C buffer with `_lbfunc' as its callback and this object as
+ ## the callback's argument. The Python callbacks start out unset here;
+ ## `__init__' checks the constructor arguments and stores them.
lbuf_init(&me.b, _lbfunc, <void *>me)
- me._line = _checkcallable(lineproc, 'line proc')
- me._eof = _checkcallable(eofproc, 'eof proc')
+ me._line = None
+ me._eof = None
def __dealloc__(me):
+ ## Dispose of the C buffer which `__cinit__' initialized.
lbuf_destroy(&me.b)
- property activep:
- def __get__(me):
- return _tobool(me.b.f & LBUF_ENABLE)
- property delim:
- def __get__(me):
- if me.b.delim == _LBUF_CRLF or me.b.delim == _LBUF_STRICTCRLF:
- return me.b.delim
- else:
- return chr(me.b.delim)
- def __set__(me, d):
- if d == _LBUF_CRLF or d == _LBUF_STRICTCRLF:
- me.b.delim = d
- else:
- me.b.delim = ord(d)
- property size:
- def __get__(me):
- return me.b.sz
- def __set__(me, sz):
- if sz <= 0:
- raise TypeError, 'size must be positive'
- lbuf_setsize(&me.b, sz)
- property lineproc:
- def __get__(me):
- return me._line
- def __set__(me, proc):
- me._line = _checkcallable(proc, 'line proc')
- def __del__(me):
- me._line = None
- property eofproc:
- def __get__(me):
- return me._eof
- def __set__(me, proc):
- me._eof = _checkcallable(proc, 'eof proc')
- def __del__(me):
- me._eof = None
+
+  def __init__(me, object lineproc = None, object eofproc = None):
+    ## Each callback is passed through `_checkcallable' and stored as soon as
+    ## it comes back: the line callback first, then the end-of-file one.
+    lp = _checkcallable(lineproc, 'line proc')
+    me._line = lp
+    ep = _checkcallable(eofproc, 'eof proc')
+    me._eof = ep
+
+  @property
+  def activep(me):
+    """LB.activep -> BOOL: is the buffer still active?"""
+    ## The answer is simply whether the `LBUF_ENABLE' flag is set.
+    cdef bint active = (me.b.f & LBUF_ENABLE) != 0
+    return active
+
+  @property
+  def delim(me):
+    """LB.delim -> CHAR | LBUF_...: line-end delimiter"""
+    ## The two CRLF modes are represented by special codes, which are handed
+    ## back as they are; any other value is returned as a one-character
+    ## string.
+    if me.b.delim == _LBUF_CRLF or me.b.delim == _LBUF_STRICTCRLF:
+      return me.b.delim
+    else:
+      return chr(me.b.delim)
+  @delim.setter
+  def delim(me, d):
+    ## Mirror image of the getter: D is either one of the CRLF codes, stored
+    ## as it is, or a single character, stored as its character code.
+    if d == _LBUF_CRLF or d == _LBUF_STRICTCRLF:
+      me.b.delim = d
+    else:
+      me.b.delim = ord(d)
+
+  @property
+  def size(me):
+    """LB.size -> INT: buffer size limit"""
+    return me.b.sz
+  @size.setter
+  def size(me, size_t sz):
+    ## A negative value can't be converted to `size_t', so it never gets this
+    ## far; but zero converts happily and must be refused by hand, with the
+    ## same exception and message that callers have always seen for a
+    ## non-positive size.
+    if sz == 0:
+      raise TypeError('size must be positive')
+    lbuf_setsize(&me.b, sz)
+
+  @property
+  def lineproc(me):
+    """LB.lineproc -> FUNC: call FUNC(LINE) on each line"""
+    return me._line
+  @lineproc.setter
+  def lineproc(me, object proc):
+    ## Store whatever `_checkcallable' hands back for PROC.
+    checked = _checkcallable(proc, 'line proc')
+    me._line = checked
+  @lineproc.deleter
+  def lineproc(me):
+    ## Deleting the attribute just clears the callback.
+    me._line = None
+
+  @property
+  def eofproc(me):
+    """LB.eofproc -> FUNC: call FUNC() at end-of-file"""
+    return me._eof
+  @eofproc.setter
+  def eofproc(me, object proc):
+    ## Store whatever `_checkcallable' hands back for PROC.
+    checked = _checkcallable(proc, 'eof proc')
+    me._eof = checked
+  @eofproc.deleter
+  def eofproc(me):
+    ## Deleting the attribute just clears the callback.
+    me._eof = None
+
def enable(me):
+ """
+ LB.enable(): enable the buffer, allowing lines to be emitted
+
+ Raises ValueError if the buffer is already enabled. Returns LB.
+ """
if me.b.f & LBUF_ENABLE:
- raise ValueError, 'already enabled'
+ raise ValueError('already enabled')
me.b.f = me.b.f | LBUF_ENABLE
+ ## Tell the `enabled' hook about the change.
me.enabled()
return me
+
def disable(me):
+ """
+ LB.disable(): disable the buffer, suspending line emission
+
+ Raises ValueError if the buffer is already disabled. Returns LB.
+ """
if not (me.b.f & LBUF_ENABLE):
- raise ValueError, 'already disabled'
+ raise ValueError('already disabled')
me.b.f = me.b.f & ~LBUF_ENABLE
+ ## Tell the `disabled' hook about the change.
me.disabled()
return me
+
def close(me):
+ """
+ LB.close(): report the end of the input stream
+
+ Raises ValueError if the buffer is currently disabled. Returns LB.
+ """
if not (me.b.f & LBUF_ENABLE):
- raise ValueError, 'buffer disabled'
+ raise ValueError('buffer disabled')
lbuf_close(&me.b)
return me
- property free:
- def __get__(me):
- cdef char *p
- return lbuf_free(&me.b, &p)
+
+ @property
+ def free(me):
+ """LB.free -> INT: amount of space remaining in buffer"""
+ ## `lbuf_free' also stores a pointer to the free space; only the size is
+ ## wanted here, so the pointer is simply thrown away.
+ cdef char *p
+ return lbuf_free(&me.b, &p)
+
def flush(me, str):
- cdef int len
+ """
+ LB.flush(STR) -> insert STR into the buffer and emit lines
+
+ Returns whatever part of STR was left over when feeding stopped; this is
+ empty if all of STR was taken.
+ """
+ cdef Py_ssize_t len
cdef char *p
cdef char *q
cdef size_t n
- PyString_AsStringAndSize(str, &p, &len)
+
+ ## Get the input string as bytes.
+ TEXT_PTRLEN(str, &p, &len)
+
+ ## Feed the input string into the buffer. Each time round, copy as much
+ ## as will fit into the free space and let `lbuf_flush' process it.
while len > 0:
n = lbuf_free(&me.b, &q)
if n > len:
n = len
- memcpy(q, p, n)
- p = p + n
- len = len - n
+ memcpy(q, p, n); p += n; len -= n
+ ## NOTE(review): by the time the enabled flag is tested here, P and LEN
+ ## have already been moved past the chunk just copied, and `lbuf_flush'
+ ## is skipped for it. Confirm that those N bytes are not lost when the
+ ## buffer has been disabled.
if not (me.b.f & LBUF_ENABLE):
break
lbuf_flush(&me.b, q, n)
- return PyString_FromStringAndSize(p, len)
+
+ IF PYVERSION >= (3,):
+ ## And here we have a problem. The line buffer may have been disabled
+ ## while we still have text to push through, and the split may be
+ ## within a UTF-8-encoded scalar. Let's see if there's anything to do
+ ## before we start worrying too much.
+
+ if len == 0:
+ ## We pushed all of our data into the buffer, so there's nothing left
+ ## over.
+
+ pass
+
+ elif me.b.len == me.b.sz:
+ ## We filled the buffer up, and there was no newline. We already
+ ## sent the truncated line to the output function, but we still have
+ ## the remaining piece. Trim any remaining pieces of the UTF-8
+ ## scalar from the start of the leftover string.
+
+ while len > 0 and 128 <= <unsigned char>p[0] < 192:
+ p += 1; len -= 1
+
+ else:
+ ## The remaining possibility is the tricky one. After accepting a
+ ## full line, the line function has disabled further input. We've
+ ## just filled the buffer up and we have stuff left over. If the
+ ## leftover portion starts midway through a UTF-8-encoded scalar then
+ ## Python won't let us stuff it back into a string. So work
+ ## backwards through the buffer until we reach the start of a scalar.
+ ##
+ ## This must work, because the only way the tail end of a scalar
+ ## could be left over is if the start of that scalar came from our
+ ## original input string.
+
+ while 128 <= <unsigned char>p[0] < 192:
+ p -= 1; len += 1; me.b.len -= 1
+
+ ## Everything is OK now.
+ return TEXT_FROMSTRLEN(p, len)
+
def enabled(me):
+ """
+ LB.enabled(): called when buffer is enabled
+
+ Called by `enable' once the buffer has been switched on. This version
+ does nothing.
+ """
pass
def disabled(me):
+ """
+ LB.disabled(): called when buffer is disabled
+
+ Called by `disable' once the buffer has been switched off. This version
+ does nothing.
+ """
pass
def line(me, line):
+ """
+ LB.line(LINE): called for each completed line
+
+ This version hands LINE on to the `lineproc' callback by way of
+ `_maybecall', and returns the result.
+ """
return _maybecall(me._line, (line,))
def eof(me):
+ """
+ LB.eof(): called at end-of-file
+
+ This version invokes the `eofproc' callback, with no arguments, by way of
+ `_maybecall', and returns the result.
+ """
return _maybecall(me._eof, ())
cdef void _lbfunc(char *s, size_t n, void *arg):
- cdef LineBuffer sb
- sb = <LineBuffer>arg
+ ## Callback registered with `lbuf_init': ARG is the owning LineBuffer. A
+ ## null S means end-of-file; otherwise S holds a line of N bytes.
+ cdef LineBuffer sb = <LineBuffer>arg
+ cdef size_t i
+ cdef size_t need
if s is NULL:
sb.eof()
else:
- sb.line(PyString_FromStringAndSize(s, n))
+ IF PYVERSION >= (3,):
+ ## If the input line was too long and has been truncated then there
+ ## might be an incomplete Unicode scalar at the end. Strip this away --
+ ## but only if it really is incomplete. The final byte of every
+ ## complete multibyte scalar is a continuation byte too, so trimming
+ ## continuation bytes blindly would damage any line which ends in a
+ ## non-ASCII character and leave its lead byte dangling.
+
+ ## Skip back over the trailing continuation bytes (10xxxxxx).
+ i = n
+ while i > 0 and 128 <= <unsigned char>s[i - 1] < 192: i -= 1
+
+ ## Work out from the lead byte how long the final sequence ought to be;
+ ## zero means that there's no lead byte in front of it at all.
+ if i == 0 or <unsigned char>s[i - 1] < 192: need = 0
+ elif <unsigned char>s[i - 1] < 224: need = 2
+ elif <unsigned char>s[i - 1] < 240: need = 3
+ else: need = 4
+
+ ## Drop stray continuation bytes, or a sequence which was cut short
+ ## (lead byte included); a complete sequence is left alone.
+ if need == 0: n = i
+ elif n - i + 1 < need: n = i - 1
+
+ sb.line(TEXT_FROMSTRLEN(s, n))
-#----- That's all, folks ----------------------------------------------------
+###----- That's all, folks --------------------------------------------------