X-Git-Url: https://git.distorted.org.uk/~mdw/mLib-python/blobdiff_plain/376ad06df03e59ebf0796b2f475417150e82252d..803869bc2d77e95aade23e04bf110dfef4092b2e:/lbuf.pyx diff --git a/lbuf.pyx b/lbuf.pyx index a66e3a3..ba3bee3 100644 --- a/lbuf.pyx +++ b/lbuf.pyx @@ -1,131 +1,216 @@ -# -*-pyrex-*- -# -# $Id$ -# -# Line buffering -# -# (c) 2005 Straylight/Edgeware -# - -#----- Licensing notice ----------------------------------------------------- -# -# This file is part of the Python interface to mLib. -# -# mLib/Python is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# mLib/Python is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with mLib/Python; if not, write to the Free Software Foundation, -# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +### -*-pyrex-*- +### +### Line buffering +### +### (c) 2005 Straylight/Edgeware +### + +###----- Licensing notice --------------------------------------------------- +### +### This file is part of the Python interface to mLib. +### +### mLib/Python is free software; you can redistribute it and/or modify +### it under the terms of the GNU General Public License as published by +### the Free Software Foundation; either version 2 of the License, or +### (at your option) any later version. +### +### mLib/Python is distributed in the hope that it will be useful, +### but WITHOUT ANY WARRANTY; without even the implied warranty of +### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +### GNU General Public License for more details. 
###
### You should have received a copy of the GNU General Public License
### along with mLib/Python; if not, write to the Free Software Foundation,
### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

## Special values accepted by (and returned from) `LineBuffer.delim',
## re-exported under public names.
LBUF_CRLF = _LBUF_CRLF
LBUF_STRICTCRLF = _LBUF_STRICTCRLF

cdef class LineBuffer:
  """
  LineBuffer([lineproc = None], [eofproc = None])

  Split an incoming stream into lines.

  Data is pushed in with `flush'.  Each completed line is passed to the
  `line' method and the end of the stream is reported through the `eof'
  method; by default these hand off to LINEPROC and EOFPROC respectively
  (via `_maybecall').  The `enabled' and `disabled' methods are hooks
  invoked by `enable' and `disable'; the default implementations do
  nothing.
  """

  cdef lbuf b
  cdef object _line
  cdef object _eof

  def __cinit__(me):
    ## Deliberately takes no arguments: the constructor arguments are
    ## consumed by `__init__'.  The C buffer is handed an uncounted pointer
    ## back to this object, which `_lbfunc' casts back again.
    lbuf_init(&me.b, _lbfunc, <void *>me)
    me._line = None
    me._eof = None

  def __dealloc__(me):
    lbuf_destroy(&me.b)

  def __init__(me, object lineproc = None, object eofproc = None):
    me._line = _checkcallable(lineproc, 'line proc')
    me._eof = _checkcallable(eofproc, 'eof proc')

  @property
  def activep(me):
    """LB.activep -> BOOL: is the buffer still active?"""
    ## Convert explicitly so that callers get a real boolean rather than
    ## the raw masked flag word.
    return bool(me.b.f & LBUF_ENABLE)

  @property
  def delim(me):
    """LB.delim -> CHAR | LBUF_...: line-end delimiter"""
    ## The two CRLF modes are reported as their integer codes; anything
    ## else is a character code and is reported as a one-character string.
    if me.b.delim == _LBUF_CRLF or me.b.delim == _LBUF_STRICTCRLF:
      return me.b.delim
    else:
      return chr(me.b.delim)
  @delim.setter
  def delim(me, d):
    if d == _LBUF_CRLF or d == _LBUF_STRICTCRLF:
      me.b.delim = d
    else:
      me.b.delim = ord(d)

  @property
  def size(me):
    """LB.size -> INT: buffer size limit"""
    return me.b.sz
  @size.setter
  def size(me, size_t sz):
    lbuf_setsize(&me.b, sz)

  @property
  def lineproc(me):
    """LB.lineproc -> FUNC: call FUNC(LINE) on each line"""
    return me._line
  @lineproc.setter
  def lineproc(me, object proc):
    me._line = _checkcallable(proc, 'line proc')
  @lineproc.deleter
  def lineproc(me):
    me._line = None

  @property
  def eofproc(me):
    """LB.eofproc -> FUNC: call FUNC() at end-of-file"""
    return me._eof
  @eofproc.setter
  def eofproc(me, object proc):
    me._eof = _checkcallable(proc, 'eof proc')
  @eofproc.deleter
  def eofproc(me):
    me._eof = None

  def enable(me):
    """
    LB.enable(): enable the buffer, allowing lines to be emitted

    Raises `ValueError' if the buffer is already enabled.  Calls the
    `enabled' hook and returns the buffer itself.
    """
    if me.b.f & LBUF_ENABLE:
      raise ValueError('already enabled')
    me.b.f = me.b.f | LBUF_ENABLE
    me.enabled()
    return me

  def disable(me):
    """
    LB.disable(): disable the buffer, suspending line emission

    Raises `ValueError' if the buffer is already disabled.  Calls the
    `disabled' hook and returns the buffer itself.
    """
    if not (me.b.f & LBUF_ENABLE):
      raise ValueError('already disabled')
    me.b.f = me.b.f & ~LBUF_ENABLE
    me.disabled()
    return me

  def close(me):
    """
    LB.close(): report the end of the input stream

    Raises `ValueError' if the buffer is disabled.  Returns the buffer
    itself.
    """
    if not (me.b.f & LBUF_ENABLE):
      raise ValueError('buffer disabled')
    lbuf_close(&me.b)
    return me

  @property
  def free(me):
    """LB.free -> INT: amount of space remaining in buffer"""
    cdef char *p
    return lbuf_free(&me.b, &p)

  def flush(me, str):
    """
    LB.flush(STR) -> insert STR into the buffer and emit lines

    Returns whatever part of STR was left over when the loop stopped, which
    happens early if the buffer is found to be disabled.
    """
    cdef Py_ssize_t len
    cdef char *p
    cdef char *q
    cdef size_t n

    ## Get the input string as bytes.
    TEXT_PTRLEN(str, &p, &len)

    ## Feed the input string into the buffer, one buffer-load at a time.
    ##
    ## NOTE(review): the enable check is made after the chunk has been
    ## copied and counted as consumed, but before `lbuf_flush' commits it.
    ## The order is preserved from the original -- confirm it is intended.
    while len > 0:
      n = lbuf_free(&me.b, &q)
      if n > <size_t>len: n = <size_t>len
      memcpy(q, p, n); p += n; len -= n
      if not (me.b.f & LBUF_ENABLE): break
      lbuf_flush(&me.b, q, n)

    IF PYVERSION >= (3,):
      ## And here we have a problem.  The line buffer may have been disabled
      ## while we still have text to push through, and the split may be
      ## within a UTF-8-encoded scalar.  Let's see if there's anything to do
      ## before we start worrying too much.
      ##
      ## All of the byte tests below go through `unsigned char': plain
      ## `char' may be signed, in which case no byte would ever compare
      ## greater than or equal to 128.

      if len == 0:
        ## We pushed all of our data into the buffer, so there's nothing
        ## left over.

        pass

      elif me.b.len == me.b.sz:
        ## We filled the buffer up, and there was no newline.  We already
        ## sent the truncated line to the output function, but we still
        ## have the remaining piece.  Trim any remaining pieces of the UTF-8
        ## scalar from the start of the leftover string.

        while len > 0 and 128 <= <unsigned char>p[0] < 192:
          p += 1; len -= 1

      else:
        ## The remaining possibility is the tricky one.  After accepting a
        ## full line, the line function has disabled further input.  We've
        ## just filled the buffer up and we have stuff left over.  If the
        ## leftover portion starts midway through a UTF-8-encoded scalar
        ## then Python won't let us stuff it back into a string.  So work
        ## backwards through the buffer until we reach the start of a
        ## scalar.
        ##
        ## This must work, because the only way the tail end of a scalar
        ## could be left over is if the start of that scalar came from our
        ## original input string.

        while 128 <= <unsigned char>p[0] < 192:
          p -= 1; len += 1; me.b.len -= 1

    ## Everything is OK now.
    return TEXT_FROMSTRLEN(p, len)

  def enabled(me):
    """LB.enabled(): called when buffer is enabled"""
    pass
  def disabled(me):
    """LB.disabled(): called when buffer is disabled"""
    pass
  def line(me, line):
    """LB.line(LINE): called for each completed line"""
    return _maybecall(me._line, (line,))
  def eof(me):
    """LB.eof(): called at end-of-file"""
    return _maybecall(me._eof, ())

cdef void _lbfunc(char *s, size_t n, void *arg):
  ## C-level callback registered with `lbuf_init'.  ARG is the `LineBuffer'
  ## which owns the C buffer.  A null S reports end-of-file; otherwise S
  ## points to N bytes of line text.
  cdef LineBuffer sb = <LineBuffer>arg
  cdef size_t i, need
  cdef unsigned char lead
  if s is NULL:
    sb.eof()
  else:
    IF PYVERSION >= (3,):
      ## If the input line was too long and has been truncated then there
      ## might be an incomplete UTF-8 scalar at the end.  Strip it away --
      ## but only if it really is incomplete: a complete multi-byte scalar
      ## at the end of an ordinary line must be left alone.

      ## Step back over trailing continuation bytes (10xxxxxx).
      i = n
      while i > 0 and 128 <= <unsigned char>s[i - 1] < 192:
        i -= 1

      if i == 0 or <unsigned char>s[i - 1] < 192:
        ## No lead byte in front of the continuation bytes (if any), so
        ## they're strays: drop them.  If there were none then I = N and
        ## this changes nothing.
        n = i
      else:
        ## Found the lead byte: work out how long its scalar should be,
        ## and drop the whole thing if we don't have all of it.
        lead = <unsigned char>s[i - 1]
        if lead < 224: need = 2
        elif lead < 240: need = 3
        else: need = 4
        if n - (i - 1) < need: n = i - 1

    sb.line(TEXT_FROMSTRLEN(s, n))

###----- That's all, folks --------------------------------------------------