mdw@git.distorted.org.uk Git - mLib-python/blob - lbuf.pyx

   1 ### -*-pyrex-*-
   2 ###
   3 ### Line buffering
   4 ###
   5 ### (c) 2005 Straylight/Edgeware
   6 ###
   7
   8 ###----- Licensing notice ---------------------------------------------------
   9 ###
  10 ### This file is part of the Python interface to mLib.
  11 ###
  12 ### mLib/Python is free software; you can redistribute it and/or modify
  13 ### it under the terms of the GNU General Public License as published by
  14 ### the Free Software Foundation; either version 2 of the License, or
  15 ### (at your option) any later version.
  16 ###
  17 ### mLib/Python is distributed in the hope that it will be useful,
  18 ### but WITHOUT ANY WARRANTY; without even the implied warranty of
  19 ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20 ### GNU General Public License for more details.
  21 ###
  22 ### You should have received a copy of the GNU General Public License
  23 ### along with mLib/Python; if not, write to the Free Software Foundation,
  24 ### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  25
  26 LBUF_CRLF = _LBUF_CRLF
  27 LBUF_STRICTCRLF = _LBUF_STRICTCRLF
  28
  29 cdef class LineBuffer:
  30   """
  31   LineBuffer([lineproc = None], [eofproc = None])
  32
  33   Split an incoming stream into lines.
  34   """
  35
  36   cdef lbuf b
  37   cdef object _line
  38   cdef object _eof
  39
  40   def __cinit__(me):
  41     lbuf_init(&me.b, _lbfunc, <void *>me)
  42     me._line = None
  43     me._eof = None
  44   def __dealloc__(me):
  45     lbuf_destroy(&me.b)
  46
  47   def __init__(me, object lineproc = None, object eofproc = None):
  48     me._line = _checkcallable(lineproc, 'line proc')
  49     me._eof = _checkcallable(eofproc, 'eof proc')
  50
  51   @property
  52   def activep(me):
  53     """LB.activep -> BOOL: is the buffer still active?"""
  54     return <bint>(me.b.f & LBUF_ENABLE)
  55
  56   @property
  57   def delim(me):
  58     """LB.delim -> CHAR | LBUF_...: line-end delimiter"""
  59     if me.b.delim == _LBUF_CRLF or me.b.delim == _LBUF_STRICTCRLF:
  60       return me.b.delim
  61     else:
  62       return chr(me.b.delim)
  63   @delim.setter
  64   def delim(me, d):
  65     cdef ch
  66     if d == _LBUF_CRLF or d == _LBUF_STRICTCRLF:
  67       me.b.delim = d
  68     else:
  69       me.b.delim = ord(d)
  70
  71   @property
  72   def size(me):
  73     """LB.size -> INT: buffer size limit"""
  74     return me.b.sz
  75   @size.setter
  76   def size(me, size_t sz):
  77     lbuf_setsize(&me.b, sz)
  78
  79   @property
  80   def lineproc(me):
  81     """LB.lineproc -> FUNC: call FUNC(LINE) on each line"""
  82     return me._line
  83   @lineproc.setter
  84   def lineproc(me, object proc):
  85     me._line = _checkcallable(proc, 'line proc')
  86   @lineproc.deleter
  87   def lineproc(me):
  88     me._line = None
  89
  90   @property
  91   def eofproc(me):
  92     """LB.eofproc -> FUNC: call FUNC() at end-of-file"""
  93     return me._eof
  94   @eofproc.setter
  95   def eofproc(me, object proc):
  96     me._eof = _checkcallable(proc, 'eof proc')
  97   @eofproc.deleter
  98   def eofproc(me):
  99     me._eof = None
 100
 101   def enable(me):
 102     """LB.enable(): enable the buffer, allowing lines to be emitted"""
 103     if me.b.f & LBUF_ENABLE:
 104       raise ValueError('already enabled')
 105     me.b.f = me.b.f | LBUF_ENABLE
 106     me.enabled()
 107     return me
 108
 109   def disable(me):
 110     """LB.disable(): disable the buffer, suspending line emission"""
 111     if not (me.b.f & LBUF_ENABLE):
 112       raise ValueError('already disabled')
 113     me.b.f = me.b.f & ~LBUF_ENABLE
 114     me.disabled()
 115     return me
 116
 117   def close(me):
 118     """LB.close(): report the end of the input stream"""
 119     if not (me.b.f & LBUF_ENABLE):
 120       raise ValueError('buffer disabled')
 121     lbuf_close(&me.b)
 122     return me
 123
 124   @property
 125   def free(me):
 126     """LB.free -> INT: amount of space remaining in buffer"""
 127     cdef char *p
 128     return lbuf_free(&me.b, &p)
 129
 130   def flush(me, str):
 131     """LB.flush(STR) -> insert STR into the buffer and emit lines"""
 132     cdef Py_ssize_t len
 133     cdef char *p
 134     cdef char *q
 135     cdef size_t n
 136
 137     ## Get the input string as bytes.
 138     TEXT_PTRLEN(str, &p, &len)
 139
 140     ## Feed the input string into the buffer.
 141     while len > 0:
 142       n = lbuf_free(&me.b, &q)
 143       if n > len:
 144         n = len
 145       memcpy(q, p, n); p += n; len -= n
 146       if not (me.b.f & LBUF_ENABLE):
 147         break
 148       lbuf_flush(&me.b, q, n)
 149
 150     IF PYVERSION >= (3,):
 151       ## And here we have a problem.  The line buffer may have been disabled
 152       ## while we still have text to push through, and the split may be
 153       ## within a UTF-8-encoded scalar.  Let's see if there's anything to do
 154       ## before we start worrying too much.
 155
 156       if len == 0:
 157         ## We pushed all of our data into the buffer, so there's nothing left
 158         ## over.
 159
 160         pass
 161
 162       elif me.b.len == me.b.sz:
 163         ## We filled the buffer up, and there was no newline.  We already
 164         ## sent the truncated line to the output function, but we still have
 165         ## the remaining piece.  Trim any remaining pieces of the UTF-8
 166         ## scalar from the start of the leftover string.
 167
 168         while len > 0 and 128 <= <unsigned char>p[0] < 192:
 169           p += 1; len -= 1
 170
 171       else:
 172         ## The remaining possibility is the tricky one.  After accepting a
 173         ## full line, the line function has disabled further input.  We've
 174         ## just filled the buffer up and we have stuff left over.  If the
 175         ## leftover portion starts midway through a UTF-8-encoded scalar then
 176         ## Python won't let us stuff it back into a string.  So work
 177         ## backwards through the buffer until we reach the start of a scalar.
 178         ##
 179         ## This must work, because the only way the tail end of a scalar
 180         ## could be left over is if the start of that scalar came from our
 181         ## original input string.
 182
 183         while 128 <= <unsigned char>p[0] < 192:
 184           p -= 1; len += 1; me.b.len -= 1
 185
 186       ## Everything is OK now.
 187       return TEXT_FROMSTRLEN(p, len)
 188
 189   def enabled(me):
 190     """LB.enabled(): called when buffer is enabled"""
 191     pass
 192   def disabled(me):
 193     """LB.disabled(): called when buffer is disabled"""
 194     pass
 195   def line(me, line):
 196     """LB.line(LINE): called for each completed line"""
 197     return _maybecall(me._line, (line,))
 198   def eof(me):
 199     """LB.eof(): called at end-of-file"""
 200     return _maybecall(me._eof, ())
 201
 202 cdef void _lbfunc(char *s, size_t n, void *arg):
 203   cdef LineBuffer sb = <LineBuffer>arg
 204   if s is NULL:
 205     sb.eof()
 206   else:
 207     IF PYVERSION >= (3,):
 208       ## If the input line was too long and has been truncated then there
 209       ## might be an incomplete Unicode scalar at the end.  Strip this away.
 210
 211       while n > 0 and 128 <= <unsigned char>s[n - 1] < 192:
 212         n -= 1
 213
 214     sb.line(TEXT_FROMSTRLEN(s, n))
 215
 216 ###----- That's all, folks --------------------------------------------------