5 ### (c) 2005 Straylight/Edgeware
8 ###----- Licensing notice ---------------------------------------------------
10 ### This file is part of the Python interface to mLib.
12 ### mLib/Python is free software; you can redistribute it and/or modify
13 ### it under the terms of the GNU General Public License as published by
14 ### the Free Software Foundation; either version 2 of the License, or
15 ### (at your option) any later version.
17 ### mLib/Python is distributed in the hope that it will be useful,
18 ### but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ### GNU General Public License for more details.
22 ### You should have received a copy of the GNU General Public License
23 ### along with mLib/Python; if not, write to the Free Software Foundation,
24 ### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
## Re-export the two special delimiter values under their public names.
## The underscore-prefixed originals are defined outside this excerpt.
26 LBUF_CRLF = _LBUF_CRLF
27 LBUF_STRICTCRLF = _LBUF_STRICTCRLF
## Extension type that splits an incoming stream into lines.  Its two
## optional constructor arguments are the line and end-of-file callbacks.
29 cdef class LineBuffer:
31 LineBuffer([lineproc = None], [eofproc = None])
33 Split an incoming stream into lines.
## Set up the buffer structure `me.b`, registering `_lbfunc` as its handler
## and passing this object as the handler's opaque argument.  (The header
## of the method containing this call is not part of this excerpt.)
41 lbuf_init(&me.b, _lbfunc, <void *>me)
## Install the optional callbacks; both arguments default to None.  Each
## one is passed through `_checkcallable`, together with a label naming its
## role, and the result is what gets stored.
## NOTE(review): `_checkcallable` is defined elsewhere -- presumably it
## rejects values that are neither callable nor None; confirm.
47 def __init__(me, object lineproc = None, object eofproc = None):
48 me._line = _checkcallable(lineproc, 'line proc')
49 me._eof = _checkcallable(eofproc, 'eof proc')
## Body of the `activep` accessor (its header line is not part of this
## excerpt): tests the LBUF_ENABLE bit in the buffer's flag word and
## returns the result coerced to `bint`.
53 """LB.activep -> BOOL: is the buffer still active?"""
54 return <bint>(me.b.f & LBUF_ENABLE)
## `delim` accessors.  Only part of each is visible in this excerpt.
## Reading: the stored delimiter is compared with the two special CRLF
## values; the `chr` return below turns a delimiter into a one-character
## string (presumably the non-special case -- the branch lines are missing).
58 """LB.delim -> CHAR | LBUF_...: line-end delimiter"""
59 if me.b.delim == _LBUF_CRLF or me.b.delim == _LBUF_STRICTCRLF:
62 return chr(me.b.delim)
## Writing: the incoming value `d` is checked against the same two special
## values first (what happens in each case is on lines not shown here).
66 if d == _LBUF_CRLF or d == _LBUF_STRICTCRLF:
## `size` accessors.  The docstring belongs to the reading side, whose
## body is not part of this excerpt.  The `def` below takes the new limit
## as a C `size_t` and hands it straight to `lbuf_setsize`.
## NOTE(review): presumably this `def` is the property setter -- its
## decorator line is not visible; confirm.
73 """LB.size -> INT: buffer size limit"""
76 def size(me, size_t sz):
77 lbuf_setsize(&me.b, sz)
## `lineproc` accessors.  The docstring belongs to the reading side, whose
## body is not part of this excerpt.  The `def` below replaces the stored
## line callback, passing the new value through `_checkcallable` with the
## same 'line proc' label that the constructor uses.
81 """LB.lineproc -> FUNC: call FUNC(LINE) on each line"""
84 def lineproc(me, object proc):
85 me._line = _checkcallable(proc, 'line proc')
## `eofproc` accessors.  The docstring belongs to the reading side, whose
## body is not part of this excerpt.  The `def` below replaces the stored
## end-of-file callback, passing the new value through `_checkcallable`
## with the same 'eof proc' label that the constructor uses.
92 """LB.eofproc -> FUNC: call FUNC() at end-of-file"""
95 def eofproc(me, object proc):
96 me._eof = _checkcallable(proc, 'eof proc')
## Body of `enable` (header line not part of this excerpt).  Enabling twice
## is treated as an error: ValueError is raised if LBUF_ENABLE is already
## set; otherwise the bit is set in the buffer's flag word.  Anything the
## method does after that is on lines not shown here.
102 """LB.enable(): enable the buffer, allowing lines to be emitted"""
103 if me.b.f & LBUF_ENABLE:
104 raise ValueError('already enabled')
105 me.b.f = me.b.f | LBUF_ENABLE
## Body of `disable` (header line not part of this excerpt).  Mirror image
## of enabling: ValueError is raised if LBUF_ENABLE is already clear;
## otherwise the bit is cleared, leaving the other flag bits untouched.
## Anything the method does after that is on lines not shown here.
110 """LB.disable(): disable the buffer, suspending line emission"""
111 if not (me.b.f & LBUF_ENABLE):
112 raise ValueError('already disabled')
113 me.b.f = me.b.f & ~LBUF_ENABLE
## Body of `close` (header line not part of this excerpt).  The visible
## part is only the precondition: ValueError is raised unless LBUF_ENABLE
## is set.  The actual end-of-stream handling is on lines not shown here.
118 """LB.close(): report the end of the input stream"""
119 if not (me.b.f & LBUF_ENABLE):
120 raise ValueError('buffer disabled')
## Body of the `free` accessor (header line not part of this excerpt).
## Returns the count reported by `lbuf_free`.  That function also takes
## the address of a pointer as its second argument; `p` (declared on a
## line not shown here) is not used again in the visible code.
126 """LB.free -> INT: amount of space remaining in buffer"""
128 return lbuf_free(&me.b, &p)
## Body of `flush` (header line not part of this excerpt).  The input text
## is viewed as a byte pointer and length, copied piecewise into the space
## reported by `lbuf_free`, and pushed through `lbuf_flush`.  The value
## returned at the end is the part of the input still described by `p` and
## `len`, converted back to text by TEXT_FROMSTRLEN.  Several lines of this
## method -- including the loop header, some branch headers and some loop
## bodies -- are missing from this excerpt.
131 """LB.flush(STR) -> insert STR into the buffer and emit lines"""
137 ## Get the input string as bytes.
138 TEXT_PTRLEN(str, &p, &len)
140 ## Feed the input string into the buffer.
## Ask how much room the buffer has; `q` receives the address to write to.
142 n = lbuf_free(&me.b, &q)
## Copy `n` bytes in, then advance the source pointer and reduce the count
## of bytes still waiting to be delivered.
145 memcpy(q, p, n); p += n; len -= n
## Test whether the enable flag is clear (the action taken in that case is
## on a line not shown here).
146 if not (me.b.f & LBUF_ENABLE):
148 lbuf_flush(&me.b, q, n)
## Compile-time conditional: what follows applies to Python 3 and later.
150 IF PYVERSION >= (3,):
151 ## And here we have a problem. The line buffer may have been disabled
152 ## while we still have text to push through, and the split may be
153 ## within a UTF-8-encoded scalar. Let's see if there's anything to do
154 ## before we start worrying too much.
157 ## We pushed all of our data into the buffer, so there's nothing left
162 elif me.b.len == me.b.sz:
163 ## We filled the buffer up, and there was no newline. We already
164 ## sent the truncated line to the output function, but we still have
165 ## the remaining piece. Trim any remaining pieces of the UTF-8
166 ## scalar from the start of the leftover string.
## Byte values 128..191 are UTF-8 continuation bytes.  The loop body is
## not part of this excerpt.
168 while len > 0 and 128 <= <unsigned char>p[0] < 192:
172 ## The remaining possibility is the tricky one. After accepting a
173 ## full line, the line function has disabled further input. We've
174 ## just filled the buffer up and we have stuff left over. If the
175 ## leftover portion starts midway through a UTF-8-encoded scalar then
176 ## Python won't let us stuff it back into a string. So work
177 ## backwards through the buffer until we reach the start of a scalar.
179 ## This must work, because the only way the tail end of a scalar
180 ## could be left over is if the start of that scalar came from our
181 ## original input string.
## Each step moves one byte back from the buffer to the leftover string:
## `p` retreats, `len` grows, and the buffer's fill count shrinks.  There
## is no lower bound check; the comment above explains why none is needed.
183 while 128 <= <unsigned char>p[0] < 192:
184 p -= 1; len += 1; me.b.len -= 1
186 ## Everything is OK now.
187 return TEXT_FROMSTRLEN(p, len)
## Notification hooks `enabled` and `disabled`.  Only their docstrings are
## part of this excerpt; the method headers and bodies are not shown, nor
## are the places they are called from.
190 """LB.enabled(): called when buffer is enabled"""
193 """LB.disabled(): called when buffer is disabled"""
## Body of `line` (header line not part of this excerpt).  `_lbfunc` calls
## this method with each delivered line.  It passes the line, as a
## one-element argument tuple, to the stored line callback via `_maybecall`
## and returns whatever that returns.
## NOTE(review): `_maybecall` is defined elsewhere -- presumably it does
## nothing when no callback is installed; confirm.
196 """LB.line(LINE): called for each completed line"""
197 return _maybecall(me._line, (line,))
## Body of `eof` (header line not part of this excerpt).  Passes an empty
## argument tuple and the stored end-of-file callback to `_maybecall` and
## returns whatever that returns.
## NOTE(review): `_maybecall` is defined elsewhere -- presumably it does
## nothing when no callback is installed; confirm.
199 """LB.eof(): called at end-of-file"""
200 return _maybecall(me._eof, ())
## C-level handler that LineBuffer passes to `lbuf_init`.  `arg` is the
## LineBuffer object that was supplied as the opaque argument there; `s`
## and `n` are the byte pointer and length later converted to text.  Some
## lines of this function are missing from this excerpt.
202 cdef void _lbfunc(char *s, size_t n, void *arg):
203 cdef LineBuffer sb = <LineBuffer>arg
## Compile-time conditional: what follows applies to Python 3 and later.
207 IF PYVERSION >= (3,):
208 ## If the input line was too long and has been truncated then there
209 ## might be an incomplete Unicode scalar at the end. Strip this away.
## NOTE(review): the condition below matches any trailing byte in the
## range 128..191 (a UTF-8 continuation byte) and stops at the first byte
## outside it, which for a multi-byte scalar is its lead byte.  As written
## it does not distinguish a truncated scalar from a complete one.  The
## loop body and the lines after it are not visible here -- confirm that a
## line legitimately ending in a multi-byte character is not damaged and
## that a dangling lead byte cannot reach TEXT_FROMSTRLEN.
211 while n > 0 and 128 <= <unsigned char>s[n - 1] < 192:
## Convert the bytes to text and hand them to the object's `line` method.
214 sb.line(TEXT_FROMSTRLEN(s, n))
216 ###----- That's all, folks --------------------------------------------------