From 803869bc2d77e95aade23e04bf110dfef4092b2e Mon Sep 17 00:00:00 2001 From: Mark Wooding Date: Mon, 7 Oct 2019 13:36:41 +0100 Subject: [PATCH] @@@ lbuf needs test --- defs.pxi | 12 ++++- lbuf.pyx | 178 +++++++++++++++++++++++++++++++++++++++++++------------------- mLib.pyx | 4 +- test.py | 31 ++++++++++- utils.pyx | 16 +++--- 5 files changed, 173 insertions(+), 68 deletions(-) diff --git a/defs.pxi b/defs.pxi index 903a6d0..86cd72e 100644 --- a/defs.pxi +++ b/defs.pxi @@ -96,8 +96,14 @@ cdef extern from 'Python.h': pass int PyObject_AsReadBuffer(obj, void **buf, Py_ssize_t *len) except -1 + IF PYVERSION >= (3,): + object PyUnicode_DecodeUTF8Stateful(const char *s, Py_ssize_t sz, + const char *errors, + Py_ssize_t *used_out) + object PyInt_FromLong(long i) object PyLong_FromUnsignedLong(unsigned long i) + void PyErr_Clear() void Py_INCREF(PyObject *obj) @@ -278,9 +284,11 @@ cdef extern from 'array.h': cdef extern from 'mLib/lbuf.h': cdef struct lbuf: - int f - int delim + unsigned f + unsigned delim + size_t len size_t sz + char *buf enum: LBUF_ENABLE _LBUF_CRLF "LBUF_CRLF" diff --git a/lbuf.pyx b/lbuf.pyx index c5f2688..ba3bee3 100644 --- a/lbuf.pyx +++ b/lbuf.pyx @@ -32,98 +32,160 @@ cdef class LineBuffer: Split an incoming stream into lines. """ + cdef lbuf b - cdef _line - cdef _eof - def __cinit__(me, lineproc = None, eofproc = None, *hunoz, **hukairz): + cdef object _line + cdef object _eof + + def __cinit__(me): lbuf_init(&me.b, _lbfunc, me) - me._line = _checkcallable(lineproc, 'line proc') - me._eof = _checkcallable(eofproc, 'eof proc') + me._line = None + me._eof = None def __dealloc__(me): lbuf_destroy(&me.b) - property activep: + + def __init__(me, object lineproc = None, object eofproc = None): + me._line = _checkcallable(lineproc, 'line proc') + me._eof = _checkcallable(eofproc, 'eof proc') + + @property + def activep(me): """LB.activep -> BOOL: is the buffer still active?""" - def __get__(me): - return _tobool(me.b.f & LBUF_ENABLE) - property delim: + return (me.b.f & LBUF_ENABLE) + + @property + def delim(me): """LB.delim -> CHAR | LBUF_...: line-end delimiter""" - def __get__(me): - if me.b.delim == _LBUF_CRLF or me.b.delim == _LBUF_STRICTCRLF: - return me.b.delim - else: - return chr(me.b.delim) - def __set__(me, d): - if d == _LBUF_CRLF or d == _LBUF_STRICTCRLF: - me.b.delim = d - else: - me.b.delim = ord(d) - property size: + if me.b.delim == _LBUF_CRLF or me.b.delim == _LBUF_STRICTCRLF: + return me.b.delim + else: + return chr(me.b.delim) + @delim.setter + def delim(me, d): + cdef ch + if d == _LBUF_CRLF or d == _LBUF_STRICTCRLF: + me.b.delim = d + else: + me.b.delim = ord(d) + + @property + def size(me): """LB.size -> INT: buffer size limit""" - def __get__(me): - return me.b.sz - def __set__(me, sz): - if sz <= 0: - raise TypeError, 'size must be positive' - lbuf_setsize(&me.b, sz) - property lineproc: + return me.b.sz + @size.setter + def size(me, size_t sz): + lbuf_setsize(&me.b, sz) + + @property + def lineproc(me): """LB.lineproc -> FUNC: call FUNC(LINE) on each line""" - def __get__(me): - return me._line - def __set__(me, proc): - me._line = _checkcallable(proc, 'line proc') - def __del__(me): - me._line = None - property eofproc: + return me._line + @lineproc.setter + def lineproc(me, object proc): + me._line = _checkcallable(proc, 'line proc') + @lineproc.deleter + def lineproc(me): + me._line = None + + @property + def eofproc(me): """LB.eofproc -> FUNC: call FUNC() at end-of-file""" - def __get__(me): - return me._eof - def __set__(me, proc): - me._eof = _checkcallable(proc, 'eof proc') - def __del__(me): - me._eof = None + return me._eof + @eofproc.setter + def eofproc(me, object proc): + me._eof = _checkcallable(proc, 'eof proc') + @eofproc.deleter + def eofproc(me): + me._eof = None + def enable(me): """LB.enable(): enable the buffer, allowing lines to be emitted""" if me.b.f & LBUF_ENABLE: - raise ValueError, 'already enabled' + raise ValueError('already enabled') me.b.f = me.b.f | LBUF_ENABLE me.enabled() return me + def disable(me): """LB.disable(): disable the buffer, suspending line emission""" if not (me.b.f & LBUF_ENABLE): - raise ValueError, 'already disabled' + raise ValueError('already disabled') me.b.f = me.b.f & ~LBUF_ENABLE me.disabled() return me + def close(me): """LB.close(): report the end of the input stream""" if not (me.b.f & LBUF_ENABLE): - raise ValueError, 'buffer disabled' + raise ValueError('buffer disabled') lbuf_close(&me.b) return me - property free: + + @property + def free(me): """LB.free -> INT: amount of space remaining in buffer""" - def __get__(me): - cdef char *p - return lbuf_free(&me.b, &p) + cdef char *p + return lbuf_free(&me.b, &p) + def flush(me, str): """LB.flush(STR) -> insert STR into the buffer and emit lines""" cdef Py_ssize_t len cdef char *p cdef char *q cdef size_t n - PyString_AsStringAndSize(str, &p, &len) + + ## Get the input string as bytes. + TEXT_PTRLEN(str, &p, &len) + + ## Feed the input string into the buffer. while len > 0: n = lbuf_free(&me.b, &q) if n > len: n = len - memcpy(q, p, n) - p = p + n - len = len - n + memcpy(q, p, n); p += n; len -= n if not (me.b.f & LBUF_ENABLE): break lbuf_flush(&me.b, q, n) - return PyString_FromStringAndSize(p, len) + + IF PYVERSION >= (3,): + ## And here we have a problem. The line buffer may have been disabled + ## while we still have text to push through, and the split may be + ## within a UTF-8-encoded scalar. Let's see if there's anything to do + ## before we start worrying too much. + + if len == 0: + ## We pushed all of our data into the buffer, so there's nothing left + ## over. + + pass + + elif me.b.len == me.b.sz: + ## We filled the buffer up, and there was no newline. We already + ## sent the truncated line to the output function, but we still have + ## the remaining piece. Trim any remaining pieces of the UTF-8 + ## scalar from the start of the leftover string. + + while len > 0 and 128 <= p[0] < 192: + p += 1; len -= 1 + + else: + ## The remaining possibility is the tricky one. After accepting a + ## full line, the line function has disabled further input. We've + ## just filled the buffer up and we have stuff left over. If the + ## leftover portion starts midway through a UTF-8-encoded scalar then + ## Python won't let us stuff it back into a string. So work + ## backwards through the buffer until we reach the start of a scalar. + ## + ## This must work, because the only way the tail end of a scalar + ## could be left over is if the start of that scalar came from our + ## original input string. + + while 128 <= p[0] < 192: + p -= 1; len += 1; me.b.len -= 1 + + ## Everything is OK now. + return TEXT_FROMSTRLEN(p, len) + def enabled(me): """LB.enabled(): called when buffer is enabled""" pass @@ -138,11 +200,17 @@ cdef class LineBuffer: return _maybecall(me._eof, ()) cdef void _lbfunc(char *s, size_t n, void *arg): - cdef LineBuffer sb - sb = arg + cdef LineBuffer sb = arg if s is NULL: sb.eof() else: - sb.line(PyString_FromStringAndSize(s, n)) + IF PYVERSION >= (3,): + ## If the input line was too long and has been truncated then there + ## might be an incomplete Unicode scalar at the end. Strip this away. + + while n > 0 and 128 <= s[n - 1] < 192: + n -= 1 + + sb.line(TEXT_FROMSTRLEN(s, n)) ###----- That's all, folks -------------------------------------------------- diff --git a/mLib.pyx b/mLib.pyx index e9e2d7e..fab0f3e 100644 --- a/mLib.pyx +++ b/mLib.pyx @@ -69,10 +69,10 @@ include 'fdutils.pyx' include 'mdup.pyx' ## Other useful stuff. -#include 'stuff.pyx' +include 'stuff.pyx' ## Buffering. -#include 'lbuf.pyx' +include 'lbuf.pyx' #include 'pkbuf.pyx' ## Select stuff. diff --git a/test.py b/test.py index 9cd09e2..6e4a761 100644 --- a/test.py +++ b/test.py @@ -11,6 +11,7 @@ import mLib as M if SYS.version_info >= (3,): def _bin(text): return text.encode() def _text(bin): return bin.decode() + xrange = range else: def _bin(text): return text def _text(bin): return bin @@ -238,7 +239,35 @@ OS.close(fd) must_equal(data, _bin("Hello, world!")) ## mdup -## print(";; test mdup...") +print(";; test mdup...") +def mkfd(): + fd = OS.open(",delete-me", OS.O_WRONLY | OS.O_CREAT, 0o666) + OS.unlink(",delete-me") + return fd +def fid(fd): + st = OS.fstat(fd) + return st.st_dev, st.st_ino +initial = [mkfd() for i in xrange(5)] +ref = [fid(fd) for fd in initial] +op = [(initial[0], initial[1]), + (initial[1], initial[2]), + (initial[2], initial[0]), + (initial[0], initial[3]), + (initial[3], -1), + (initial[0], initial[4])] +M.mdup(op) +for have, want in op: + if want != -1: must_equal(have, want) +must_equal(op[0][0], initial[1]); must_equal(fid(op[0][0]), ref[0]) +must_equal(op[1][0], initial[2]); must_equal(fid(op[1][0]), ref[1]) +must_equal(op[2][0], initial[0]); must_equal(fid(op[2][0]), ref[2]) +must_equal(op[3][0], initial[3]); must_equal(fid(op[3][0]), ref[0]) +pass; must_equal(fid(op[4][0]), ref[3]) +must_equal(op[5][0], initial[4]); must_equal(fid(op[5][0]), ref[0]) +for fd, _ in op: OS.close(fd) + +## (not testing detachtty and daemonize) + ## Done! diff --git a/utils.pyx b/utils.pyx index eab722f..4d7d06f 100644 --- a/utils.pyx +++ b/utils.pyx @@ -38,14 +38,14 @@ cdef int _getfd(object fdobj): fd = fdobj.fileno() return fd -#cdef object _checkcallable(object f, object what): -# if f is not None and not callable(f): -# raise TypeError('%s must be callable' % what) -# return f +cdef object _checkcallable(object f, object what): + if f is not None and not callable(f): + raise TypeError('%s must be callable' % what) + return f -#cdef object _maybecall(object f, object args): -# if f is None: -# return None -# return f(*args) +cdef object _maybecall(object f, object args): + if f is None: + return None + return f(*args) ###----- That's all, folks -------------------------------------------------- -- 2.11.0