3 Copyright 2001, 2002 Michael J. Pomraning <mjp@pilcrow.madison.wi.us>
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #include <sys/types.h>
28 #define open_read(x) (open((x),O_RDONLY|O_NDELAY))
29 /* a la djb's open_foo */
31 #define VERSION "0.32"
32 #define CDBVERSION "0.75"
34 /* ------------------- cdb object -------------------- */
36 static char cdbo_object_doc
[] = "\
37 This object represents a CDB database: a reliable, constant\n\
38 database mapping strings of bytes (\"keys\") to strings of bytes\n\
39 (\"data\"), and designed for fast lookups.\n\
41 Unlike a conventional mapping, CDBs can meaningfully store multiple\n\
42 records under one key (though this feature is not often used).\n\
44 A CDB object 'cdb_o' offers the following interesting attributes:\n\
46 Dict-like Lookup Methods:\n\
47 cdb_o[key], get(key), getnext(), getall(key)\n\
49 Key-based Iteration Methods:\n\
50 keys(), firstkey(), nextkey()\n\
51 (Key-based iteration returns only distinct keys.)\n\
53 Raw Iteration Method:\n\
55 (\"Dumping\" may return the same key more than once.)\n\
58 fd - File descriptor of the underlying cdb.\n\
59 name - Name of the cdb, or None if not known.\n\
60 size - Size of the cdb, or None if not mmap()d.\n\
63 len(cdb_o) returns the total number of items in a cdb,\n\
64 which may or may not exceed the number of distinct keys.\n";
70 PyObject
* name_py
; /* 'filename' or Py_None */
71 PyObject
* getkey
; /* squirreled away for getnext() */
72 uint32 eod
; /* as in cdbdump */
78 staticforward PyTypeObject CdbType
;
81 #define CDBerr PyErr_SetFromErrno(CDBError)
/*
 * cdb_pyread(cdb_o, len, pos): produce a Python string holding len bytes
 * taken from offset pos of the cdb.
 *
 * NOTE(review): this listing is incomplete.  The return type, the local
 * declarations (c, s, r), the if/else lines that choose between the two
 * PyString_FromStringAndSize() calls, and the ERRNO / FORMAT labels are not
 * visible here.  The comments below describe only the visible lines.
 */
84 cdb_pyread(CdbObject
*cdb_o
, unsigned int len
, uint32 pos
) {
/* Range test: true when pos lies past c->size, or fewer than len bytes
 * remain after pos.  The action taken is not visible in this listing. */
91 if ((pos
> c
->size
) || (c
->size
- pos
< len
))
/* Build the string directly from the bytes at c->map + pos. */
93 s
= PyString_FromStringAndSize(c
->map
+ pos
, len
);
/* NULL source: allocate a len-byte string whose contents are filled in
 * afterwards (by the read() below). */
95 s
= PyString_FromStringAndSize(NULL
, len
);
/* Seek the descriptor to pos; a failing lseek() jumps to ERRNO. */
98 if (lseek(c
->fd
,pos
,SEEK_SET
) == -1) goto ERRNO
;
101 char * buf
= PyString_AsString(s
);
/* read() is issued between Py_BEGIN_ALLOW_THREADS and (presumably) a matching
 * Py_END_ALLOW_THREADS that is not visible; the while condition that follows
 * repeats the call when it fails with EINTR. */
104 Py_BEGIN_ALLOW_THREADS
105 r
= read(c
->fd
,buf
,len
);
108 while ((r
== -1) && (errno
== EINTR
));
/* -1 is an I/O error (ERRNO); 0 bytes means end of file before len bytes
 * were obtained (FORMAT).
 * NOTE(review): handling of a short read (0 < r < len) is not visible. */
109 if (r
== -1) goto ERRNO
;
110 if (r
== 0) goto FORMAT
;
/* Raise RuntimeError populated from errno. */
120 PyErr_SetFromErrno(PyExc_RuntimeError
);
130 #define CDBO_CURDATA(x) (cdb_pyread(x, x->c.dlen, x->c.dpos))
133 /* ------------------- CdbObject methods -------------------- */
135 static char cdbo_has_key_doc
[] =
136 "cdb_o.has_key(k) -> 1 (or 0)\n\
138 Returns true if the CDB contains key k.";
/*
 * cdb_o.has_key(k): parse one string argument (pointer + length via "s#"),
 * look it up with cdb_find(), and return the cdb_find() result as a Python
 * integer.
 *
 * NOTE(review): the declarations of key/klen/r and whatever happens between
 * the cdb_find() call and the return (e.g. an error check on r == -1) are
 * not visible in this listing.
 */
141 cdbo_has_key(CdbObject
*self
, PyObject
*args
) {
147 if (!PyArg_ParseTuple(args
, "s#", &key
, &klen
))
150 r
= cdb_find(&self
->c
, key
, klen
);
154 return Py_BuildValue("i", r
);
158 static char cdbo_get_doc
[] =
159 "cdb_o.get(k [, i]) -> data (or None)\n\
161 Fetches the record stored under key k, skipping past the first i\n\
162 records under that key (default: 0). Prepares the next call to\n\
165 Assuming cdb_o.has_key(k) == 1, then all of the following return:\n\
166 the first record stored under key k:\n\
168 cdb_o.get(k) == cdb_o[k] == cdb_o.getall(k)[0]\n";
/*
 * cdb_o.get(k [, i]): parse a string key and an optional integer i, restart
 * the lookup state with cdb_findstart(), search with cdb_findnext(), remember
 * a copy of the key in self->getkey for a later getnext(), and return the
 * data of the record the lookup stopped on.
 *
 * NOTE(review): the code that uses i to skip records (the docstring says the
 * first i records are skipped) is not visible in this listing, nor are the
 * local declarations.
 */
171 cdbo_get(CdbObject
*self
, PyObject
*args
) {
178 if (!PyArg_ParseTuple(args
, "s#|i:get", &key
, &klen
, &i
))
181 cdb_findstart(&self
->c
);
184 r
= cdb_findnext(&self
->c
, key
, klen
);
/* -1: raise the module error from errno (CDBerr).  0: no record, so return
 * None (Py_BuildValue("") yields None). */
185 if (r
== -1) return CDBerr
;
186 if (!r
) return Py_BuildValue("");
191 /* prep. possibly ensuing call to getnext() */
/* Release any previously remembered key, then store a fresh copy. */
192 Py_XDECREF(self
->getkey
);
193 self
->getkey
= PyString_FromStringAndSize(key
, klen
);
194 if (self
->getkey
== NULL
)
/* CDBO_CURDATA reads dlen bytes at dpos of the record just found. */
197 return CDBO_CURDATA(self
);
200 static char cdbo_getall_doc
[] =
201 "cdb_o.getall(k) -> ['data', ... ]\n\
203 Return a list of all records stored under key k.";
/*
 * cdb_o.getall(k): collect the data of every record found under key k into
 * a new Python list.
 *
 * Visible flow: parse the key, create an empty list, restart the lookup with
 * cdb_findstart(), then repeat cdb_findnext() while it returns non-zero,
 * appending the current record's data each time.
 *
 * NOTE(review): the loop condition is also true for r == -1; the error
 * handling for that case, the NULL check on data, the release of data after
 * PyList_Append() (which does not take over the reference), and the final
 * return are not visible in this listing -- confirm against the full file.
 */
206 cdbo_getall(CdbObject
*self
, PyObject
*args
) {
208 PyObject
* list
, * data
;
213 if (!PyArg_ParseTuple(args
, "s#:getall", &key
, &klen
))
216 list
= PyList_New(0);
218 if (list
== NULL
) return NULL
;
220 cdb_findstart(&self
->c
);
222 while ((r
= cdb_findnext(&self
->c
, key
, klen
))) {
227 data
= CDBO_CURDATA(self
);
232 err
= PyList_Append(list
, data
);
244 static char cdbo_getnext_doc
[] =
245 "cdb_o.getnext() -> 'data' (or None)\n\
247 For iteration over the records stored under one key, avoiding loading\n\
248 all items into memory). The \"current key\" is determined by the most\n\
249 recent call to get().\n\
251 The following loops through all items stored under key k:\n\
253 ## cdb_o.getall(k) possibly too big for memory\n\
254 rec = cdb_o.get(k)\n\
255 while rec is not None:\n\
257 rec = cdb_o.getnext()\n";
/*
 * cdb_o.getnext(): continue the lookup started by get(), using the key that
 * get() stored in self->getkey.  Takes no arguments.
 *
 * Raises TypeError when no key has been stored.  Otherwise it switches on the
 * result of cdb_findnext(); the visible outcomes are: release the stored key
 * and return None, or return the current record's data.
 *
 * NOTE(review): the case labels are not visible, so which cdb_findnext()
 * result maps to which outcome cannot be confirmed from this listing; it is
 * also not visible whether self->getkey is reset to NULL after the
 * Py_DECREF -- it must be, or the next call would use a released object.
 */
260 cdbo_getnext(CdbObject
*self
, PyObject
*args
) {
262 if (!PyArg_ParseTuple(args
, ":getnext"))
265 if (self
->getkey
== NULL
) {
266 PyErr_SetString(PyExc_TypeError
,
267 "getnext() called without first calling get()");
271 switch(cdb_findnext(&self
->c
,
272 PyString_AsString(self
->getkey
),
273 PyString_Size(self
->getkey
))) {
277 Py_DECREF(self
->getkey
);
279 return Py_BuildValue("");
281 return CDBO_CURDATA(self
);
/*
 * _cdbo_init_eod(self): read the first 4 bytes of the cdb (offset 0) into
 * nonce and unpack them as a uint32 into self->eod, the position the record
 * walkers in this file stop at.
 *
 * NOTE(review): the return type, the declaration of nonce, and what is
 * returned on success or when cdb_read() fails are not visible here.
 */
287 _cdbo_init_eod(CdbObject
*self
) {
291 if (cdb_read(&self
->c
, nonce
, 4, 0) == -1)
294 uint32_unpack(nonce
, &self
->eod
);
301 * _cdbo_keyiter(cdb_o)
303 * Whiz-bang all-in-one:
304 * extract current record
305 * compare current pos to pos implied by cdb_find(current_key)
306 * (Different? adv. iter cursor, loop and try again)
307 * advance iteration cursor
/*
 * _cdbo_keyiter(self): shared engine for keys()/firstkey()/nextkey().
 *
 * Walks records starting at self->iter_pos until self->eod.  For each record
 * it reads the 8-byte header (key length, data length), reads the key, and
 * looks that key up with cdb_find().  If the data position reported by
 * cdb_datapos() equals this record's own data position
 * (iter_pos + 8 + klen), this is the first occurrence of the key: the cursor
 * is advanced past the record (and, presumably, the key is returned -- the
 * return statement is not visible).  Otherwise the key is released, the
 * cursor is advanced, and the loop tries the next record.  When the cursor
 * reaches eod the function returns None.
 *
 * NOTE(review): the case labels of the switch, the error returns, and the
 * condition guarding the _cdbo_init_eod() call are not visible in this
 * listing.  The visible "key == NULL" test appears after key has already
 * been passed to PyString_AsString()/PyString_Size(); confirm the actual
 * statement order in the full file.
 */
312 _cdbo_keyiter(CdbObject
*self
) {
319 _cdbo_init_eod(self
);
321 while (self
->iter_pos
< self
->eod
) {
322 if (cdb_read(&self
->c
, buf
, 8, self
->iter_pos
) == -1)
325 uint32_unpack(buf
, &klen
);
326 uint32_unpack(buf
+4, &dlen
);
328 key
= cdb_pyread(self
, klen
, self
->iter_pos
+ 8);
333 switch(cdb_find(&self
->c
,PyString_AsString(key
),PyString_Size(key
))) {
339 /* bizarre, impossible? PyExc_RuntimeError? */
340 PyErr_SetString(PyExc_KeyError
,
341 PyString_AS_STRING((PyStringObject
*) key
));
345 if (key
== NULL
) /* already raised error */
348 if (cdb_datapos(&self
->c
) == self
->iter_pos
+ klen
+ 8) {
349 /** first occurrence of key in the cdb **/
350 self
->iter_pos
+= 8 + klen
+ dlen
;
353 Py_DECREF(key
); /* better luck next time around */
354 self
->iter_pos
+= 8 + klen
+ dlen
;
358 return Py_BuildValue(""); /* iter_pos >= eod; we're done */
362 static char cdbo_keys_doc
[] =
363 "cdb_o.keys() -> ['k1', 'k2', ... ]\n\
365 Returns a list of all (distinct) keys in the database.";
/*
 * cdb_o.keys(): build a list of the distinct keys by repeatedly calling
 * _cdbo_keyiter() from position 2048 until it returns None.
 *
 * The caller's iteration cursor (self->iter_pos) is saved first and put back
 * afterwards -- on the visible error path inside the loop as well as at the
 * end -- so a firstkey()/nextkey() walk in progress is not disturbed.
 *
 * NOTE(review): 2048 is presumably the offset of the first record, just past
 * the fixed-size cdb header -- confirm.  Creation of the list r, the release
 * of key after PyList_Append(), the handling of a NULL (error) return from
 * _cdbo_keyiter(), and the final return are not visible in this listing.
 */
368 cdbo_keys(CdbObject
*self
, PyObject
*args
) {
374 if (! PyArg_ParseTuple(args
, ""))
381 pos
= self
->iter_pos
; /* don't interrupt a manual iteration */
383 self
->iter_pos
= 2048;
385 key
= _cdbo_keyiter(self
);
386 while (key
!= Py_None
) {
387 err
= PyList_Append(r
, key
);
391 self
->iter_pos
= pos
;
394 key
= _cdbo_keyiter(self
);
398 self
->iter_pos
= pos
;
404 static char cdbo_firstkey_doc
[] =
405 "cdb_o.firstkey() -> key (or None)\n\
407 Return the first key in the database, resetting the internal\n\
408 iteration cursor. firstkey() and nextkey() may be used to\n\
409 traverse all distinct keys in the cdb. See each() for raw\n\
413 cdbo_firstkey(CdbObject
*self
, PyObject
*args
) {
415 if (! PyArg_ParseTuple(args
, ":firstkey"))
418 self
->iter_pos
= 2048;
420 return _cdbo_keyiter(self
);
424 static char cdbo_nextkey_doc
[] =
425 "cdb_o.nextkey() -> key (or None)\n\
427 Return the next distinct key in the cdb.\n\
429 The following code walks the CDB one key at a time:\n\
431 key = cdb_o.firstkey()\n\
432 while key is not None:\n\
434 key = cdb_o.nextkey()\n";
/*
 * cdb_o.nextkey(): takes no arguments; returns whatever _cdbo_keyiter()
 * produces from the current iteration cursor (the next distinct key, or None
 * once the cursor has reached the end of the records).
 *
 * NOTE(review): the statement executed when argument parsing fails is not
 * visible in this listing.
 */
437 cdbo_nextkey(CdbObject
*self
, PyObject
*args
) {
439 if (! PyArg_ParseTuple(args
, ":nextkey"))
442 return _cdbo_keyiter(self
);
446 static char cdbo_each_doc
[] =
447 "cdb_o.each() -> (key, data) (or None)\n\
449 Fetch the next ('key', 'data') record from the underlying cdb file,\n\
450 returning None and resetting the iteration cursor when all records\n\
451 have been fetched.\n\
453 Keys appear with each item under them -- e.g., (key,foo), (key2,bar),\n\
454 (key,baz) -- order of records is determined by actual position on\n\
455 disk. Both keys() and (for GDBM fanciers) firstkey()/nextkey()-style\n\
456 iteration go to pains to present the user with only distinct keys.";
/*
 * cdb_o.each(): return the record at self->each_pos as a (key, data) tuple
 * and advance each_pos past it; once each_pos has reached self->eod, reset
 * each_pos to 2048 instead.
 *
 * Visible flow: allocate a 2-tuple, make sure eod is known, test for the end
 * of the records, read the 8-byte record header (key length, data length),
 * read key and data with cdb_pyread(), advance the cursor, then store both
 * objects in the tuple.
 *
 * NOTE(review): two reference-count concerns to confirm in the full file:
 *  - The tuple is allocated before the "all done" test.  If that branch
 *    returns without releasing tup, the tuple is leaked on every
 *    end-of-iteration call.
 *  - PyTuple_SetItem() takes over the reference to the item it is given,
 *    including when it fails.  Releasing key and dat again in the failure
 *    branch, and then releasing tup (which also releases any item already
 *    stored), looks like a double release.
 * The declarations of buf/klen/dlen, the error returns and the final return
 * are not visible in this listing.
 */
459 cdbo_each(CdbObject
*self
, PyObject
*args
) {
461 PyObject
*tup
, *key
, *dat
;
465 if (! PyArg_ParseTuple(args
, ":each"))
468 tup
= PyTuple_New(2);
473 (void) _cdbo_init_eod(self
);
475 if (self
->each_pos
>= self
->eod
) { /* all done, reset cursor */
476 self
->each_pos
= 2048;
481 if (cdb_read(&self
->c
, buf
, 8, self
->each_pos
) == -1)
484 uint32_unpack(buf
, &klen
);
485 uint32_unpack(buf
+4, &dlen
);
/* Key bytes follow the 8-byte header; data bytes follow the key. */
487 key
= cdb_pyread(self
, klen
, self
->each_pos
+ 8);
488 dat
= cdb_pyread(self
, dlen
, self
->each_pos
+ 8 + klen
);
/* The cursor moves on even if one of the reads above failed. */
490 self
->each_pos
+= klen
+ dlen
+ 8;
492 if (key
== NULL
|| dat
== NULL
) {
493 Py_XDECREF(key
); Py_XDECREF(dat
);
498 if (PyTuple_SetItem(tup
, 0, key
) || PyTuple_SetItem(tup
, 1, dat
)) {
499 Py_DECREF(key
); Py_DECREF(dat
); Py_DECREF(tup
);
506 /*** cdb object as mapping ***/
/*
 * len(cdb_o): return self->numrecords, counting the records first when the
 * cached value is zero.
 *
 * The count walks record headers from pos up to self->eod: each 8-byte header
 * gives the key and data lengths, and pos advances by 8 + klen + dlen.
 *
 * NOTE(review): the initial value of pos, the declaration of buf, the
 * statement that increments numrecords, and the error return when cdb_read()
 * fails are not visible in this listing.  Because zero doubles as "not yet
 * counted", a cdb with no records is re-walked on every call.  The result is
 * narrowed to int on return.
 */
509 cdbo_length(CdbObject
*self
) {
511 if (! self
->numrecords
) {
513 uint32 pos
, klen
, dlen
;
518 (void) _cdbo_init_eod(self
);
520 while (pos
< self
->eod
) {
521 if (cdb_read(&self
->c
, buf
, 8, pos
) == -1)
523 uint32_unpack(buf
, &klen
);
524 uint32_unpack(buf
+ 4, &dlen
);
525 pos
+= 8 + klen
+ dlen
;
529 return (int) self
->numrecords
;
/*
 * cdb_o[k]: convert k to a byte pointer and length with PyArg_Parse("s#"),
 * look it up with cdb_find(), and return the data of the record found.
 * One visible branch raises KeyError using the key's bytes as the message.
 *
 * NOTE(review): the case labels of the switch are not visible, so the mapping
 * from cdb_find() results to the KeyError / data outcomes (and any errno-based
 * error path) cannot be confirmed from this listing.
 */
533 cdbo_subscript(CdbObject
*self
, PyObject
*k
) {
537 if (! PyArg_Parse(k
, "s#", &key
, &klen
))
540 switch(cdb_find(&self
->c
, key
, (unsigned int)klen
)) {
544 PyErr_SetString(PyExc_KeyError
,
545 PyString_AS_STRING((PyStringObject
*) k
));
548 return CDBO_CURDATA(self
);
553 static PyMappingMethods cdbo_as_mapping
= {
554 (inquiry
)cdbo_length
,
555 (binaryfunc
)cdbo_subscript
,
559 static PyMethodDef cdb_methods
[] = {
561 {"get", (PyCFunction
)cdbo_get
, METH_VARARGS
,
563 {"getnext", (PyCFunction
)cdbo_getnext
, METH_VARARGS
,
565 {"getall", (PyCFunction
)cdbo_getall
, METH_VARARGS
,
567 {"has_key", (PyCFunction
)cdbo_has_key
, METH_VARARGS
,
569 {"keys", (PyCFunction
)cdbo_keys
, METH_VARARGS
,
571 {"firstkey", (PyCFunction
)cdbo_firstkey
, METH_VARARGS
,
573 {"nextkey", (PyCFunction
)cdbo_nextkey
, METH_VARARGS
,
575 {"each", (PyCFunction
)cdbo_each
, METH_VARARGS
,
580 /* ------------------- cdb operations -------------------- */
/*
 * _wrap_cdb_init(fd): allocate a CdbObject, initialise its embedded cdb
 * struct on descriptor fd, and return it as a PyObject pointer (NULL if the
 * allocation fails).
 *
 * Both iteration cursors (iter_pos, each_pos) start at 2048 and the cached
 * record count starts at 0, which cdbo_length() treats as "not yet counted".
 *
 * NOTE(review): initialisation of the remaining fields (e.g. getkey, eod,
 * name_py) is not visible in this listing -- confirm they are set before the
 * object can be deallocated.
 */
583 _wrap_cdb_init(int fd
) { /* constructor implementation */
587 self
= PyObject_NEW(CdbObject
, &CdbType
);
588 if (self
== NULL
) return NULL
;
590 self
->c
.map
= 0; /* break encapsulation -- cdb struct init'd to zero */
591 cdb_init(&self
->c
, fd
);
593 self
->iter_pos
= 2048;
594 self
->each_pos
= 2048;
595 self
->numrecords
= 0;
599 return (PyObject
*) self
;
/*
 * cdb.init(f): module-level constructor.  f is either a string filename,
 * which is opened with open_read(), or an integer, which is used directly as
 * the file descriptor; anything else raises TypeError.
 *
 * The resulting descriptor is wrapped by _wrap_cdb_init().  name_attr
 * (Py_None by default) is stored as the object's name_py with its reference
 * count incremented.
 *
 * NOTE(review): the assignment of name_attr in the filename branch, the error
 * raised when open_read() fails, and the final return are not visible in this
 * listing.  If _wrap_cdb_init() fails after this function opened the file
 * itself, the visible early return does not show the descriptor being closed.
 */
604 cdbo_constructor(PyObject
*ignore
, PyObject
*args
) {
608 PyObject
*name_attr
= Py_None
;
611 if (! PyArg_ParseTuple(args
, "O:new", &f
))
614 if (PyString_Check(f
)) {
616 if ((fd
= open_read(PyString_AsString(f
))) == -1)
621 } else if (PyInt_Check(f
)) {
623 fd
= (int) PyInt_AsLong(f
);
627 PyErr_SetString(PyExc_TypeError
,
628 "expected filename or file descriptor");
633 self
= _wrap_cdb_init(fd
);
634 if (self
== NULL
) return NULL
;
636 ((CdbObject
*)self
)->name_py
= name_attr
;
637 Py_INCREF(name_attr
);
/*
 * Destructor for cdb objects: releases the stored name (when set) and the
 * key remembered for getnext().
 *
 * NOTE(review): the statement controlled by the PyString_Check() test is not
 * visible in this listing; per the existing comment it closes the descriptor
 * when the object opened the file itself.  Release of the cdb struct and of
 * the object memory is likewise not visible.
 */
643 cdbo_dealloc(CdbObject
*self
) { /* del(cdb_o) */
645 if (self
->name_py
!= NULL
) {
647 /* if cdb_o.name is not None: we open()d it ourselves, so close it */
648 if (PyString_Check(self
->name_py
))
651 Py_DECREF(self
->name_py
);
654 Py_XDECREF(self
->getkey
);
/*
 * Attribute lookup for cdb objects.  Methods are looked up first with
 * Py_FindMethod(); the data attributes handled here are:
 *   __members__ -> ["fd", "name", "size"]
 *   fd          -> self->c.fd as an integer
 *   name        -> the stored name object (new reference)
 *   size        -> self->c.size as a long when self->c.map is set
 * Any other name raises AttributeError with the name as the message.
 *
 * NOTE(review): how the Py_FindMethod() result r is tested, and the
 * alternative of the conditional for "size" (None per the existing comment),
 * are not visible in this listing.
 */
662 cdbo_getattr(CdbObject
*self
, char *name
) {
666 r
= Py_FindMethod(cdb_methods
, (PyObject
*) self
, name
);
673 if (!strcmp(name
,"__members__"))
674 return Py_BuildValue("[sss]", "fd", "name", "size");
676 if (!strcmp(name
,"fd")) {
677 return Py_BuildValue("i", self
->c
.fd
); /* cdb_o.fd */
680 if (!strcmp(name
,"name")) {
681 Py_INCREF(self
->name_py
);
682 return self
->name_py
; /* cdb_o.name */
685 if (!strcmp(name
,"size")) { /* cdb_o.size */
686 return self
->c
.map ?
/** mmap()d ? stat.st_size : None **/
687 Py_BuildValue("l", (long) self
->c
.size
) :
691 PyErr_SetString(PyExc_AttributeError
, name
);
696 /* ----------------- cdbmake object ------------------ */
698 static char cdbmake_object_doc
[] =
699 "cdbmake objects resemble the struct cdb_make interface:\n\
701 CDB Construction Methods:\n\
702 add(k, v), finish()\n\
705 fd - fd of underlying CDB, or -1 if finish()ed\n\
706 fn, fntmp - as from the cdb package's cdbmake utility\n\
707 numentries - current number of records add()ed\n";
716 staticforward PyTypeObject CdbMakeType
;
718 #define CDBMAKEerr PyErr_SetFromErrno(PyExc_IOError)
721 /* ----------------- CdbMake methods ------------------ */
/*
 * cm.add(key, data): parse two string arguments (each as pointer + length),
 * pass them to cdb_make_add(), and return None.
 *
 * NOTE(review): klen and dlen are declared unsigned int, while the "s#"
 * format stores a signed length (int, or Py_ssize_t when PY_SSIZE_T_CLEAN is
 * defined) -- confirm the types agree on the supported Python versions.  The
 * error raised when cdb_make_add() returns -1 is not visible in this listing.
 */
724 CdbMake_add(cdbmakeobject
*self
, PyObject
*args
) {
727 unsigned int klen
, dlen
;
729 if (!PyArg_ParseTuple(args
,"s#s#:add",&key
,&klen
,&dat
,&dlen
))
732 if (cdb_make_add(&self
->cm
, key
, klen
, dat
, dlen
) == -1)
735 return Py_BuildValue("");
/*
 * cm.finish(): complete the cdb being built.  In order: cdb_make_finish()
 * finalises the file contents, fsync() forces them to disk, fclose() closes
 * the stream, and rename() moves the temporary file (fntmp) onto the final
 * name (fn).  Returns None.
 *
 * NOTE(review): the error raised after each failing step is not visible in
 * this listing, nor is any reset of self->cm.fp after fclose(); other code in
 * this file reads self->cm.fp later (cdbmake_dealloc, cdbmake_getattr).
 */
740 CdbMake_finish(cdbmakeobject
*self
, PyObject
*args
) {
742 if (!PyArg_ParseTuple(args
, ":finish"))
745 if (cdb_make_finish(&self
->cm
) == -1)
748 /* cleanup as in cdb dist's cdbmake */
750 if (fsync(fileno(self
->cm
.fp
)) == -1)
753 if (fclose(self
->cm
.fp
) != 0)
758 if (rename(PyString_AsString(self
->fntmp
),
759 PyString_AsString(self
->fn
)) == -1)
762 return Py_BuildValue("");
765 static PyMethodDef cdbmake_methods
[] = {
766 {"add", (PyCFunction
)CdbMake_add
, METH_VARARGS
,
767 "cm.add(key, data) -> None\n\
769 Add 'key' -> 'data' pair to the underlying CDB." },
770 {"finish", (PyCFunction
)CdbMake_finish
, METH_VARARGS
,
771 "cm.finish() -> None\n\
773 Finish safely composing a new CDB, renaming cm.fntmp to\n\
778 /* ----------------- cdbmake operations ------------------ */
/*
 * cdb.cdbmake(fn, fntmp): module-level constructor for cdbmake objects.
 * Takes two string objects, opens fntmp for writing ("w+b"), allocates a
 * cdbmakeobject, starts the cdb writer on the stream with cdb_make_start(),
 * and returns the new object.
 *
 * NOTE(review): the format string "SS|i" accepts an optional third integer
 * argument, but only two output pointers (&fn, &fntmp) are passed in the
 * visible call.  If a caller supplies the third argument, PyArg_ParseTuple()
 * would store through an argument that was never passed (undefined
 * behaviour).  Either drop "|i" or supply an int destination.
 *
 * Also not visible in this listing: the check on the fopen() result, whether
 * the stream is closed when PyObject_NEW() fails (the visible early return
 * does not show it), the storing of fn/fntmp on the object, and the cleanup
 * when cdb_make_start() fails.
 */
781 new_cdbmake(PyObject
*ignore
, PyObject
*args
) {
784 PyObject
*fn
, *fntmp
;
787 if (! PyArg_ParseTuple(args
, "SS|i", &fn
, &fntmp
))
790 f
= fopen(PyString_AsString(fntmp
), "w+b");
795 self
= PyObject_NEW(cdbmakeobject
, &CdbMakeType
);
796 if (self
== NULL
) return NULL
;
804 if (cdb_make_start(&self
->cm
, f
) == -1) {
810 return (PyObject
*) self
;
/*
 * Destructor for cdbmake objects: releases fn and fntmp.  When the stream
 * (self->cm.fp) is still set at this point, the temporary file is removed
 * with unlink().
 *
 * NOTE(review): any fclose() of the still-open stream and the release of the
 * object memory are not visible in this listing.  The "still set" test only
 * works if finish() clears self->cm.fp after closing it -- confirm.
 */
814 cdbmake_dealloc(cdbmakeobject
*self
) {
816 Py_XDECREF(self
->fn
);
818 if (self
->fntmp
!= NULL
) {
819 if (self
->cm
.fp
!= NULL
) {
821 unlink(PyString_AsString(self
->fntmp
));
823 Py_DECREF(self
->fntmp
);
/*
 * Attribute lookup for cdbmake objects.  Data attributes are checked first:
 *   __members__ -> ["fd", "fn", "fntmp", "numentries"]
 *   fd          -> fileno() of the output stream
 *   fn, fntmp   -> the stored filename objects
 *   numentries  -> self->cm.numentries
 * Anything else is delegated to Py_FindMethod() on cdbmake_methods.
 *
 * NOTE(review):
 *  - "fd" calls fileno(self->cm.fp) with no visible guard.  The object's
 *    docstring promises -1 after finish(); if the stream has been closed or
 *    cleared by then, this call is invalid -- confirm.
 *  - The Py_INCREF that should precede "return self->fn" is not visible in
 *    this listing (it is visible for fntmp); confirm it exists.
 *  - "numentries" is formatted with "l"; if the field is a 32-bit unsigned
 *    type it should be cast to long to match the format.
 */
830 cdbmake_getattr(cdbmakeobject
*self
, char *name
) {
832 if (!strcmp(name
,"__members__"))
833 return Py_BuildValue("[ssss]", "fd", "fn", "fntmp", "numentries");
835 if (!strcmp(name
,"fd"))
836 return Py_BuildValue("i", fileno(self
->cm
.fp
)); /* self.fd */
838 if (!strcmp(name
,"fn")) {
840 return self
->fn
; /* self.fn */
843 if (!strcmp(name
,"fntmp")) {
844 Py_INCREF(self
->fntmp
);
845 return self
->fntmp
; /* self.fntmp */
848 if (!strcmp(name
,"numentries"))
849 return Py_BuildValue("l", self
->cm
.numentries
); /* self.numentries */
851 return Py_FindMethod(cdbmake_methods
, (PyObject
*) self
, name
);
854 /* ---------------- Type delineation -------------------- */
856 statichere PyTypeObject CdbType
= {
857 /* The ob_type field must be initialized in the module init function
858 * to be portable to Windows without using C++. */
859 PyObject_HEAD_INIT(NULL
)
862 sizeof(CdbObject
), /*tp_basicsize*/
865 (destructor
)cdbo_dealloc
, /*tp_dealloc*/
867 (getattrfunc
)cdbo_getattr
, /*tp_getattr*/
872 0, /*tp_as_sequence*/
873 &cdbo_as_mapping
, /*tp_as_mapping*/
881 cdbo_object_doc
, /*tp_doc*/
884 statichere PyTypeObject CdbMakeType
= {
885 /* The ob_type field must be initialized in the module init function
886 * to be portable to Windows without using C++. */
887 PyObject_HEAD_INIT(NULL
)
889 "cdbmake", /*tp_name*/
890 sizeof(cdbmakeobject
), /*tp_basicsize*/
893 (destructor
)cdbmake_dealloc
, /*tp_dealloc*/
895 (getattrfunc
)cdbmake_getattr
, /*tp_getattr*/
900 0, /*tp_as_sequence*/
909 cdbmake_object_doc
, /*tp_doc*/
912 /* ---------------- exported functions ------------------ */
/*
 * cdb.hash(s): parse one string argument (pointer + length) and return
 * cdb_hash() of those bytes as a Python integer.
 *
 * NOTE(review): the result is formatted with "l" (C long).  If cdb_hash()
 * returns a 32-bit unsigned type, it should be cast to long (or formatted
 * with a matching unsigned code) so the variadic argument has the type the
 * format expects -- confirm against cdb_hash()'s declaration.  The
 * declarations of s and sz are not visible in this listing.
 */
914 _wrap_cdb_hash(PyObject
*ignore
, PyObject
*args
) {
919 if (! PyArg_ParseTuple(args
, "s#:hash", &s
, &sz
))
922 return Py_BuildValue("l", cdb_hash(s
, (unsigned int) sz
));
926 /* ---------------- cdb Module -------------------- */
928 static PyMethodDef module_functions
[] = {
929 {"init", cdbo_constructor
, METH_VARARGS
,
930 "cdb.init(f) -> cdb_object\n\
932 Open a CDB specified by f and return a cdb object.\n\
933 f may be a filename or an integral file descriptor\n\
934 (e.g., init( sys.stdin.fileno() )...)."},
935 {"cdbmake", new_cdbmake
, METH_VARARGS
,
936 "cdb.cdbmake(cdb, tmp) -> cdbmake_object\n\
938 Interface to the creation of a new CDB file \"cdb\".\n\
940 The cdbmake object first writes records to the temporary file\n\
941 \"tmp\" (records are inserted via the object's add() method).\n\
942 The finish() method then atomically renames \"tmp\" to \"cdb\",\n\
943 ensuring that readers of \"cdb\" need never wait for updates to\n\
946 {"hash", _wrap_cdb_hash
, METH_VARARGS
,
947 "hash(s) -> hashval\n\
949 Compute the 32-bit hash value of some sequence of bytes s."},
953 static char module_doc
[] =
954 "Python adaptation of D. J. Bernstein's constant database (CDB)\n\
955 package. See <http://cr.yp.to/cdb.html>\n\
957 CDB objects, created by init(), provide read-only, dict-like\n\
958 access to cdb files, as well as iterative methods.\n\
960 CDBMake objects, created by cdbmake(), allow for creation and\n\
961 atomic replacement of CDBs.\n\
963 This module defines a new Exception \"error\".";
969 CdbType
.ob_type
= &PyType_Type
;
970 CdbMakeType
.ob_type
= &PyType_Type
;
972 m
= Py_InitModule3("cdb", module_functions
, module_doc
);
974 d
= PyModule_GetDict(m
);
976 CDBError
= PyErr_NewException("cdb.error", NULL
, NULL
);
977 PyDict_SetItemString(d
, "error", CDBError
);
979 PyDict_SetItemString(d
, "__version__",
980 v
= PyString_FromString(VERSION
));
981 PyDict_SetItemString(d
, "__cdb_version__",
982 v
= PyString_FromString(CDBVERSION
));