X-Git-Url: https://git.distorted.org.uk/~mdw/disorder/blobdiff_plain/6aba3f6c378b62fbe5ef1bb1ad1ee1ef473e19c6..d1d4a182d95ffeb0fc607c90842256d9b4ab7c43:/server/trackdb.c diff --git a/server/trackdb.c b/server/trackdb.c index be1aea4..d1878c0 100644 --- a/server/trackdb.c +++ b/server/trackdb.c @@ -1,6 +1,6 @@ /* * This file is part of DisOrder - * Copyright (C) 2005, 2006 Richard Kettlewell + * Copyright (C) 2005, 2006, 2007 Richard Kettlewell * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -17,6 +17,8 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 * USA */ +/** @file server/trackdb.c + * @brief Track database */ #include #include "types.h" @@ -33,6 +35,8 @@ #include #include #include +#include +#include #include "event.h" #include "mem.h" @@ -43,7 +47,6 @@ #include "configuration.h" #include "syscalls.h" #include "wstat.h" -#include "words.h" #include "printf.h" #include "filepart.h" #include "trackname.h" @@ -52,6 +55,8 @@ #include "cache.h" #include "eventlog.h" #include "hash.h" +#include "unicode.h" +#include "unidata.h" #define RESCAN "disorder-rescan" #define DEADLOCK "disorder-deadlock" @@ -62,22 +67,76 @@ static const char *getpart(const char *track, const struct kvp *p, int *used_db); static int trackdb_alltags_tid(DB_TXN *tid, char ***taglistp); -static int trackdb_get_global_tid(const char *name, - DB_TXN *tid, - const char **rp); +static char **trackdb_new_tid(int *ntracksp, + int maxtracks, + DB_TXN *tid); +static int trackdb_expire_noticed_tid(time_t earliest, DB_TXN *tid); +static char *normalize_tag(const char *s, size_t ns); const struct cache_type cache_files_type = { 86400 }; unsigned long cache_files_hits, cache_files_misses; +/** @brief Set by trackdb_open() */ +int trackdb_existing_database; + /* setup and teardown ********************************************************/ static const char *home; /* home had better not change */ DB_ENV *trackdb_env; /* db environment */ -DB *trackdb_tracksdb; /* the db itself */ -DB *trackdb_prefsdb; /* preferences */ -DB *trackdb_searchdb; /* the search database */ + +/** @brief The tracks database + * - Keys are UTF-8(NFC(unicode(path name))) + * - Values are encoded key-value pairs + * - Data is reconstructable data about tracks that currently exist + */ +DB *trackdb_tracksdb; + +/** @brief The preferences database + * + * - Keys are UTF-8(NFC(unicode(path name))) + * - Values are encoded key-value pairs + * - Data is user data about tracks (that might not exist any more) + * and cannot be reconstructed + */ +DB *trackdb_prefsdb; + +/** @brief The search database + * + * - Keys are UTF-8(NFKC(casefold(search term))) + * - Values are UTF-8(NFC(unicode(path name))) + * - There can be more than one value per key + * - Presence of key,value means that path matches the search terms + * - Only tracks fond in @ref tracks_tracksdb are represented here + * - This database can be reconstructed, it contains no user data + */ +DB *trackdb_searchdb; + +/** @brief The tags database + * + * - Keys are UTF-8(NFKC(casefold(tag))) + * - Values are UTF-8(NFC(unicode(path name))) + * - There can be more than one value per key + * - Presence of key,value means that path matches the tag + * - This is always in sync with the tags preference + * - This database can be reconstructed, it contains no user data + */ DB *trackdb_tagsdb; /* the tags database */ + +/** @brief The global preferences database + * - Keys are UTF-8(NFC(preference)) + * - Values are global preference values + * - Data is user data and cannot be reconstructed + */ DB *trackdb_globaldb; /* global preferences */ + +/** @brief The noticed database + * - Keys are 64-bit big-endian timestamps + * - Values are UTF-8(NFC(unicode(path name))) + * - There can be more than one value per key + * - Presence of key,value means that path was added at the given time + * - Data cannot be reconstructed (but isn't THAT important) + */ +DB *trackdb_noticeddb; /* when track noticed */ static pid_t db_deadlock_pid = -1; /* deadlock manager PID */ static pid_t rescan_pid = -1; /* rescanner PID */ static int initialized, opened; /* state */ @@ -92,9 +151,17 @@ static int compare(DB attribute((unused)) *db_, return compare_path_raw(a->data, a->size, b->data, b->size); } -/* open environment */ -void trackdb_init(int recover) { +/** @brief Open database environment + * @param flags Flags word + * + * Flags should be one of: + * - @ref TRACKDB_NO_RECOVER + * - @ref TRACKDB_NORMAL_RECOVER + * - @ref TRACKDB_FATAL_RECOVER + */ +void trackdb_init(int flags) { int err; + const int recover = flags & TRACKDB_RECOVER_MASK; static int recover_type[] = { 0, DB_RECOVER, DB_RECOVER_FATAL }; /* sanity checks */ @@ -124,7 +191,7 @@ void trackdb_init(int recover) { |DB_CREATE |recover_type[recover], 0666))) - fatal(0, "trackdb_env->open: %s", db_strerror(err)); + fatal(0, "trackdb_env->open %s: %s", config->home, db_strerror(err)); trackdb_env->set_errpfx(trackdb_env, "DB"); trackdb_env->set_errfile(trackdb_env, stderr); trackdb_env->set_verbose(trackdb_env, DB_VERB_DEADLOCK, 1); @@ -148,39 +215,36 @@ static int reap_db_deadlock(ev_source attribute((unused)) *ev, return 0; } -static pid_t subprogram(ev_source *ev, const char *prog) { +static pid_t subprogram(ev_source *ev, const char *prog, + int outputfd) { pid_t pid; - int lfd; /* If we're in the background then trap subprocess stdout/stderr */ - if(!isatty(2)) - lfd = logfd(ev, prog); - else - lfd = -1; if(!(pid = xfork())) { exitfn = _exit; - ev_signal_atfork(ev); + if(ev) + ev_signal_atfork(ev); signal(SIGPIPE, SIG_DFL); - if(lfd != -1) { - xdup2(lfd, 1); - xdup2(lfd, 2); + if(outputfd != -1) { + xdup2(outputfd, 1); + xclose(outputfd); } /* If we were negatively niced, undo it. We don't bother checking for * error, it's not that important. */ setpriority(PRIO_PROCESS, 0, 0); execlp(prog, prog, "--config", configfile, debugging ? "--debug" : "--no-debug", + log_default == &log_syslog ? "--syslog" : "--no-syslog", (char *)0); fatal(errno, "error invoking %s", prog); } - if(lfd != -1) xclose(lfd); return pid; } /* start deadlock manager */ void trackdb_master(ev_source *ev) { assert(db_deadlock_pid == -1); - db_deadlock_pid = subprogram(ev, DEADLOCK); + db_deadlock_pid = subprogram(ev, DEADLOCK, -1); ev_child(ev, db_deadlock_pid, 0, reap_db_deadlock, 0); D(("started deadlock manager")); } @@ -228,16 +292,86 @@ static DB *open_db(const char *path, if((err = db->set_bt_compare(db, compare))) fatal(0, "db->set_bt_compare %s: %s", path, db_strerror(err)); if((err = db->open(db, 0, path, 0, dbtype, - openflags | DB_AUTO_COMMIT, mode))) - fatal(0, "db->open %s: %s", path, db_strerror(err)); + openflags | DB_AUTO_COMMIT, mode))) { + if((openflags & DB_CREATE) || errno != ENOENT) + fatal(0, "db->open %s: %s", path, db_strerror(err)); + db->close(db, 0); + db = 0; + } return db; } -/* open track databases */ -void trackdb_open(void) { +/** @brief Open track databases + * @param Flags flags word + * + * @p flags should be one of: + * - @p TRACKDB_NO_UPGRADE, if no upgrade should be attempted + * - @p TRACKDB_CAN_UPGRADE, if an upgrade may be attempted + * - @p TRACKDB_OPEN_FOR_UPGRADE, if this is disorder-dbupgrade + */ +void trackdb_open(int flags) { + int err; + pid_t pid; + /* sanity checks */ assert(opened == 0); ++opened; + /* check the database version first */ + trackdb_globaldb = open_db("global.db", 0, DB_HASH, 0, 0666); + if(trackdb_globaldb) { + /* This is an existing database */ + const char *s; + long oldversion; + + s = trackdb_get_global("_dbversion"); + /* Close the database again, we'll open it property below */ + if((err = trackdb_globaldb->close(trackdb_globaldb, 0))) + fatal(0, "error closing global.db: %s", db_strerror(err)); + trackdb_globaldb = 0; + /* Convert version string to an integer */ + oldversion = s ? atol(s) : 1; + if(oldversion > config->dbversion) { + /* Database is from the future; we never allow this. */ + fatal(0, "this version of DisOrder is too old for database version %ld", + oldversion); + } + if(oldversion < config->dbversion) { + /* Database version is out of date */ + switch(flags & TRACKDB_UPGRADE_MASK) { + case TRACKDB_NO_UPGRADE: + /* This database needs upgrading but this is not permitted */ + fatal(0, "database needs upgrading from %ld to %ld", + oldversion, config->dbversion); + case TRACKDB_CAN_UPGRADE: + /* This database needs upgrading */ + info("invoking disorder-dbupgrade to upgrade from %ld to %ld", + oldversion, config->dbversion); + pid = subprogram(0, "disorder-dbupgrade", -1); + while(waitpid(pid, &err, 0) == -1 && errno == EINTR) + ; + if(err) + fatal(0, "disorder-dbupgrade %s", wstat(err)); + info("disorder-dbupgrade succeeded"); + break; + case TRACKDB_OPEN_FOR_UPGRADE: + break; + default: + abort(); + } + } + if(oldversion == config->dbversion && (flags & TRACKDB_OPEN_FOR_UPGRADE)) { + /* This doesn't make any sense */ + fatal(0, "database is already at current version"); + } + trackdb_existing_database = 1; + } else { + if(flags & TRACKDB_OPEN_FOR_UPGRADE) { + /* Cannot upgrade a new database */ + fatal(0, "cannot upgrade a database that does not exist"); + } + /* This is a brand new database */ + trackdb_existing_database = 0; + } /* open the databases */ trackdb_tracksdb = open_db("tracks.db", DB_RECNUM, DB_BTREE, DB_CREATE, 0666); @@ -247,6 +381,16 @@ void trackdb_open(void) { DB_DUP|DB_DUPSORT, DB_HASH, DB_CREATE, 0666); trackdb_prefsdb = open_db("prefs.db", 0, DB_HASH, DB_CREATE, 0666); trackdb_globaldb = open_db("global.db", 0, DB_HASH, DB_CREATE, 0666); + trackdb_noticeddb = open_db("noticed.db", + DB_DUPSORT, DB_BTREE, DB_CREATE, 0666); + if(!trackdb_existing_database) { + /* Stash the database version */ + char buf[32]; + + assert(!(flags & TRACKDB_OPEN_FOR_UPGRADE)); + snprintf(buf, sizeof buf, "%ld", config->dbversion); + trackdb_set_global("_dbversion", buf, 0); + } D(("opened databases")); } @@ -267,6 +411,8 @@ void trackdb_close(void) { fatal(0, "error closing prefs.db: %s", db_strerror(err)); if((err = trackdb_globaldb->close(trackdb_globaldb, 0))) fatal(0, "error closing global.db: %s", db_strerror(err)); + if((err = trackdb_noticeddb->close(trackdb_noticeddb, 0))) + fatal(0, "error closing noticed.db: %s", db_strerror(err)); trackdb_tracksdb = trackdb_searchdb = trackdb_prefsdb = 0; trackdb_tagsdb = trackdb_globaldb = 0; D(("closed databases")); @@ -485,21 +631,102 @@ static int is_display_pref(const char *name) { return !strncmp(name, prefix, (sizeof prefix) - 1); } +/** @brief Word_Break property tailor that treats underscores as spaces */ +static int tailor_underscore_Word_Break_Other(uint32_t c) { + switch(c) { + default: + return -1; + case 0x005F: /* LOW LINE (SPACING UNDERSCORE) */ + return unicode_Word_Break_Other; + } +} + +/** @brief Remove all combining characters in-place + * @param s Pointer to start of string + * @param ns Length of string + * @return New, possiblby reduced, length + */ +static size_t remove_combining_chars(uint32_t *s, size_t ns) { + uint32_t *start = s, *t = s, *end = s + ns; + + while(s < end) { + const uint32_t c = *s++; + if(!utf32_combining_class(c)) + *t++ = c; + } + return t - start; +} + +/** @brief Normalize and split a string using a given tailoring */ +static void word_split(struct vector *v, + const char *s, + unicode_property_tailor *pt) { + size_t nw, nt32, i; + uint32_t *t32, **w32; + + /* Convert to UTF-32 */ + if(!(t32 = utf8_to_utf32(s, strlen(s), &nt32))) + return; + /* Erase case distinctions */ + if(!(t32 = utf32_casefold_compat(t32, nt32, &nt32))) + return; + /* Drop combining characters */ + nt32 = remove_combining_chars(t32, nt32); + /* Split into words, treating _ as a space */ + w32 = utf32_word_split(t32, nt32, &nw, pt); + /* Convert words back to UTF-8 and append to result */ + for(i = 0; i < nw; ++i) + vector_append(v, utf32_to_utf8(w32[i], utf32_len(w32[i]), 0)); +} + +/** @brief Normalize a tag + * @param s Tag + * @param ns Length of tag + * @return Normalized string or NULL on error + * + * The return value will be: + * - case-folded + * - have no leading or trailing space + * - have no combining characters + * - all spacing between words will be a single U+0020 SPACE + */ +static char *normalize_tag(const char *s, size_t ns) { + uint32_t *s32, **w32; + size_t ns32, nw32, i; + struct dynstr d[1]; + + if(!(s32 = utf8_to_utf32(s, ns, &ns32))) + return 0; + if(!(s32 = utf32_casefold_compat(s32, ns32, &ns32))) /* ->NFKD */ + return 0; + ns32 = remove_combining_chars(s32, ns32); + /* Split into words, no Word_Break tailoring */ + w32 = utf32_word_split(s32, ns32, &nw32, 0); + /* Compose back into a string */ + dynstr_init(d); + for(i = 0; i < nw32; ++i) { + if(i) + dynstr_append(d, ' '); + dynstr_append_string(d, utf32_to_utf8(w32[i], utf32_len(w32[i]), 0)); + } + dynstr_terminate(d); + return d->vec; +} + /* compute the words of a track name */ static char **track_to_words(const char *track, const struct kvp *p) { struct vector v; - char **w; - int nw; + const char *rootless = track_rootless(track); + if(!rootless) + rootless = track; /* bodge */ vector_init(&v); - if((w = words(casefold(strip_extension(track_rootless(track))), &nw))) - vector_append_many(&v, w, nw); - + rootless = strip_extension(rootless); + word_split(&v, strip_extension(rootless), tailor_underscore_Word_Break_Other); for(; p; p = p->next) if(is_display_pref(p->name)) - if((w = words(casefold(p->value), &nw))) - vector_append_many(&v, w, nw); + word_split(&v, p->value, 0); vector_terminate(&v); return dedupe(v.vec, v.nvec); } @@ -551,7 +778,8 @@ static char **parsetags(const char *s) { /* strip trailing spaces */ while(s > t && s[-1] == ' ') --s; - vector_append(&v, xstrndup(t, s - t)); + /* add tag to list */ + vector_append(&v, normalize_tag(t, (size_t)(s - t))); /* skip intermediate and trailing separators */ while(*s && (!tagchar(*s) || *s == ' ')) ++s; @@ -578,9 +806,15 @@ static int compute_alias(char **aliasp, const char *s = config->alias, *t, *expansion, *part; int c, used_db = 0, slash_prefix, err; struct kvp *at; + const char *const root = find_track_root(track); + if(!root) { + /* Bodge for tracks with no root */ + *aliasp = 0; + return 0; + } dynstr_init(&d); - dynstr_append_string(&d, find_track_root(track)); + dynstr_append_string(&d, root); while((c = (unsigned char)*s++)) { if(c != '{') { dynstr_append(&d, c); @@ -664,7 +898,9 @@ done: /* trackdb_notice() **********************************************************/ -/* notice a track */ +/** @brief notice a possibly new track + * @return @c DB_NOTFOUND if new, 0 if already known + */ int trackdb_notice(const char *track, const char *path) { int err; @@ -682,6 +918,12 @@ int trackdb_notice(const char *track, return err; } +/** @brief notice a possibly new track + * @param track NFC UTF-8 track name + * @param path Raw path name + * @param tid Transaction ID + * @return @c DB_NOTFOUND if new, 0 if already known, @c DB_LOCK_DEADLOCK also + */ int trackdb_notice_tid(const char *track, const char *path, DB_TXN *tid) { @@ -689,13 +931,13 @@ int trackdb_notice_tid(const char *track, struct kvp *t, *a, *p; int t_changed, ret; char *alias, **w; - + /* notice whether the tracks.db entry changes */ t_changed = 0; /* get any existing tracks entry */ if((err = gettrackdata(track, &t, &p, 0, 0, tid)) == DB_LOCK_DEADLOCK) return err; - ret = err; + ret = err; /* 0 or DB_NOTFOUND */ /* this is a real track */ t_changed += kvp_set(&t, "_alias_for", 0); t_changed += kvp_set(&t, "_path", path); @@ -722,6 +964,24 @@ int trackdb_notice_tid(const char *track, /* only store the tracks.db entry if it has changed */ if(t_changed && (err = trackdb_putdata(trackdb_tracksdb, track, t, tid, 0))) return err; + if(ret == DB_NOTFOUND) { + uint32_t timestamp[2]; + time_t now; + DBT key, data; + + time(&now); + timestamp[0] = htonl((uint64_t)now >> 32); + timestamp[1] = htonl((uint32_t)now); + memset(&key, 0, sizeof key); + key.data = timestamp; + key.size = sizeof timestamp; + switch(err = trackdb_noticeddb->put(trackdb_noticeddb, tid, &key, + make_key(&data, track), 0)) { + case 0: break; + case DB_LOCK_DEADLOCK: return err; + default: fatal(0, "error updating noticed.db: %s", db_strerror(err)); + } + } return ret; } @@ -841,11 +1101,43 @@ static int get_stats(struct vector *v, return 0; } +/** @brief One entry in the search league */ struct search_entry { char *word; int n; }; +/** @brief Add a word to the search league + * @param se Pointer to search league + * @param count Maximum size for search league + * @param nse Current size of search league + * @param word New word, or NULL + * @param n How often @p word appears + * @return New size of search league + */ +static int register_search_entry(struct search_entry *se, + int count, + int nse, + char *word, + int n) { + int i; + + if(word && (nse < count || n > se[nse - 1].n)) { + /* Find the starting point */ + if(nse == count) + i = nse - 1; + else + i = nse++; + /* Find the insertion point */ + while(i > 0 && n > se[i - 1].n) + --i; + memmove(&se[i + 1], &se[i], (nse - i - 1) * sizeof *se); + se[i].word = word; + se[i].n = n; + } + return nse; +} + /* find the top COUNT words in the search database */ static int search_league(struct vector *v, int count, DB_TXN *tid) { struct search_entry *se; @@ -858,25 +1150,14 @@ static int search_league(struct vector *v, int count, DB_TXN *tid) { cursor = trackdb_opencursor(trackdb_searchdb, tid); se = xmalloc(count * sizeof *se); + /* Walk across the whole database counting up the number of times each + * word appears. */ while(!(err = cursor->c_get(cursor, prepare_data(&k), prepare_data(&d), DB_NEXT))) { if(word && wl == k.size && !strncmp(word, k.data, wl)) - ++n; + ++n; /* same word again */ else { -#define FINALIZE() do { \ - if(word && (nse < count || n > se[nse - 1].n)) { \ - if(nse == count) \ - i = nse - 1; \ - else \ - i = nse++; \ - while(i > 0 && n > se[i - 1].n) \ - --i; \ - memmove(&se[i + 1], &se[i], (nse - i) * sizeof *se); \ - se[i].word = word; \ - se[i].n = n; \ - } \ -} while(0) - FINALIZE(); + nse = register_search_entry(se, count, nse, word, n); word = xstrndup(k.data, wl = k.size); n = 1; } @@ -893,7 +1174,7 @@ static int search_league(struct vector *v, int count, DB_TXN *tid) { } if(trackdb_closecursor(cursor)) err = DB_LOCK_DEADLOCK; if(err) return err; - FINALIZE(); + nse = register_search_entry(se, count, nse, word, n); byte_xasprintf(&str, "Top %d search words:", nse); vector_append(v, str); for(i = 0; i < nse; ++i) { @@ -910,7 +1191,6 @@ static int search_league(struct vector *v, int count, DB_TXN *tid) { char **trackdb_stats(int *nstatsp) { DB_TXN *tid; struct vector v; - char *s; vector_init(&v); for(;;) { @@ -926,12 +1206,6 @@ char **trackdb_stats(int *nstatsp) { if(get_stats(&v, trackdb_prefsdb, SI(hash), tid)) goto fail; vector_append(&v, (char *)""); if(search_league(&v, 10, tid)) goto fail; - vector_append(&v, (char *)""); - vector_append(&v, (char *)"Server stats:"); - byte_xasprintf(&s, "track lookup cache hits: %lu", cache_files_hits); - vector_append(&v, (char *)s); - byte_xasprintf(&s, "track lookup cache misses: %lu", cache_files_misses); - vector_append(&v, (char *)s); vector_terminate(&v); break; fail: @@ -942,6 +1216,89 @@ fail: return v.vec; } +struct stats_details { + void (*done)(char *data, void *u); + void *u; + int exited; /* subprocess exited */ + int closed; /* pipe close */ + int wstat; /* wait status from subprocess */ + struct dynstr data[1]; /* data read from pipe */ +}; + +static void stats_complete(struct stats_details *d) { + char *s; + + if(!(d->exited && d->closed)) + return; + byte_xasprintf(&s, "\n" + "Server stats:\n" + "track lookup cache hits: %lu\n" + "track lookup cache misses: %lu\n", + cache_files_hits, + cache_files_misses); + dynstr_append_string(d->data, s); + dynstr_terminate(d->data); + d->done(d->data->vec, d->u); +} + +static int stats_finished(ev_source attribute((unused)) *ev, + pid_t attribute((unused)) pid, + int status, + const struct rusage attribute((unused)) *rusage, + void *u) { + struct stats_details *const d = u; + + d->exited = 1; + if(status) + error(0, "disorder-stats %s", wstat(status)); + stats_complete(d); + return 0; +} + +static int stats_read(ev_source attribute((unused)) *ev, + ev_reader *reader, + void *ptr, + size_t bytes, + int eof, + void *u) { + struct stats_details *const d = u; + + dynstr_append_bytes(d->data, ptr, bytes); + ev_reader_consume(reader, bytes); + if(eof) + d->closed = 1; + stats_complete(d); + return 0; +} + +static int stats_error(ev_source attribute((unused)) *ev, + int errno_value, + void *u) { + struct stats_details *const d = u; + + error(errno_value, "error reading from pipe to disorder-stats"); + d->closed = 1; + stats_complete(d); + return 0; +} + +void trackdb_stats_subprocess(ev_source *ev, + void (*done)(char *data, void *u), + void *u) { + int p[2]; + pid_t pid; + struct stats_details *d = xmalloc(sizeof *d); + + dynstr_init(d->data); + d->done = done; + d->u = u; + xpipe(p); + pid = subprogram(ev, "disorder-stats", p[1]); + xclose(p[1]); + ev_child(ev, pid, 0, stats_finished, d); + ev_reader_new(ev, p[0], stats_read, stats_error, d, "disorder-stats reader"); +} + /* set a pref (remove if value=0) */ int trackdb_set(const char *track, const char *name, @@ -951,6 +1308,10 @@ int trackdb_set(const char *track, int err, cmp; char *oldalias, *newalias, **oldtags = 0, **newtags; + if(value) { + /* TODO: if value matches default then set value=0 */ + } + for(;;) { tid = trackdb_begin_transaction(); if((err = gettrackdata(track, &t, &p, 0, @@ -1269,8 +1630,7 @@ const char *trackdb_random(int tries) { } else { /* No required tags. We pick random record numbers in the database * instead. */ - switch(err = trackdb_tracksdb->stat(trackdb_tracksdb, tid, &sp, - DB_RECORDCOUNT)) { + switch(err = trackdb_tracksdb->stat(trackdb_tracksdb, tid, &sp, 0)) { case 0: break; case DB_LOCK_DEADLOCK: @@ -1523,6 +1883,7 @@ static const char *checktag(const char *s) { char **trackdb_search(char **wordlist, int nwordlist, int *ntracks) { const char **w, *best = 0, *tag; char **twords, **tags; + char *istag; int i, j, n, err, what; DBC *cursor = 0; DBT k, d; @@ -1534,15 +1895,32 @@ char **trackdb_search(char **wordlist, int nwordlist, int *ntracks) { const char *dbname; *ntracks = 0; /* for early returns */ - /* casefold all the words */ + /* normalize all the words */ w = xmalloc(nwordlist * sizeof (char *)); + istag = xmalloc_noptr(nwordlist); for(n = 0; n < nwordlist; ++n) { - w[n] = casefold(wordlist[n]); - if(checktag(w[n])) ++ntags; /* count up tags */ + uint32_t *w32; + size_t nw32; + + w[n] = utf8_casefold_compat(wordlist[n], strlen(wordlist[n]), 0); + if(checktag(w[n])) { + ++ntags; /* count up tags */ + /* Normalize the tag */ + w[n] = normalize_tag(w[n] + 4, strlen(w[n] + 4)); + istag[n] = 1; + } else { + /* Normalize the search term by removing combining characters */ + if(!(w32 = utf8_to_utf32(w[n], strlen(w[n]), &nw32))) + return 0; + nw32 = remove_combining_chars(w32, nw32); + if(!(w[n] = utf32_to_utf8(w32, nw32, 0))) + return 0; + istag[n] = 0; + } } /* find the longest non-stopword */ for(n = 0; n < nwordlist; ++n) - if(!stopword(w[n]) && !checktag(w[n])) + if(!istag[n] && !stopword(w[n])) if(!best || strlen(w[n]) > strlen(best)) best = w[n]; /* TODO: we should at least in principal be able to identify the word or tag @@ -1551,7 +1929,7 @@ char **trackdb_search(char **wordlist, int nwordlist, int *ntracks) { if(ntags && !best) { /* Only tags are listed. We limit to the first and narrow down with the * rest. */ - best = checktag(w[0]); + best = istag[0] ? w[0] : 0; db = trackdb_tagsdb; dbname = "tags"; } else if(best) { @@ -1600,7 +1978,8 @@ char **trackdb_search(char **wordlist, int nwordlist, int *ntracks) { twords = track_to_words(v.vec[n], p); tags = parsetags(kvp_get(p, "tags")); for(i = 0; i < nwordlist; ++i) { - if((tag = checktag(w[i]))) { + if(istag[i]) { + tag = w[i]; /* Track must have this tag */ for(j = 0; tags[j]; ++j) if(!strcmp(tag, tags[j])) break; /* tag found */ @@ -1640,22 +2019,37 @@ int trackdb_scan(const char *root, DB_TXN *tid) { DBC *cursor; DBT k, d; - size_t root_len = strlen(root); - int err; + const size_t root_len = root ? strlen(root) : 0; + int err, cberr; struct kvp *data; + const char *track; cursor = trackdb_opencursor(trackdb_tracksdb, tid); - err = cursor->c_get(cursor, make_key(&k, root), prepare_data(&d), - DB_SET_RANGE); + if(root) + err = cursor->c_get(cursor, make_key(&k, root), prepare_data(&d), + DB_SET_RANGE); + else { + memset(&k, 0, sizeof k); + err = cursor->c_get(cursor, &k, prepare_data(&d), + DB_FIRST); + } while(!err) { - if(k.size > root_len - && !strncmp(k.data, root, root_len) - && ((char *)k.data)[root_len] == '/') { + if(!root + || (k.size > root_len + && !strncmp(k.data, root, root_len) + && ((char *)k.data)[root_len] == '/')) { data = kvp_urldecode(d.data, d.size); - if(kvp_get(data, "_path")) - if((err = callback(xstrndup(k.data, k.size), data, u, tid))) + if(kvp_get(data, "_path")) { + track = xstrndup(k.data, k.size); + /* Advance to the next track before the callback so that the callback + * may safely delete the track */ + err = cursor->c_get(cursor, &k, &d, DB_NEXT); + if((cberr = callback(track, data, u, tid))) { + err = cberr; break; - err = cursor->c_get(cursor, &k, &d, DB_NEXT); + } + } else + err = cursor->c_get(cursor, &k, &d, DB_NEXT); } else break; } @@ -1684,23 +2078,32 @@ static int reap_rescan(ev_source attribute((unused)) *ev, void attribute((unused)) *u) { if(pid == rescan_pid) rescan_pid = -1; if(status) - error(0, "disorderd-rescan: %s", wstat(status)); + error(0, RESCAN": %s", wstat(status)); else - D(("disorderd-rescan terminate: %s", wstat(status))); + D((RESCAN" terminated: %s", wstat(status))); /* Our cache of file lookups is out of date now */ cache_clean(&cache_files_type); + eventlog("rescanned", (char *)0); return 0; } void trackdb_rescan(ev_source *ev) { + int w; + if(rescan_pid != -1) { error(0, "rescan already underway"); return; } - rescan_pid = subprogram(ev, RESCAN); - ev_child(ev, rescan_pid, 0, reap_rescan, 0); - D(("started rescanner")); - + rescan_pid = subprogram(ev, RESCAN, -1); + if(ev) { + ev_child(ev, rescan_pid, 0, reap_rescan, 0); + D(("started rescanner")); + } else { + /* This is the first rescan, we block until it is complete */ + while(waitpid(rescan_pid, &w, 0) < 0 && errno == EINTR) + ; + reap_rescan(0, rescan_pid, w, 0, 0); + } } int trackdb_rescan_cancel(void) { @@ -1717,27 +2120,13 @@ void trackdb_set_global(const char *name, const char *value, const char *who) { DB_TXN *tid; - DBT k, d; int err; int state; - memset(&k, 0, sizeof k); - memset(&d, 0, sizeof d); - k.data = (void *)name; - k.size = strlen(name); - if(value) { - d.data = (void *)value; - d.size = strlen(value); - } for(;;) { tid = trackdb_begin_transaction(); - if(value) - err = trackdb_globaldb->put(trackdb_globaldb, tid, &k, &d, 0); - else - err = trackdb_globaldb->del(trackdb_globaldb, tid, &k, 0); - if(!err || err == DB_NOTFOUND) break; - if(err != DB_LOCK_DEADLOCK) - fatal(0, "error updating database: %s", db_strerror(err)); + if(!(err = trackdb_set_global_tid(name, value, tid))) + break; trackdb_abort_transaction(tid); } trackdb_commit_transaction(tid); @@ -1760,6 +2149,30 @@ void trackdb_set_global(const char *name, reqtracks = 0; } +int trackdb_set_global_tid(const char *name, + const char *value, + DB_TXN *tid) { + DBT k, d; + int err; + + memset(&k, 0, sizeof k); + memset(&d, 0, sizeof d); + k.data = (void *)name; + k.size = strlen(name); + if(value) { + d.data = (void *)value; + d.size = strlen(value); + } + if(value) + err = trackdb_globaldb->put(trackdb_globaldb, tid, &k, &d, 0); + else + err = trackdb_globaldb->del(trackdb_globaldb, tid, &k, 0); + if(err == DB_LOCK_DEADLOCK) return err; + if(err) + fatal(0, "error updating database: %s", db_strerror(err)); + return 0; +} + const char *trackdb_get_global(const char *name) { DB_TXN *tid; int err; @@ -1775,9 +2188,9 @@ const char *trackdb_get_global(const char *name) { return r; } -static int trackdb_get_global_tid(const char *name, - DB_TXN *tid, - const char **rp) { +int trackdb_get_global_tid(const char *name, + DB_TXN *tid, + const char **rp) { DBT k, d; int err; @@ -1795,10 +2208,129 @@ static int trackdb_get_global_tid(const char *name, case DB_LOCK_DEADLOCK: return err; default: - fatal(0, "error updating database: %s", db_strerror(err)); + fatal(0, "error reading database: %s", db_strerror(err)); } } +/** @brief Retrieve the most recently added tracks + * @param ntracksp Where to put count, or 0 + * @param maxtracks Maximum number of tracks to retrieve + * @return null-terminated array of track names + * + * The most recently added track is first in the array. + */ +char **trackdb_new(int *ntracksp, + int maxtracks) { + DB_TXN *tid; + char **tracks; + + for(;;) { + tid = trackdb_begin_transaction(); + tracks = trackdb_new_tid(ntracksp, maxtracks, tid); + if(tracks) + break; + trackdb_abort_transaction(tid); + } + trackdb_commit_transaction(tid); + return tracks; +} + +/** @brief Retrieve the most recently added tracks + * @param ntracksp Where to put count, or 0 + * @param maxtracks Maximum number of tracks to retrieve, or 0 for all + * @param tid Transaction ID + * @return null-terminated array of track names, or NULL on deadlock + * + * The most recently added track is first in the array. + */ +static char **trackdb_new_tid(int *ntracksp, + int maxtracks, + DB_TXN *tid) { + DBC *c; + DBT k, d; + int err = 0; + struct vector tracks[1]; + + vector_init(tracks); + c = trackdb_opencursor(trackdb_noticeddb, tid); + while((maxtracks <= 0 || tracks->nvec < maxtracks) + && !(err = c->c_get(c, prepare_data(&k), prepare_data(&d), DB_PREV))) + vector_append(tracks, xstrndup(d.data, d.size)); + switch(err) { + case 0: /* hit maxtracks */ + case DB_NOTFOUND: /* ran out of tracks */ + break; + case DB_LOCK_DEADLOCK: + trackdb_closecursor(c); + return 0; + default: + fatal(0, "error reading noticed.db: %s", db_strerror(err)); + } + if((err = trackdb_closecursor(c))) + return 0; /* deadlock */ + vector_terminate(tracks); + if(ntracksp) + *ntracksp = tracks->nvec; + return tracks->vec; +} + +/** @brief Expire noticed.db + * @param earliest Earliest timestamp to keep + */ +void trackdb_expire_noticed(time_t earliest) { + DB_TXN *tid; + + for(;;) { + tid = trackdb_begin_transaction(); + if(!trackdb_expire_noticed_tid(earliest, tid)) + break; + trackdb_abort_transaction(tid); + } + trackdb_commit_transaction(tid); +} + +/** @brief Expire noticed.db + * @param earliest Earliest timestamp to keep + * @param tid Transaction ID + * @return 0 or DB_LOCK_DEADLOCK + */ +static int trackdb_expire_noticed_tid(time_t earliest, DB_TXN *tid) { + DBC *c; + DBT k, d; + int err = 0, ret; + time_t when; + uint32_t *kk; + int count = 0; + + c = trackdb_opencursor(trackdb_noticeddb, tid); + while(!(err = c->c_get(c, prepare_data(&k), prepare_data(&d), DB_NEXT))) { + kk = k.data; + when = (time_t)(((uint64_t)ntohl(kk[0]) << 32) + ntohl(kk[1])); + if(when >= earliest) + break; + if((err = c->c_del(c, 0))) { + if(err != DB_LOCK_DEADLOCK) + fatal(0, "error deleting expired noticed.db entry: %s", + db_strerror(err)); + break; + } + ++count; + } + if(err == DB_NOTFOUND) + err = 0; + if(err && err != DB_LOCK_DEADLOCK) + fatal(0, "error expiring noticed.db: %s", db_strerror(err)); + ret = err; + if((err = trackdb_closecursor(c))) { + if(err != DB_LOCK_DEADLOCK) + fatal(0, "error closing cursor: %s", db_strerror(err)); + ret = err; + } + if(!ret && count) + info("expired %d tracks from noticed.db", count); + return ret; +} + /* tidying up ****************************************************************/ void trackdb_gc(void) {