/*
* Return a pointer to the portion of str that comes after the last
- * slash or backslash.
+ * slash (or backslash, if `local' is TRUE).
+ *
+ * The result points into `str' itself; nothing is copied or
+ * allocated. If no separator is found, `str' is returned unchanged.
+ * When `local' is nonzero the backslash search covers only the text
+ * following the last slash, since `str' has already been advanced
+ * past that slash by then.
*/
-static char *stripslashes(char *str)
+static char *stripslashes(char *str, int local)
{
char *p;
p = strrchr(str, '/');
if (p) str = p+1;
- p = strrchr(str, '\\');
- if (p) str = p+1;
+ if (local) {
+ p = strrchr(str, '\\');
+ if (p) str = p+1;
+ }
return str;
}
struct fxp_name *names;
int namepos, namelen;
char *dirpath;
+ char *wildcard;
} *scp_sftp_dirstack_head;
static char *scp_sftp_remotepath, *scp_sftp_currentname;
+static char *scp_sftp_wildcard; /* set by scp_sink_setup; NULL when the source had no wildcard, or once ownership has moved to a dirstack entry */
static int scp_sftp_targetisdir, scp_sftp_donethistarget;
static int scp_sftp_preserve, scp_sftp_recursive;
static unsigned long scp_sftp_mtime, scp_sftp_atime;
* right at the start, whereas scp_sink_init is called to
* initialise every level of recursion in the protocol.
*/
+/*
+ * Returns 0 on success, or 1 (after reporting the problem and
+ * incrementing errs) if the source contains a wildcard anywhere
+ * before its final slash. When not using SFTP nothing is set up
+ * and 0 is returned.
+ */
-void scp_sink_setup(char *source, int preserve, int recursive)
+int scp_sink_setup(char *source, int preserve, int recursive)
{
if (using_sftp) {
- scp_sftp_remotepath = dupstr(source);
+ char *newsource;
+ /*
+ * It's possible that the source string we've been given
+ * contains a wildcard. If so, we must split the directory
+ * away from the wildcard itself (throwing an error if any
+ * wildcardness comes before the final slash) and arrange
+ * things so that a dirstack entry will be set up.
+ */
+ newsource = smalloc(1+strlen(source));
+ if (!wc_unescape(newsource, source)) {
+ /* Yes, here we go; it's a wildcard. Bah. */
+ char *dupsource, *lastpart, *dirpart, *wildcard;
+ /*
+ * newsource was only scratch space for the wildcard test
+ * above; wc_unescape leaves it unusable on failure and
+ * nothing below refers to it, so release it now rather
+ * than leak it on both the error and success paths.
+ */
+ sfree(newsource);
+ dupsource = dupstr(source);
+ lastpart = stripslashes(dupsource, 0);
+ wildcard = dupstr(lastpart);
+ *lastpart = '\0';
+ if (*dupsource && dupsource[1]) {
+ /*
+ * The remains of dupsource are at least two
+ * characters long, meaning the pathname wasn't
+ * empty or just `/'. Hence, we remove the trailing
+ * slash.
+ */
+ lastpart[-1] = '\0';
+ }
+
+ /*
+ * Now we have separated our string into dupsource (the
+ * directory part) and wildcard. Both of these will
+ * need freeing at some point. Next step is to remove
+ * wildcard escapes from the directory part, throwing
+ * an error if it contains a real wildcard.
+ */
+ dirpart = smalloc(1+strlen(dupsource));
+ if (!wc_unescape(dirpart, dupsource)) {
+ tell_user(stderr, "%s: multiple-level wildcards unsupported",
+ source);
+ errs++;
+ sfree(dirpart);
+ sfree(wildcard);
+ sfree(dupsource);
+ return 1;
+ }
+
+ /*
+ * Now we have dirpart (unescaped, ie a valid remote
+ * path), and wildcard (a wildcard). This will be
+ * sufficient to arrange a dirstack entry.
+ */
+ scp_sftp_remotepath = dirpart;
+ scp_sftp_wildcard = wildcard;
+ sfree(dupsource);
+ } else {
+ scp_sftp_remotepath = newsource;
+ scp_sftp_wildcard = NULL;
+ }
scp_sftp_preserve = preserve;
scp_sftp_recursive = recursive;
scp_sftp_donethistarget = 0;
scp_sftp_dirstack_head = NULL;
}
+ return 0;
}
int scp_sink_init(void)
#define SCP_SINK_FILE 1
#define SCP_SINK_DIR 2
#define SCP_SINK_ENDDIR 3
+#define SCP_SINK_RETRY 4 /* not an action; just try again */
struct scp_sink_action {
int action; /* FILE, DIR, ENDDIR */
char *buf; /* will need freeing after use */
*/
struct scp_sftp_dirstack *head = scp_sftp_dirstack_head;
while (head->namepos < head->namelen &&
- is_dots(head->names[head->namepos].filename))
+ (is_dots(head->names[head->namepos].filename) ||
+ (head->wildcard &&
+ !wc_match(head->wildcard,
+ head->names[head->namepos].filename))))
head->namepos++; /* skip . and .. */
if (head->namepos < head->namelen) {
fname = dupcat(head->dirpath, "/",
} else {
/*
* We've come to the end of the list; pop it off
- * the stack and return an ENDDIR action.
+ * the stack and return an ENDDIR action (or RETRY
+ * if this was a wildcard match).
*/
-
+ if (head->wildcard) {
+ act->action = SCP_SINK_RETRY;
+ sfree(head->wildcard);
+ } else {
+ act->action = SCP_SINK_ENDDIR;
+ }
+
sfree(head->dirpath);
sfree(head->names);
scp_sftp_dirstack_head = head->next;
sfree(head);
- act->action = SCP_SINK_ENDDIR;
return 0;
}
}
struct fxp_names *names;
/*
- * It's a directory. If we're not in recursive
- * mode, this just merits a complaint.
+ * It's a directory. If we're not in recursive mode and
+ * we haven't been passed a wildcard from
+ * scp_sink_setup, this just merits a complaint.
*/
- if (!scp_sftp_recursive) {
+ if (!scp_sftp_recursive && !scp_sftp_wildcard) {
tell_user(stderr, "pscp: %s: is a directory", fname);
errs++;
if (must_free_fname) sfree(fname);
/*
* Otherwise, the fun begins. We must fxp_opendir() the
* directory, slurp the filenames into memory, return
- * SCP_SINK_DIR, and set targetisdir. The next time
- * we're called, we will run through the list of
- * filenames one by one.
+ * SCP_SINK_DIR (unless this is a wildcard match), and
+ * set targetisdir. The next time we're called, we will
+ * run through the list of filenames one by one,
+ * matching them against a wildcard if present.
*
* If targetisdir is _already_ set (meaning we're
* already in the middle of going through another such
newitem->dirpath = fname;
else
newitem->dirpath = dupstr(fname);
+ if (scp_sftp_wildcard) {
+ newitem->wildcard = scp_sftp_wildcard;
+ scp_sftp_wildcard = NULL;
+ } else {
+ newitem->wildcard = NULL;
+ }
scp_sftp_dirstack_head = newitem;
- act->action = SCP_SINK_DIR;
- act->buf = dupstr(stripslashes(fname));
- act->name = act->buf;
- act->size = 0; /* duhh, it's a directory */
- act->mode = 07777 & attrs.permissions;
- if (scp_sftp_preserve &&
- (attrs.flags & SSH_FILEXFER_ATTR_ACMODTIME)) {
- act->atime = attrs.atime;
- act->mtime = attrs.mtime;
- act->settime = 1;
- } else
- act->settime = 0;
+ if (newitem->wildcard) {
+ act->action = SCP_SINK_RETRY;
+ } else {
+ act->action = SCP_SINK_DIR;
+ act->buf = dupstr(stripslashes(fname, 0));
+ act->name = act->buf;
+ act->size = 0; /* duhh, it's a directory */
+ act->mode = 07777 & attrs.permissions;
+ if (scp_sftp_preserve &&
+ (attrs.flags & SSH_FILEXFER_ATTR_ACMODTIME)) {
+ act->atime = attrs.atime;
+ act->mtime = attrs.mtime;
+ act->settime = 1;
+ } else
+ act->settime = 0;
+ }
return 0;
} else {
* It's a file. Return SCP_SINK_FILE.
*/
act->action = SCP_SINK_FILE;
- act->buf = dupstr(stripslashes(fname));
+ act->buf = dupstr(stripslashes(fname, 0));
act->name = act->buf;
if (attrs.flags & SSH_FILEXFER_ATTR_SIZE) {
if (uint64_compare(attrs.size,
if (act.action == SCP_SINK_ENDDIR)
return;
+ if (act.action == SCP_SINK_RETRY)
+ continue;
+
if (targisdir) {
/*
* Prevent the remote side from maliciously writing to
*/
char *striptarget, *stripsrc;
- striptarget = stripslashes(act.name);
+ striptarget = stripslashes(act.name, 1);
if (striptarget != act.name) {
tell_user(stderr, "warning: remote host sent a compound"
" pathname - possibly malicious! (ignored)");
}
if (src) {
- stripsrc = stripslashes(src);
+ stripsrc = stripslashes(src, 1);
if (!stripsrc[strcspn(stripsrc, "*?[]")] &&
strcmp(striptarget, stripsrc)) {
tell_user(stderr, "warning: remote host attempted to"
stat_bytes = 0;
stat_starttime = time(NULL);
stat_lasttime = 0;
- stat_name = stripslashes(destfname);
+ stat_name = stripslashes(destfname, 1);
received = 0;
while (received < act.size) {
* filenames returned from Find{First,Next}File.
*/
srcpath = dupstr(src);
- last = stripslashes(srcpath);
+ last = stripslashes(srcpath, 1);
if (last == srcpath) {
last = strchr(srcpath, ':');
if (last)
do_cmd(host, user, cmd);
sfree(cmd);
- scp_sink_setup(src, preserve, recursive);
+ if (scp_sink_setup(src, preserve, recursive))
+ return;
sink(targ, src);
}
--- /dev/null
+/*
+ * Wildcard matching engine for use with SFTP-based file transfer
+ * programs (PSFTP, new-look PSCP): since SFTP has no notion of
+ * getting the remote side to do globbing (and rightly so) we have
+ * to do it locally, by retrieving all the filenames in a directory
+ * and checking each against the wildcard pattern.
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Definition of wildcard syntax:
+ *
+ * - * matches any sequence of characters, including zero.
+ * - ? matches exactly one character which can be anything.
+ * - [abc] matches exactly one character which is a, b or c.
+ * - [a-f] matches anything from a through f.
+ * - [^a-f] matches anything _except_ a through f.
+ * - [-_] matches - or _; [^-_] matches anything else. (The - is
+ * non-special if it occurs immediately after the opening
+ * bracket or ^.)
+ * - [a^] matches an a or a ^. (The ^ is non-special if it does
+ * _not_ occur immediately after the opening bracket.)
+ * - \*, \?, \[, \], \\ match the single characters *, ?, [, ], \.
+ * - All other characters are non-special and match themselves.
+ */
+
+/*
+ * The wildcard matching technique we use is very simple and
+ * potentially O(N^2) in running time, but I don't anticipate it
+ * being that bad in reality (particularly since N will be the size
+ * of a filename, which isn't all that much). Perhaps one day, once
+ * PuTTY has grown a regexp matcher for some other reason, I might
+ * come back and reimplement wildcards by translating them into
+ * regexps or directly into NFAs; but for the moment, in the
+ * absence of any other need for the NFA->DFA translation engine,
+ * anything more than the simplest possible wildcard matcher is
+ * vast code-size overkill.
+ *
+ * Essentially, these wildcards are much simpler than regexps in
+ * that they consist of a sequence of rigid fragments (? and [...]
+ * can never match more or less than one character) separated by
+ * asterisks. It is therefore extremely simple to look at a rigid
+ * fragment and determine whether or not it begins at a particular
+ * point in the test string; so we can search along the string
+ * until we find each fragment, then search for the next. As long
+ * as we find each fragment in the _first_ place it occurs, there
+ * will never be a danger of having to backpedal and try to find it
+ * again somewhere else.
+ */
+
+/*
+ * Codes for the ways a wildcard can be malformed. The matching
+ * routines return these negated, so they start at 1 to keep every
+ * error strictly negative and distinct from the 0/1 match results.
+ */
+enum {
+ WC_TRAILINGBACKSLASH = 1,
+ WC_UNCLOSEDCLASS,
+ WC_INVALIDRANGE
+};
+
+/*
+ * Error reporting is done by returning various negative values
+ * from the wildcard routines. Passing any such value to wc_error
+ * will give a human-readable message.
+ *
+ * The sign of `value' is ignored, so either the negative return
+ * value or the positive WC_* code may be passed. The result is a
+ * string literal and must not be modified or freed. An
+ * unrecognised value yields an "INTERNAL ERROR" message rather
+ * than NULL.
+ */
+const char *wc_error(int value)
+{
+ value = abs(value);
+ switch (value) {
+ case WC_TRAILINGBACKSLASH:
+ /* The backslash must itself be escaped, or the message loses it. */
+ return "'\\' occurred at end of string (expected another character)";
+ case WC_UNCLOSEDCLASS:
+ return "expected ']' to close character class";
+ case WC_INVALIDRANGE:
+ return "character range was not terminated (']' just after '-')";
+ }
+ return "INTERNAL ERROR: unrecognised wildcard error number";
+}
+
+/*
+ * This is the routine that tests a target string to see if an
+ * initial substring of it matches a fragment. If successful, it
+ * returns 1, and advances both `fragment' and `target' past the
+ * fragment and matching substring respectively. If unsuccessful it
+ * returns zero. If the wildcard fragment suffers a syntax error,
+ * it returns <0 and the precise value indexes into wc_error.
+ *
+ * On a zero or negative return the caller's `fragment' and
+ * `target' pointers are left exactly as they were passed in. A
+ * syntax error is only detected if matching gets as far as the
+ * malformed part of the fragment.
+ */
+static int wc_match_fragment(const char **fragment, const char **target)
+{
+ const char *f, *t;
+
+ f = *fragment;
+ t = *target;
+ /*
+ * The fragment terminates at either the end of the string, or
+ * the first (unescaped) *.
+ */
+ while (*f && *f != '*' && *t) {
+ /*
+ * Extract one character from t, and one character's worth
+ * of pattern from f, and step along both. Return 0 if they
+ * fail to match.
+ */
+ if (*f == '\\') {
+ /*
+ * Backslash, which means f[1] is to be treated as a
+ * literal character no matter what it is. It may not
+ * be the end of the string.
+ */
+ if (!f[1])
+ return -WC_TRAILINGBACKSLASH; /* error */
+ if (f[1] != *t)
+ return 0; /* failed to match */
+ f += 2;
+ } else if (*f == '?') {
+ /*
+ * Question mark matches anything.
+ */
+ f++;
+ } else if (*f == '[') {
+ int invert = 0;
+ int matched = 0;
+ /*
+ * Open bracket introduces a character class.
+ */
+ f++;
+ if (*f == '^') {
+ invert = 1;
+ f++;
+ }
+ while (*f != ']') {
+ if (*f == '\\')
+ f++; /* backslashes still work */
+ if (!*f)
+ return -WC_UNCLOSEDCLASS; /* error again */
+ if (f[1] == '-') {
+ int lower, upper, ourchr;
+ lower = (unsigned char) *f++;
+ f++; /* eat the minus */
+ if (*f == ']')
+ return -WC_INVALIDRANGE; /* different error! */
+ if (*f == '\\')
+ f++; /* backslashes _still_ work */
+ if (!*f)
+ return -WC_UNCLOSEDCLASS; /* error again */
+ upper = (unsigned char) *f++;
+ ourchr = (unsigned char) *t;
+ if (lower > upper) {
+ /* Reversed range such as [e-c]: swap the bounds. */
+ int tmp = lower; lower = upper; upper = tmp;
+ }
+ if (ourchr >= lower && ourchr <= upper)
+ matched = 1;
+ } else {
+ matched |= (*t == *f++);
+ }
+ }
+ if (invert == matched)
+ return 0; /* failed to match character class */
+ f++; /* eat the ] */
+ } else {
+ /*
+ * Non-special character matches itself.
+ */
+ if (*f != *t)
+ return 0;
+ f++;
+ }
+ /*
+ * Now we've done that, increment t past the character we
+ * matched.
+ */
+ t++;
+ }
+ if (!*f || *f == '*') {
+ /*
+ * We have reached the end of f without finding a mismatch;
+ * so we're done. Update the caller pointers and return 1.
+ */
+ *fragment = f;
+ *target = t;
+ return 1;
+ }
+ /*
+ * Otherwise, we must have reached the end of t before we
+ * reached the end of f; so we've failed. Return 0.
+ */
+ return 0;
+}
+
+/*
+ * This is the real wildcard matching routine. It returns 1 for a
+ * successful match, 0 for an unsuccessful match, and <0 for a
+ * syntax error in the wildcard.
+ *
+ * `wildcard' is the pattern and `target' the string tested against
+ * it; the whole of `target' must be consumed for a match. A
+ * negative return value can be passed to wc_error for a message.
+ * A syntax error is only reported if matching actually reaches the
+ * malformed part of the pattern.
+ */
+int wc_match(const char *wildcard, const char *target)
+{
+ int ret;
+
+ /*
+ * Every time we see a '*' _followed_ by a fragment, we just
+ * search along the string for a location at which the fragment
+ * matches. The only special case is when we see a fragment
+ * right at the start, in which case we just call the matching
+ * routine once and give up if it fails.
+ */
+ if (*wildcard != '*') {
+ ret = wc_match_fragment(&wildcard, &target);
+ if (ret <= 0)
+ return ret; /* pass back failure or error alike */
+ }
+
+ while (*wildcard) {
+ assert(*wildcard == '*');
+ while (*wildcard == '*')
+ wildcard++;
+
+ /*
+ * It's possible we've just hit the end of the wildcard
+ * after seeing a *, in which case there's no need to
+ * bother searching any more because we've won.
+ */
+ if (!*wildcard)
+ return 1;
+
+ /*
+ * Now `wildcard' points at the next fragment. So we
+ * attempt to match it against `target', and if that fails
+ * we increment `target' and try again, and so on. When we
+ * find we're about to try matching against the empty
+ * string, we give up and return 0.
+ */
+ ret = 0;
+ while (*target) {
+ const char *save_w = wildcard, *save_t = target;
+
+ ret = wc_match_fragment(&wildcard, &target);
+
+ if (ret < 0)
+ return ret; /* syntax error */
+
+ if (ret > 0 && !*wildcard && *target) {
+ /*
+ * Final special case - literally.
+ *
+ * This situation arises when we are matching a
+ * _terminal_ fragment of the wildcard (that is,
+ * there is nothing after it, e.g. "*a"), and it
+ * has matched _too early_. For example, matching
+ * "*a" against "parka" will match the "a" fragment
+ * against the _first_ a, and then (if it weren't
+ * for this special case) matching would fail
+ * because we're at the end of the wildcard but not
+ * at the end of the target string.
+ *
+ * In this case what we must do is measure the
+ * length of the fragment in the target (which is
+ * why we saved `target'), jump straight to that
+ * distance from the end of the string using
+ * strlen, and match the same fragment again there
+ * (which is why we saved `wildcard'). Then we
+ * return whatever that operation returns.
+ */
+ target = save_t + strlen(save_t) - (target - save_t);
+ wildcard = save_w;
+ return wc_match_fragment(&wildcard, &target);
+ }
+
+ if (ret > 0)
+ break; /* fragment found; go on to the next '*' */
+ target++; /* no match here; retry one character further on */
+ }
+ if (ret > 0)
+ continue;
+ return 0; /* ran out of target without placing this fragment */
+ }
+
+ /*
+ * If we reach here, it must be because we successfully matched
+ * a fragment and then found ourselves right at the end of the
+ * wildcard. Hence, we return 1 if and only if we are also
+ * right at the end of the target.
+ */
+ return (*target ? 0 : 1);
+}
+
+/*
+ * Another utility routine that translates a non-wildcard string
+ * into its raw equivalent by removing any escaping backslashes.
+ * Expects a target string buffer large enough for the length of
+ * the original wildcard plus its terminating NUL (the output is
+ * never longer than the input). You can also pass NULL as the
+ * output buffer if you're only interested in the return value.
+ *
+ * Returns 1 on success, or 0 if a wildcard character was
+ * encountered. In the latter case the output string MAY not be
+ * zero-terminated and you should not use it for anything!
+ */
+int wc_unescape(char *output, const char *wildcard)
+{
+ while (*wildcard) {
+ if (*wildcard == '\\') {
+ wildcard++;
+ /* We are lenient about trailing backslashes in non-wildcards. */
+ if (*wildcard) {
+ if (output)
+ *output++ = *wildcard;
+ wildcard++;
+ }
+ } else if (*wildcard == '*' || *wildcard == '?' ||
+ *wildcard == '[' || *wildcard == ']') {
+ return 0; /* it's a wildcard! */
+ } else {
+ if (output)
+ *output++ = *wildcard;
+ wildcard++;
+ }
+ }
+ /* Terminate only if a buffer was supplied; output may be NULL. */
+ if (output)
+ *output = '\0';
+ return 1; /* it's clean */
+}
+
+#ifdef TESTMODE
+
+struct test { /* one table-driven test case for the TESTMODE driver */
+ const char *wildcard; /* pattern (or single fragment) under test */
+ const char *target; /* string the pattern is matched against */
+ int expected_result; /* value the routine under test must return */
+};
+
+const struct test fragment_tests[] = {
+ /*
+ * We exhaustively unit-test the fragment matching routine
+ * itself, which should save us the need to test all its
+ * intricacies during the full wildcard tests.
+ *
+ * Each entry is {fragment, target, expected return value of
+ * wc_match_fragment}: 1 for a match, 0 for a mismatch, or a
+ * negated WC_* code for a syntax error.
+ */
+ {"abc", "abc", 1},
+ {"abc", "abd", 0},
+ {"abc", "abcd", 1}, /* only an initial substring of the target need match */
+ {"abcd", "abc", 0},
+ {"ab[cd]", "abc", 1},
+ {"ab[cd]", "abd", 1},
+ {"ab[cd]", "abe", 0},
+ {"ab[^cd]", "abc", 0},
+ {"ab[^cd]", "abd", 0},
+ {"ab[^cd]", "abe", 1},
+ {"ab\\", "abc", -WC_TRAILINGBACKSLASH},
+ {"ab\\*", "ab*", 1},
+ {"ab\\?", "ab*", 0},
+ {"ab?", "abc", 1},
+ {"ab?", "ab", 0},
+ {"ab[", "abc", -WC_UNCLOSEDCLASS},
+ {"ab[c-", "abb", -WC_UNCLOSEDCLASS},
+ {"ab[c-]", "abb", -WC_INVALIDRANGE},
+ {"ab[c-e]", "abb", 0},
+ {"ab[c-e]", "abc", 1},
+ {"ab[c-e]", "abd", 1},
+ {"ab[c-e]", "abe", 1},
+ {"ab[c-e]", "abf", 0},
+ {"ab[e-c]", "abb", 0},
+ {"ab[e-c]", "abc", 1},
+ {"ab[e-c]", "abd", 1},
+ {"ab[e-c]", "abe", 1},
+ {"ab[e-c]", "abf", 0},
+ {"ab[^c-e]", "abb", 1},
+ {"ab[^c-e]", "abc", 0},
+ {"ab[^c-e]", "abd", 0},
+ {"ab[^c-e]", "abe", 0},
+ {"ab[^c-e]", "abf", 1},
+ {"ab[^e-c]", "abb", 1},
+ {"ab[^e-c]", "abc", 0},
+ {"ab[^e-c]", "abd", 0},
+ {"ab[^e-c]", "abe", 0},
+ {"ab[^e-c]", "abf", 1},
+ {"ab[a^]", "aba", 1},
+ {"ab[a^]", "ab^", 1},
+ {"ab[a^]", "abb", 0},
+ {"ab[^a^]", "aba", 0},
+ {"ab[^a^]", "ab^", 0},
+ {"ab[^a^]", "abb", 1},
+ {"ab[-c]", "ab-", 1},
+ {"ab[-c]", "abc", 1},
+ {"ab[-c]", "abd", 0},
+ {"ab[^-c]", "ab-", 0},
+ {"ab[^-c]", "abc", 0},
+ {"ab[^-c]", "abd", 1},
+ {"ab[\\[-\\]]", "abZ", 0},
+ {"ab[\\[-\\]]", "ab[", 1},
+ {"ab[\\[-\\]]", "ab\\", 1},
+ {"ab[\\[-\\]]", "ab]", 1},
+ {"ab[\\[-\\]]", "ab^", 0},
+ {"ab[^\\[-\\]]", "abZ", 1},
+ {"ab[^\\[-\\]]", "ab[", 0},
+ {"ab[^\\[-\\]]", "ab\\", 0},
+ {"ab[^\\[-\\]]", "ab]", 0},
+ {"ab[^\\[-\\]]", "ab^", 1},
+ {"ab[a-fA-F]", "aba", 1},
+ {"ab[a-fA-F]", "abF", 1},
+ {"ab[a-fA-F]", "abZ", 0},
+};
+
+const struct test full_tests[] = { /* {pattern, target, expected wc_match result} */
+ {"a", "argh", 0}, /* the whole target must be consumed */
+ {"a", "ba", 0},
+ {"a", "a", 1},
+ {"a*", "aardvark", 1},
+ {"a*", "badger", 0},
+ {"*a", "park", 0},
+ {"*a", "pArka", 1},
+ {"*a", "parka", 1}, /* terminal fragment first matches too early */
+ {"*a*", "park", 1},
+ {"*a*", "perk", 0},
+ {"?b*r?", "abracadabra", 1},
+ {"?b*r?", "abracadabr", 0},
+ {"?b*r?", "abracadabzr", 0},
+};
+
+/*
+ * Self-test driver, built only when TESTMODE is defined. Runs
+ * every entry of fragment_tests through wc_match_fragment and
+ * every entry of full_tests through wc_match, printing a line for
+ * each mismatch and a final pass/fail tally.
+ *
+ * Returns 0 if every test passed and 1 if any failed, so that the
+ * result can be checked from a script or makefile.
+ */
+int main(void)
+{
+ size_t i; /* unsigned, to match the sizeof-based bounds */
+ int fails, passes;
+
+ fails = passes = 0;
+
+ for (i = 0; i < sizeof(fragment_tests)/sizeof(*fragment_tests); i++) {
+ const char *f, *t;
+ int eret, aret;
+ f = fragment_tests[i].wildcard;
+ t = fragment_tests[i].target;
+ eret = fragment_tests[i].expected_result;
+ aret = wc_match_fragment(&f, &t);
+ if (aret != eret) {
+ printf("failed test: /%s/ against /%s/ returned %d not %d\n",
+ fragment_tests[i].wildcard, fragment_tests[i].target,
+ aret, eret);
+ fails++;
+ } else
+ passes++;
+ }
+
+ for (i = 0; i < sizeof(full_tests)/sizeof(*full_tests); i++) {
+ const char *f, *t;
+ int eret, aret;
+ f = full_tests[i].wildcard;
+ t = full_tests[i].target;
+ eret = full_tests[i].expected_result;
+ aret = wc_match(f, t);
+ if (aret != eret) {
+ printf("failed test: /%s/ against /%s/ returned %d not %d\n",
+ full_tests[i].wildcard, full_tests[i].target,
+ aret, eret);
+ fails++;
+ } else
+ passes++;
+ }
+
+ printf("passed %d, failed %d\n", passes, fails);
+
+ return fails != 0;
+}
+
+#endif