Fix the import --url command
[stgit] / stgit / commands / imprt.py
index fcbe9d3..8067beb 100644 (file)
@@ -15,66 +15,73 @@ along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 """
 
-import sys, os, re
-from optparse import OptionParser, make_option
-
+import sys, os, re, email, tarfile
+from mailbox import UnixMailbox
+from StringIO import StringIO
+from stgit.argparse import opt
 from stgit.commands.common import *
 from stgit.utils import *
-from stgit import stack, git
-
-
-help = 'import a GNU diff file as a new patch'
-usage = """%prog [options] [<file>]
-
+from stgit.out import *
+from stgit import argparse, stack, git
+
+name = 'import'
+help = 'Import a GNU diff file as a new patch'
+kind = 'patch'
+usage = ['[options] [<file>|<url>]']
+description = """
 Create a new patch and apply the given GNU diff file (or the standard
 input). By default, the file name is used as the patch name but this
 can be overridden with the '--name' option. The patch can either be a
 normal file with the description at the top or it can have standard
 mail format, the Subject, From and Date headers being used for
-generating the patch information.
+generating the patch information. The command can also read series and
+mbox files.
+
+If a patch does not apply cleanly, the failed diff is written to the
+.stgit-failed.patch file and an empty StGIT patch is added to the
+stack.
 
 The patch description has to be separated from the data with a '---'
 line."""
 
-options = [make_option('-m', '--mail',
-                       help = 'import the patch from a standard e-mail file',
-                       action = 'store_true'),
-           make_option('-n', '--name',
-                       help = 'use NAME as the patch name'),
-           make_option('-t', '--strip',
-                       help = 'strip numbering and extension from patch name',
-                       action = 'store_true'),
-           make_option('-s', '--series',
-                       help = 'import a series of patches',
-                       action = 'store_true'),
-           make_option('-i', '--ignore',
-                       help = 'ignore the applied patches in the series',
-                       action = 'store_true'),
-           make_option('-b', '--base',
-                       help = 'use BASE instead of HEAD for file importing'),
-           make_option('-e', '--edit',
-                       help = 'invoke an editor for the patch description',
-                       action = 'store_true'),
-           make_option('-p', '--showpatch',
-                       help = 'show the patch content in the editor buffer',
-                       action = 'store_true'),
-           make_option('-a', '--author', metavar = '"NAME <EMAIL>"',
-                       help = 'use "NAME <EMAIL>" as the author details'),
-           make_option('--authname',
-                       help = 'use AUTHNAME as the author name'),
-           make_option('--authemail',
-                       help = 'use AUTHEMAIL as the author e-mail'),
-           make_option('--authdate',
-                       help = 'use AUTHDATE as the author date'),
-           make_option('--commname',
-                       help = 'use COMMNAME as the committer name'),
-           make_option('--commemail',
-                       help = 'use COMMEMAIL as the committer e-mail')]
-
-
-def __end_descr(line):
-    return re.match('---\s*$', line) or re.match('diff -', line) or \
-            re.match('Index: ', line)
+args = [argparse.files]
+options = [
+    opt('-m', '--mail', action = 'store_true',
+        short = 'Import the patch from a standard e-mail file'),
+    opt('-M', '--mbox', action = 'store_true',
+        short = 'Import a series of patches from an mbox file'),
+    opt('-s', '--series', action = 'store_true',
+        short = 'Import a series of patches', long = """
+        Import a series of patches from a series file or a tar archive."""),
+    opt('-u', '--url', action = 'store_true',
+        short = 'Import a patch from a URL'),
+    opt('-n', '--name',
+        short = 'Use NAME as the patch name'),
+    opt('-t', '--strip', action = 'store_true',
+        short = 'Strip numbering and extension from patch name'),
+    opt('-i', '--ignore', action = 'store_true',
+        short = 'Ignore the applied patches in the series'),
+    opt('--replace', action = 'store_true',
+        short = 'Replace the unapplied patches in the series'),
+    opt('-b', '--base', args = [argparse.commit],
+        short = 'Use BASE instead of HEAD for file importing'),
+    opt('--reject', action = 'store_true',
+        short = 'leave the rejected hunks in corresponding *.rej files'),
+    opt('-e', '--edit', action = 'store_true',
+        short = 'Invoke an editor for the patch description'),
+    opt('-p', '--showpatch', action = 'store_true',
+        short = 'Show the patch content in the editor buffer'),
+    opt('-a', '--author', metavar = '"NAME <EMAIL>"',
+        short = 'Use "NAME <EMAIL>" as the author details'),
+    opt('--authname',
+        short = 'Use AUTHNAME as the author name'),
+    opt('--authemail',
+        short = 'Use AUTHEMAIL as the author e-mail'),
+    opt('--authdate',
+        short = 'Use AUTHDATE as the author date'),
+    ] + argparse.sign_options()
+
+directory = DirectoryHasRepository(log = True)
 
 def __strip_patch_name(name):
     stripped = re.sub('^[0-9]+-(.*)$', '\g<1>', name)
@@ -82,160 +89,48 @@ def __strip_patch_name(name):
 
     return stripped
 
-def __parse_description(descr):
-    """Parse the patch description and return the new description and
-    author information (if any).
-    """
-    subject = body = ''
-    authname = authemail = authdate = None
-
-    descr_lines = [line.rstrip() for line in  descr.split('\n')]
-    if not descr_lines:
-        raise CmdException, "Empty patch description"
-
-    lasthdr = 0
-    end = len(descr_lines)
-
-    # Parse the patch header
-    for pos in range(0, end):
-        if not descr_lines[pos]:
-           continue
-        # check for a "From|Author:" line
-        if re.match('\s*(?:from|author):\s+', descr_lines[pos], re.I):
-            auth = re.findall('^.*?:\s+(.*)$', descr_lines[pos])[0]
-            authname, authemail = name_email(auth)
-            lasthdr = pos + 1
-            continue
-        # check for a "Date:" line
-        if re.match('\s*date:\s+', descr_lines[pos], re.I):
-            authdate = re.findall('^.*?:\s+(.*)$', descr_lines[pos])[0]
-            lasthdr = pos + 1
-            continue
-        if subject:
-            break
-        # get the subject
-        subject = descr_lines[pos]
-        lasthdr = pos + 1
+def __replace_slashes_with_dashes(name):
+    """Return name with every '/' replaced by '-'
+    """
+    stripped = name.replace('/', '-')
 
-    # get the body
-    if lasthdr < end:
-        body = reduce(lambda x, y: x + '\n' + y, descr_lines[lasthdr:], '')
-
-    return (subject + body, authname, authemail, authdate)
+    return stripped
 
-def __parse_mail(filename = None):
-    """Parse the input file in a mail format and return (description,
-    authname, authemail, authdate)
+def __create_patch(filename, message, author_name, author_email,
+                   author_date, diff, options):
+    """Create a new patch on the stack
     """
-    if filename:
-        f = file(filename)
+    if options.name:
+        patch = options.name
+    elif filename:
+        patch = os.path.basename(filename)
     else:
-        f = sys.stdin
+        patch = ''
+    if options.strip:
+        patch = __strip_patch_name(patch)
 
-    descr = authname = authemail = authdate = None
-
-    # parse the headers
-    while True:
-        line = f.readline()
-        if not line:
-            break
-        line = line.strip()
-        if re.match('from:\s+', line, re.I):
-            auth = re.findall('^.*?:\s+(.*)$', line)[0]
-            authname, authemail = name_email(auth)
-        elif re.match('date:\s+', line, re.I):
-            authdate = re.findall('^.*?:\s+(.*)$', line)[0]
-        elif re.match('subject:\s+', line, re.I):
-            descr = re.findall('^.*?:\s+(.*)$', line)[0]
-        elif line == '':
-            # end of headers
-            break
-
-    # remove the '[*PATCH*]' expression in the subject
-    if descr:
-        descr = re.findall('^(\[[^\s]*[Pp][Aa][Tt][Cc][Hh].*?\])?\s*(.*)$',
-                           descr)[0][1]
-        descr += '\n\n'
-    else:
-        raise CmdException, 'Subject: line not found'
-
-    # the rest of the patch description
-    while True:
-        line = f.readline()
-        if not line:
-            break
-        if __end_descr(line):
-            break
+    if not patch:
+        if options.ignore or options.replace:
+            unacceptable_name = lambda name: False
         else:
-            descr += line
-    descr.rstrip()
-
-    if filename:
-        f.close()
-
-    # parse the description for author information
-    descr, descr_authname, descr_authemail, descr_authdate = __parse_description(descr)
-    if descr_authname:
-        authname = descr_authname
-    if descr_authemail:
-        authemail = descr_authemail
-    if descr_authdate:
-       authdate = descr_authdate
-
-    return (descr, authname, authemail, authdate)
-
-def __parse_patch(filename = None):
-    """Parse the input file and return (description, authname,
-    authemail, authdate)
-    """
-    if filename:
-        f = file(filename)
+            unacceptable_name = crt_series.patch_exists
+        patch = make_patch_name(message, unacceptable_name)
     else:
-        f = sys.stdin
-
-    descr = ''
-    while True:
-        line = f.readline()
-        if not line:
-            break
-
-        if __end_descr(line):
-            break
-        else:
-            descr += line
-    descr.rstrip()
-
-    if filename:
-        f.close()
-
-    descr, authname, authemail, authdate = __parse_description(descr)
-
-    # we don't yet have an agreed place for the creation date.
-    # Just return None
-    return (descr, authname, authemail, authdate)
-
-def __import_patch(patch, filename, options):
-    """Import a patch from a file or standard input
-    """
-    # the defaults
-    message = author_name = author_email = author_date = committer_name = \
-              committer_email = None
-
-    if options.author:
-        options.authname, options.authemail = name_email(options.author)
+        # fix possible invalid characters in the patch name
+        patch = re.sub('[^\w.]+', '-', patch).strip('-')
 
-    if options.mail:
-        message, author_name, author_email, author_date = \
-                 __parse_mail(filename)
-    else:
-        message, author_name, author_email, author_date = \
-                 __parse_patch(filename)
+    if options.ignore and patch in crt_series.get_applied():
+        out.info('Ignoring already applied patch "%s"' % patch)
+        return
+    if options.replace and patch in crt_series.get_unapplied():
+        crt_series.delete_patch(patch, keep_log = True)
 
     # refresh_patch() will invoke the editor in this case, with correct
     # patch content
     if not message:
         can_edit = False
 
+    if options.author:
+        options.authname, options.authemail = name_email(options.author)
+
     # override the automatically parsed settings
     if options.authname:
         author_name = options.authname
@@ -243,30 +138,79 @@ def __import_patch(patch, filename, options):
         author_email = options.authemail
     if options.authdate:
         author_date = options.authdate
-    if options.commname:
-        committer_name = options.commname
-    if options.commemail:
-        committer_email = options.commemail
 
     crt_series.new_patch(patch, message = message, can_edit = False,
                          author_name = author_name,
                          author_email = author_email,
-                         author_date = author_date,
-                         committer_name = committer_name,
-                         committer_email = committer_email)
+                         author_date = author_date)
+
+    if not diff:
+        out.warn('No diff found, creating empty patch')
+    else:
+        out.start('Importing patch "%s"' % patch)
+        if options.base:
+            base = git_id(crt_series, options.base)
+        else:
+            base = None
+        git.apply_patch(diff = diff, base = base, reject = options.reject)
+        crt_series.refresh_patch(edit = options.edit,
+                                 show_patch = options.showpatch,
+                                 sign_str = options.sign_str,
+                                 backup = False)
+        out.done()
+
+def __mkpatchname(name, suffix):
+    """Return name with a trailing suffix removed
+
+    The suffix is matched case-insensitively. name is returned unchanged
+    when it does not end with suffix or when suffix is empty.
+    """
+    # an empty suffix matches every name and name[:-0] would yield ''
+    if suffix and name.lower().endswith(suffix.lower()):
+        return name[:-len(suffix)]
+    return name
+
+def __get_handle_and_name(filename):
+    """Return a file object and a patch name derived from filename
+
+    The file is first probed with the gzip and then with the bzip2
+    opener; the first one that can read from it is returned, together
+    with filename stripped of the corresponding '.gz' or '.bz2'
+    extension. If neither opener can read the file, it is opened as a
+    plain file and filename is returned unchanged.
+    """
+    # see if it's a gzip'ed or bzip2'ed patch
+    import bz2, gzip
+    for copen, ext in [(gzip.open, '.gz'), (bz2.BZ2File, '.bz2')]:
+        f = None
+        try:
+            f = copen(filename)
+            # reading a byte forces the opener to validate the format
+            f.read(1)
+            f.seek(0)
+            return (f, __mkpatchname(filename, ext))
+        except IOError:
+            # wrong format (or unreadable): release the probe handle
+            # instead of leaking it, then try the next opener
+            if f is not None:
+                f.close()
+
+    # plain old file...
+    return (open(filename), filename)
+
+def __import_file(filename, options, patch = None):
+    """Import a patch from a file or standard input
+
+    filename is the path to read; when it is empty, standard input is
+    read instead. patch, when given, replaces the name derived from
+    filename as the name handed to __create_patch. With options.mail
+    set, the input is parsed as an e-mail message via parse_mail;
+    otherwise its whole content is handed to parse_patch.
+    """
+    pname = None
+    if filename:
+        (f, pname) = __get_handle_and_name(filename)
+    else:
+        f = sys.stdin
 
-    print 'Importing patch "%s"...' % patch,
-    sys.stdout.flush()
+    # an explicitly given patch name wins over the file-derived one
+    if patch:
+        pname = patch
+    elif not pname:
+        pname = filename
 
-    if options.base:
-        git.apply_patch(filename, git_id(options.base))
+    if options.mail:
+        try:
+            msg = email.message_from_file(f)
+        except Exception, ex:
+            raise CmdException, 'error parsing the e-mail file: %s' % str(ex)
+        message, author_name, author_email, author_date, diff = \
+                 parse_mail(msg)
     else:
-        git.apply_patch(filename)
+        message, author_name, author_email, author_date, diff = \
+                 parse_patch(f.read(), contains_diff = True)
 
-    crt_series.refresh_patch(edit = options.edit,
-                             show_patch = options.showpatch)
+    # only a handle opened here is closed; sys.stdin is left open
+    if filename:
+        f.close()
 
-    print 'done'
+    __create_patch(pname, message, author_name, author_email,
+                   author_date, diff, options)
 
 def __import_series(filename, options):
     """Import a series of patches
@@ -274,6 +218,9 @@ def __import_series(filename, options):
     applied = crt_series.get_applied()
 
     if filename:
+        if tarfile.is_tarfile(filename):
+            __import_tarfile(filename, options)
+            return
         f = file(filename)
         patchdir = os.path.dirname(filename)
     else:
@@ -285,14 +232,85 @@ def __import_series(filename, options):
         if not patch:
             continue
         patchfile = os.path.join(patchdir, patch)
+        patch = __replace_slashes_with_dashes(patch);
 
-        if options.strip:
-            patch = __strip_patch_name(patch)
-        if options.ignore and patch in applied:
-            print 'Ignoring already applied patch "%s"' % patch
-            continue
+        __import_file(patchfile, options, patch)
 
-        __import_patch(patch, patchfile, options)
+    if filename:
+        f.close()
+
+def __import_mbox(filename, options):
+    """Import a series from an mbox file
+
+    The mailbox is read from filename or, when filename is empty, from
+    the whole of standard input. Every message is parsed with parse_mail
+    and handed to __create_patch without a file name.
+
+    Raises CmdException if the mailbox object cannot be created.
+    """
+    if filename:
+        f = file(filename, 'rb')
+    else:
+        f = StringIO(sys.stdin.read())
+
+    # close the input even when parsing or patch creation fails
+    try:
+        try:
+            mbox = UnixMailbox(f, email.message_from_file)
+        except Exception, ex:
+            raise CmdException, 'error parsing the mbox file: %s' % str(ex)
+
+        for msg in mbox:
+            message, author_name, author_email, author_date, diff = \
+                     parse_mail(msg)
+            __create_patch(None, message, author_name, author_email,
+                           author_date, diff, options)
+    finally:
+        f.close()
+
+def __import_url(url, options):
+    """Import a patch from a URL
+
+    The URL is downloaded into a private temporary directory, into a
+    file named after the last component of the unquoted URL, and that
+    file is then imported with __import_file. The temporary directory is
+    removed again whether or not the download and import succeed.
+
+    Raises CmdException if url is empty or does not end in a file name.
+    """
+    import urllib
+    import tempfile
+    import shutil
+
+    if not url:
+        raise CmdException('URL argument required')
+
+    patch = os.path.basename(urllib.unquote(url))
+    if not patch:
+        # e.g. a URL ending in '/': there is nothing to name the file after
+        raise CmdException('No file name found in URL "%s"' % url)
+
+    # download into a freshly created directory rather than to a
+    # predictable path directly under the shared temporary directory
+    tmpdir = tempfile.mkdtemp('.stg')
+    try:
+        filename = os.path.join(tmpdir, patch)
+        urllib.urlretrieve(url, filename)
+        __import_file(filename, options)
+    finally:
+        shutil.rmtree(tmpdir)
+
+def __import_tarfile(tar, options):
+    """Import patch series from a tar archive
+
+    The archive must contain a file called 'series', either at the top
+    level or inside a directory; the first such member is used. The
+    archive is unpacked into a temporary directory, the series file is
+    imported with __import_series, and the temporary directory is
+    removed again whether or not the import succeeds.
+
+    Raises CmdException if tar is not a tar archive, if a member name is
+    absolute or contains '..', or if no series file is found.
+    """
+    import tempfile
+    import shutil
+
+    if not tarfile.is_tarfile(tar):
+        raise CmdException, "%s is not a tarfile!" % tar
+
+    t = tarfile.open(tar, 'r')
+    try:
+        names = t.getnames()
+
+        # verify paths in the tarfile are safe
+        # NOTE(review): only member names are checked here; link members
+        # are not inspected - confirm this is sufficient for the inputs
+        for n in names:
+            if n.startswith('/'):
+                raise CmdException, "Absolute path found in %s" % tar
+            if n.find("..") > -1:
+                raise CmdException, "Relative path found in %s" % tar
+
+        # find the series file
+        seriesfile = ''
+        for m in names:
+            if m.endswith('/series') or m == 'series':
+                seriesfile = m
+                break
+        if seriesfile == '':
+            raise CmdException, "no 'series' file found in %s" % tar
+
+        # unpack into a tmp dir
+        tmpdir = tempfile.mkdtemp('.stg')
+        try:
+            t.extractall(tmpdir)
+
+            # apply the series
+            __import_series(os.path.join(tmpdir, seriesfile), options)
+        finally:
+            # cleanup the tmpdir, also when the import failed
+            shutil.rmtree(tmpdir)
+    finally:
+        t.close()
 
 def func(parser, options, args):
     """Import a GNU diff file as a new patch
@@ -302,25 +320,24 @@ def func(parser, options, args):
 
     check_local_changes()
     check_conflicts()
-    check_head_top_equal()
+    check_head_top_equal(crt_series)
 
     if len(args) == 1:
         filename = args[0]
     else:
         filename = None
 
+    if not options.url and filename:
+        filename = os.path.abspath(filename)
+    directory.cd_to_topdir()
+
     if options.series:
         __import_series(filename, options)
+    elif options.mbox:
+        __import_mbox(filename, options)
+    elif options.url:
+        __import_url(filename, options)
     else:
-        if options.name:
-            patch = options.name
-        elif filename:
-            patch = os.path.basename(filename)
-        else:
-            raise CmdException, 'Unknown patch name'
-        if options.strip:
-            patch = __strip_patch_name(patch)
-
-        __import_patch(patch, filename, options)
+        __import_file(filename, options)
 
-    print_crt_patch()
+    print_crt_patch(crt_series)