Fix the import --url command
[stgit] / stgit / commands / imprt.py
index 7ab0757..8067beb 100644 (file)
@@ -15,20 +15,20 @@ along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 """
 
-import sys, os, re, email
-from email.Header import decode_header, make_header
+import sys, os, re, email, tarfile
 from mailbox import UnixMailbox
 from StringIO import StringIO
-from optparse import OptionParser, make_option
-
+from stgit.argparse import opt
 from stgit.commands.common import *
 from stgit.utils import *
-from stgit import stack, git
-
-
-help = 'import a GNU diff file as a new patch'
-usage = """%prog [options] [<file>]
-
+from stgit.out import *
+from stgit import argparse, stack, git
+
+name = 'import'
+help = 'Import a GNU diff file as a new patch'
+kind = 'patch'
+usage = ['[options] [<file>|<url>]']
+description = """
 Create a new patch and apply the given GNU diff file (or the standard
 input). By default, the file name is used as the patch name but this
 can be overridden with the '--name' option. The patch can either be a
@@ -44,51 +44,44 @@ stack.
 The patch description has to be separated from the data with a '---'
 line."""
 
-options = [make_option('-m', '--mail',
-                       help = 'import the patch from a standard e-mail file',
-                       action = 'store_true'),
-           make_option('-M', '--mbox',
-                       help = 'import a series of patches from an mbox file',
-                       action = 'store_true'),
-           make_option('-s', '--series',
-                       help = 'import a series of patches',
-                       action = 'store_true'),
-           make_option('-n', '--name',
-                       help = 'use NAME as the patch name'),
-           make_option('-t', '--strip',
-                       help = 'strip numbering and extension from patch name',
-                       action = 'store_true'),
-           make_option('-i', '--ignore',
-                       help = 'ignore the applied patches in the series',
-                       action = 'store_true'),
-           make_option('--replace',
-                       help = 'replace the unapplied patches in the series',
-                       action = 'store_true'),
-           make_option('-b', '--base',
-                       help = 'use BASE instead of HEAD for file importing'),
-           make_option('-e', '--edit',
-                       help = 'invoke an editor for the patch description',
-                       action = 'store_true'),
-           make_option('-p', '--showpatch',
-                       help = 'show the patch content in the editor buffer',
-                       action = 'store_true'),
-           make_option('-a', '--author', metavar = '"NAME <EMAIL>"',
-                       help = 'use "NAME <EMAIL>" as the author details'),
-           make_option('--authname',
-                       help = 'use AUTHNAME as the author name'),
-           make_option('--authemail',
-                       help = 'use AUTHEMAIL as the author e-mail'),
-           make_option('--authdate',
-                       help = 'use AUTHDATE as the author date'),
-           make_option('--commname',
-                       help = 'use COMMNAME as the committer name'),
-           make_option('--commemail',
-                       help = 'use COMMEMAIL as the committer e-mail')]
-
-
-def __end_descr(line):
-    return re.match('---\s*$', line) or re.match('diff -', line) or \
-            re.match('Index: ', line)
+args = [argparse.files]
+options = [
+    opt('-m', '--mail', action = 'store_true',
+        short = 'Import the patch from a standard e-mail file'),
+    opt('-M', '--mbox', action = 'store_true',
+        short = 'Import a series of patches from an mbox file'),
+    opt('-s', '--series', action = 'store_true',
+        short = 'Import a series of patches', long = """
+        Import a series of patches from a series file or a tar archive."""),
+    opt('-u', '--url', action = 'store_true',
+        short = 'Import a patch from a URL'),
+    opt('-n', '--name',
+        short = 'Use NAME as the patch name'),
+    opt('-t', '--strip', action = 'store_true',
+        short = 'Strip numbering and extension from patch name'),
+    opt('-i', '--ignore', action = 'store_true',
+        short = 'Ignore the applied patches in the series'),
+    opt('--replace', action = 'store_true',
+        short = 'Replace the unapplied patches in the series'),
+    opt('-b', '--base', args = [argparse.commit],
+        short = 'Use BASE instead of HEAD for file importing'),
+    opt('--reject', action = 'store_true',
+        short = 'leave the rejected hunks in corresponding *.rej files'),
+    opt('-e', '--edit', action = 'store_true',
+        short = 'Invoke an editor for the patch description'),
+    opt('-p', '--showpatch', action = 'store_true',
+        short = 'Show the patch content in the editor buffer'),
+    opt('-a', '--author', metavar = '"NAME <EMAIL>"',
+        short = 'Use "NAME <EMAIL>" as the author details'),
+    opt('--authname',
+        short = 'Use AUTHNAME as the author name'),
+    opt('--authemail',
+        short = 'Use AUTHEMAIL as the author e-mail'),
+    opt('--authdate',
+        short = 'Use AUTHDATE as the author date'),
+    ] + argparse.sign_options()
+
+directory = DirectoryHasRepository(log = True)
 
 def __strip_patch_name(name):
     stripped = re.sub('^[0-9]+-(.*)$', '\g<1>', name)
@@ -101,155 +94,40 @@ def __replace_slashes_with_dashes(name):
 
     return stripped
 
-def __parse_description(descr):
-    """Parse the patch description and return the new description and
-    author information (if any).
-    """
-    subject = body = ''
-    authname = authemail = authdate = None
-
-    descr_lines = [line.rstrip() for line in  descr.split('\n')]
-    if not descr_lines:
-        raise CmdException, "Empty patch description"
-
-    lasthdr = 0
-    end = len(descr_lines)
-
-    # Parse the patch header
-    for pos in range(0, end):
-        if not descr_lines[pos]:
-           continue
-        # check for a "From|Author:" line
-        if re.match('\s*(?:from|author):\s+', descr_lines[pos], re.I):
-            auth = re.findall('^.*?:\s+(.*)$', descr_lines[pos])[0]
-            authname, authemail = name_email(auth)
-            lasthdr = pos + 1
-            continue
-        # check for a "Date:" line
-        if re.match('\s*date:\s+', descr_lines[pos], re.I):
-            authdate = re.findall('^.*?:\s+(.*)$', descr_lines[pos])[0]
-            lasthdr = pos + 1
-            continue
-        if subject:
-            break
-        # get the subject
-        subject = descr_lines[pos]
-        lasthdr = pos + 1
-
-    # get the body
-    if lasthdr < end:
-        body = reduce(lambda x, y: x + '\n' + y, descr_lines[lasthdr:], '')
-
-    return (subject + body, authname, authemail, authdate)
-
-def __parse_mail(msg):
-    """Parse the message object and return (description, authname,
-    authemail, authdate, diff)
-    """
-    def __decode_header(header):
-        """Decode a qp-encoded e-mail header as per rfc2047"""
-        try:
-            words_enc = decode_header(header)
-            hobj = make_header(words_enc)
-        except Exception, ex:
-            raise CmdException, 'header decoding error: %s' % str(ex)
-        return unicode(hobj).encode('utf-8')
-
-    # parse the headers
-    if msg.has_key('from'):
-        authname, authemail = name_email(__decode_header(msg['from']))
-    else:
-        authname = authemail = None
-
-    # '\n\t' can be found on multi-line headers
-    descr = __decode_header(msg['subject']).replace('\n\t', ' ')
-    authdate = msg['date']
-
-    # remove the '[*PATCH*]' expression in the subject
-    if descr:
-        descr = re.findall('^(\[.*?[Pp][Aa][Tt][Cc][Hh].*?\])?\s*(.*)$',
-                           descr)[0][1]
-        descr += '\n\n'
-    else:
-        raise CmdException, 'Subject: line not found'
-
-    # the rest of the message
-    if msg.is_multipart():
-        # this is assuming that the first part is the patch
-        # description and the second part is the attached patch
-        descr += msg.get_payload(0).get_payload(decode = True)
-        diff = msg.get_payload(1).get_payload(decode = True)
-    else:
-        diff = msg.get_payload(decode = True)
-
-        for line in diff.split('\n'):
-            if __end_descr(line):
-                break
-            descr += line + '\n'
-
-    descr.rstrip()
-
-    # parse the description for author information
-    descr, descr_authname, descr_authemail, descr_authdate = \
-           __parse_description(descr)
-    if descr_authname:
-        authname = descr_authname
-    if descr_authemail:
-        authemail = descr_authemail
-    if descr_authdate:
-       authdate = descr_authdate
-
-    return (descr, authname, authemail, authdate, diff)
-
-def __parse_patch(fobj):
-    """Parse the input file and return (description, authname,
-    authemail, authdate, diff)
-    """
-    descr = ''
-    while True:
-        line = fobj.readline()
-        if not line:
-            break
-
-        if __end_descr(line):
-            break
-        else:
-            descr += line
-    descr.rstrip()
-
-    diff = fobj.read()
-
-    descr, authname, authemail, authdate = __parse_description(descr)
-
-    # we don't yet have an agreed place for the creation date.
-    # Just return None
-    return (descr, authname, authemail, authdate, diff)
-
-def __create_patch(patch, message, author_name, author_email,
+def __create_patch(filename, message, author_name, author_email,
                    author_date, diff, options):
     """Create a new patch on the stack
     """
-    if not diff:
-        raise CmdException, 'No diff found inside the patch'
+    if options.name:
+        patch = options.name
+    elif filename:
+        patch = os.path.basename(filename)
+    else:
+        patch = ''
+    if options.strip:
+        patch = __strip_patch_name(patch)
 
     if not patch:
-        patch = make_patch_name(message, crt_series.patch_exists,
-                                alternative = not (options.ignore
-                                                   or options.replace))
+        if options.ignore or options.replace:
+            unacceptable_name = lambda name: False
+        else:
+            unacceptable_name = crt_series.patch_exists
+        patch = make_patch_name(message, unacceptable_name)
+    else:
+        # fix possible invalid characters in the patch name
+        patch = re.sub('[^\w.]+', '-', patch).strip('-')
 
     if options.ignore and patch in crt_series.get_applied():
-        print 'Ignoring already applied patch "%s"' % patch
+        out.info('Ignoring already applied patch "%s"' % patch)
         return
     if options.replace and patch in crt_series.get_unapplied():
-        crt_series.delete_patch(patch)
+        crt_series.delete_patch(patch, keep_log = True)
 
     # refresh_patch() will invoke the editor in this case, with correct
     # patch content
     if not message:
         can_edit = False
 
-    committer_name = committer_email = None
-
     if options.author:
         options.authname, options.authemail = name_email(options.author)
 
@@ -260,54 +138,78 @@ def __create_patch(patch, message, author_name, author_email,
         author_email = options.authemail
     if options.authdate:
         author_date = options.authdate
-    if options.commname:
-        committer_name = options.commname
-    if options.commemail:
-        committer_email = options.commemail
 
     crt_series.new_patch(patch, message = message, can_edit = False,
                          author_name = author_name,
                          author_email = author_email,
-                         author_date = author_date,
-                         committer_name = committer_name,
-                         committer_email = committer_email)
-
-    print 'Importing patch "%s"...' % patch,
-    sys.stdout.flush()
+                         author_date = author_date)
 
-    if options.base:
-        git.apply_patch(diff = diff, base = git_id(options.base))
+    if not diff:
+        out.warn('No diff found, creating empty patch')
     else:
-        git.apply_patch(diff = diff)
-
-    crt_series.refresh_patch(edit = options.edit,
-                             show_patch = options.showpatch)
+        out.start('Importing patch "%s"' % patch)
+        if options.base:
+            base = git_id(crt_series, options.base)
+        else:
+            base = None
+        git.apply_patch(diff = diff, base = base, reject = options.reject)
+        crt_series.refresh_patch(edit = options.edit,
+                                 show_patch = options.showpatch,
+                                 sign_str = options.sign_str,
+                                 backup = False)
+        out.done()
+
+def __mkpatchname(name, suffix):
+    if name.lower().endswith(suffix.lower()):
+        return name[:-len(suffix)]
+    return name
+
+def __get_handle_and_name(filename):
+    """Return a file object and a patch name derived from filename
+    """
+    # see if it's a gzip'ed or bzip2'ed patch
+    import bz2, gzip
+    for copen, ext in [(gzip.open, '.gz'), (bz2.BZ2File, '.bz2')]:
+        try:
+            f = copen(filename)
+            f.read(1)
+            f.seek(0)
+            return (f, __mkpatchname(filename, ext))
+        except IOError, e:
+            pass
 
-    print 'done'    
+    # plain old file...
+    return (open(filename), filename)
 
-def __import_file(patch, filename, options):
+def __import_file(filename, options, patch = None):
     """Import a patch from a file or standard input
     """
+    pname = None
     if filename:
-        f = file(filename)
+        (f, pname) = __get_handle_and_name(filename)
     else:
         f = sys.stdin
 
+    if patch:
+        pname = patch
+    elif not pname:
+        pname = filename
+
     if options.mail:
         try:
             msg = email.message_from_file(f)
         except Exception, ex:
             raise CmdException, 'error parsing the e-mail file: %s' % str(ex)
         message, author_name, author_email, author_date, diff = \
-                 __parse_mail(msg)
+                 parse_mail(msg)
     else:
         message, author_name, author_email, author_date, diff = \
-                 __parse_patch(f)
+                 parse_patch(f.read(), contains_diff = True)
 
     if filename:
         f.close()
 
-    __create_patch(patch, message, author_name, author_email,
+    __create_patch(pname, message, author_name, author_email,
                    author_date, diff, options)
 
 def __import_series(filename, options):
@@ -316,6 +218,9 @@ def __import_series(filename, options):
     applied = crt_series.get_applied()
 
     if filename:
+        if tarfile.is_tarfile(filename):
+            __import_tarfile(filename, options)
+            return
         f = file(filename)
         patchdir = os.path.dirname(filename)
     else:
@@ -327,12 +232,9 @@ def __import_series(filename, options):
         if not patch:
             continue
         patchfile = os.path.join(patchdir, patch)
-
-        if options.strip:
-            patch = __strip_patch_name(patch)
         patch = __replace_slashes_with_dashes(patch);
 
-        __import_file(patch, patchfile, options)
+        __import_file(patchfile, options, patch)
 
     if filename:
         f.close()
@@ -352,12 +254,64 @@ def __import_mbox(filename, options):
 
     for msg in mbox:
         message, author_name, author_email, author_date, diff = \
-                 __parse_mail(msg)
+                 parse_mail(msg)
         __create_patch(None, message, author_name, author_email,
                        author_date, diff, options)
 
     f.close()
 
+def __import_url(url, options):
+    """Import a patch from a URL
+    """
+    import urllib
+    import tempfile
+
+    if not url:
+        raise CmdException('URL argument required')
+
+    patch = os.path.basename(urllib.unquote(url))
+    filename = os.path.join(tempfile.gettempdir(), patch)
+    urllib.urlretrieve(url, filename)
+    __import_file(filename, options)
+
+def __import_tarfile(tar, options):
+    """Import patch series from a tar archive
+    """
+    import tempfile
+    import shutil
+
+    if not tarfile.is_tarfile(tar):
+        raise CmdException, "%s is not a tarfile!" % tar
+
+    t = tarfile.open(tar, 'r')
+    names = t.getnames()
+
+    # verify paths in the tarfile are safe
+    for n in names:
+        if n.startswith('/'):
+            raise CmdException, "Absolute path found in %s" % tar
+        if n.find("..") > -1:
+            raise CmdException, "Relative path found in %s" % tar
+
+    # find the series file
+    seriesfile = '';
+    for m in names:
+        if m.endswith('/series') or m == 'series':
+            seriesfile = m
+            break
+    if seriesfile == '':
+        raise CmdException, "no 'series' file found in %s" % tar
+
+    # unpack into a tmp dir
+    tmpdir = tempfile.mkdtemp('.stg')
+    t.extractall(tmpdir)
+
+    # apply the series
+    __import_series(os.path.join(tmpdir, seriesfile), options)
+
+    # cleanup the tmpdir
+    shutil.rmtree(tmpdir)
+
 def func(parser, options, args):
     """Import a GNU diff file as a new patch
     """
@@ -366,27 +320,24 @@ def func(parser, options, args):
 
     check_local_changes()
     check_conflicts()
-    check_head_top_equal()
+    check_head_top_equal(crt_series)
 
     if len(args) == 1:
         filename = args[0]
     else:
         filename = None
 
+    if not options.url and filename:
+        filename = os.path.abspath(filename)
+    directory.cd_to_topdir()
+
     if options.series:
         __import_series(filename, options)
     elif options.mbox:
         __import_mbox(filename, options)
+    elif options.url:
+        __import_url(filename, options)
     else:
-        if options.name:
-            patch = options.name
-        elif filename:
-            patch = os.path.basename(filename)
-        else:
-            patch = ''
-        if options.strip:
-            patch = __strip_patch_name(patch)
-
-        __import_file(patch, filename, options)
+        __import_file(filename, options)
 
-    print_crt_patch()
+    print_crt_patch(crt_series)