Add automatic git-mergetool invocation to the new infrastructure
[stgit] / stgit / lib / git.py
index 0e0cccb..9c530c7 100644 (file)
@@ -20,7 +20,7 @@ class Immutable(object):
     creating a whole new commit object that's exactly like the old one
     except for the commit message.)
 
-    The L{Immutable} class doesn't acytually enforce immutability --
+    The L{Immutable} class doesn't actually enforce immutability --
     that is up to the individual immutable subclasses. It just serves
     as documentation."""
 
@@ -139,6 +139,7 @@ class Person(Immutable, Repr):
         assert isinstance(self.__date, Date) or self.__date in [None, NoValue]
     name = property(lambda self: self.__name)
     email = property(lambda self: self.__email)
+    name_email = property(lambda self: '%s <%s>' % (self.name, self.email))
     date = property(lambda self: self.__date)
     def set_name(self, name):
         return type(self)(name = name, defaults = self)
@@ -147,7 +148,7 @@ class Person(Immutable, Repr):
     def set_date(self, date):
         return type(self)(date = date, defaults = self)
     def __str__(self):
-        return '%s <%s> %s' % (self.name, self.email, self.date)
+        return '%s %s' % (self.name_email, self.date)
     @classmethod
     def parse(cls, s):
         m = re.match(r'^([^<]*)<([^>]*)>\s+(\d+\s+[+-]\d{4})$', s)
@@ -182,16 +183,142 @@ class Person(Immutable, Repr):
                 defaults = cls.user())
         return cls.__committer
 
-class Tree(Immutable, Repr):
-    """Represents a git tree object."""
-    def __init__(self, sha1):
+class GitObject(Immutable, Repr):
+    """Base class for all git objects. One git object is represented by at
+    most one C{GitObject}, which makes it possible to compare them
+    using normal Python object comparison; it also ensures we don't
+    waste more memory than necessary."""
+
+class BlobData(Immutable, Repr):
+    """Represents the data contents of a git blob object."""
+    def __init__(self, string):
+        self.__string = str(string)
+    str = property(lambda self: self.__string)
+    def commit(self, repository):
+        """Commit the blob.
+        @return: The committed blob
+        @rtype: L{Blob}"""
+        sha1 = repository.run(['git', 'hash-object', '-w', '--stdin']
+                              ).raw_input(self.str).output_one_line()
+        return repository.get_blob(sha1)
+
+class Blob(GitObject):
+    """Represents a git blob object. All the actual data contents of the
+    blob object is stored in the L{data} member, which is a
+    L{BlobData} object."""
+    typename = 'blob'
+    default_perm = '100644'
+    def __init__(self, repository, sha1):
+        self.__repository = repository
         self.__sha1 = sha1
     sha1 = property(lambda self: self.__sha1)
     def __str__(self):
-        return 'Tree<%s>' % self.sha1
+        return 'Blob<%s>' % self.sha1
+    @property
+    def data(self):
+        return BlobData(self.__repository.cat_object(self.sha1))
+
+class ImmutableDict(dict):
+    """A dictionary that cannot be modified once it's been created."""
+    def error(*args, **kwargs):
+        raise TypeError('Cannot modify immutable dict')
+    __delitem__ = error
+    __setitem__ = error
+    clear = error
+    pop = error
+    popitem = error
+    setdefault = error
+    update = error
+
+class TreeData(Immutable, Repr):
+    """Represents the data contents of a git tree object."""
+    @staticmethod
+    def __x(po):
+        if isinstance(po, GitObject):
+            perm, object = po.default_perm, po
+        else:
+            perm, object = po
+        return perm, object
+    def __init__(self, entries):
+        """Create a new L{TreeData} object from the given mapping from names
+        (strings) to either (I{permission}, I{object}) tuples or just
+        objects."""
+        self.__entries = ImmutableDict((name, self.__x(po))
+                                       for (name, po) in entries.iteritems())
+    entries = property(lambda self: self.__entries)
+    """Map from name to (I{permission}, I{object}) tuple."""
+    def set_entry(self, name, po):
+        """Create a new L{TreeData} object identical to this one, except that
+        it maps C{name} to C{po}.
+
+        @param name: Name of the changed mapping
+        @type name: C{str}
+        @param po: Value of the changed mapping
+        @type po: L{Blob} or L{Tree} or (C{str}, L{Blob} or L{Tree})
+        @return: The new L{TreeData} object
+        @rtype: L{TreeData}"""
+        e = dict(self.entries)
+        e[name] = self.__x(po)
+        return type(self)(e)
+    def del_entry(self, name):
+        """Create a new L{TreeData} object identical to this one, except that
+        it doesn't map C{name} to anything.
+
+        @param name: Name of the deleted mapping
+        @type name: C{str}
+        @return: The new L{TreeData} object
+        @rtype: L{TreeData}"""
+        e = dict(self.entries)
+        del e[name]
+        return type(self)(e)
+    def commit(self, repository):
+        """Commit the tree.
+        @return: The committed tree
+        @rtype: L{Tree}"""
+        listing = ''.join(
+            '%s %s %s\t%s\0' % (mode, obj.typename, obj.sha1, name)
+            for (name, (mode, obj)) in self.entries.iteritems())
+        sha1 = repository.run(['git', 'mktree', '-z']
+                              ).raw_input(listing).output_one_line()
+        return repository.get_tree(sha1)
+    @classmethod
+    def parse(cls, repository, s):
+        """Parse a raw git tree description.
+
+        @return: A new L{TreeData} object
+        @rtype: L{TreeData}"""
+        entries = {}
+        for line in s.split('\0')[:-1]:
+            m = re.match(r'^([0-7]{6}) ([a-z]+) ([0-9a-f]{40})\t(.*)$', line)
+            assert m
+            perm, type, sha1, name = m.groups()
+            entries[name] = (perm, repository.get_object(type, sha1))
+        return cls(entries)
+
+class Tree(GitObject):
+    """Represents a git tree object. All the actual data contents of the
+    tree object is stored in the L{data} member, which is a
+    L{TreeData} object."""
+    typename = 'tree'
+    default_perm = '040000'
+    def __init__(self, repository, sha1):
+        self.__sha1 = sha1
+        self.__repository = repository
+        self.__data = None
+    sha1 = property(lambda self: self.__sha1)
+    @property
+    def data(self):
+        if self.__data == None:
+            self.__data = TreeData.parse(
+                self.__repository,
+                self.__repository.run(['git', 'ls-tree', '-z', self.sha1]
+                                      ).raw_output())
+        return self.__data
+    def __str__(self):
+        return 'Tree<sha1: %s>' % self.sha1
 
 class CommitData(Immutable, Repr):
-    """Represents the actual data contents of a git commit object."""
+    """Represents the data contents of a git commit object."""
     def __init__(self, tree = NoValue, parents = NoValue, author = NoValue,
                  committer = NoValue, message = NoValue, defaults = NoValue):
         d = make_defaults(defaults)
@@ -238,8 +365,30 @@ class CommitData(Immutable, Repr):
         return ('CommitData<tree: %s, parents: %s, author: %s,'
                 ' committer: %s, message: "%s">'
                 ) % (tree, parents, self.author, self.committer, self.message)
+    def commit(self, repository):
+        """Commit the commit.
+        @return: The committed commit
+        @rtype: L{Commit}"""
+        c = ['git', 'commit-tree', self.tree.sha1]
+        for p in self.parents:
+            c.append('-p')
+            c.append(p.sha1)
+        env = {}
+        for p, v1 in ((self.author, 'AUTHOR'),
+                       (self.committer, 'COMMITTER')):
+            if p != None:
+                for attr, v2 in (('name', 'NAME'), ('email', 'EMAIL'),
+                                 ('date', 'DATE')):
+                    if getattr(p, attr) != None:
+                        env['GIT_%s_%s' % (v1, v2)] = str(getattr(p, attr))
+        sha1 = repository.run(c, env = env).raw_input(self.message
+                                                      ).output_one_line()
+        return repository.get_commit(sha1)
     @classmethod
     def parse(cls, repository, s):
+        """Parse a raw git commit description.
+        @return: A new L{CommitData} object
+        @rtype: L{CommitData}"""
         cd = cls(parents = [])
         lines = list(s.splitlines(True))
         for i in xrange(len(lines)):
@@ -259,10 +408,11 @@ class CommitData(Immutable, Repr):
                 assert False
         assert False
 
-class Commit(Immutable, Repr):
+class Commit(GitObject):
     """Represents a git commit object. All the actual data contents of the
     commit object is stored in the L{data} member, which is a
     L{CommitData} object."""
+    typename = 'commit'
     def __init__(self, repository, sha1):
         self.__sha1 = sha1
         self.__repository = repository
@@ -361,13 +511,22 @@ class RunWithEnvCwd(RunWithEnv):
         @type env: dict
         @param env: Extra environment"""
         return RunWithEnv.run(self, args, env).cwd(self.cwd)
+    def run_in_cwd(self, args):
+        """Run the given command with an environment given by self.env and
+        self.env_in_cwd, without changing the current working
+        directory.
+
+        @type args: list of strings
+        @param args: Command and argument vector"""
+        return RunWithEnv.run(self, args, self.env_in_cwd)
 
 class Repository(RunWithEnv):
     """Represents a git repository."""
     def __init__(self, directory):
         self.__git_dir = directory
         self.__refs = Refs(self)
-        self.__trees = ObjectCache(lambda sha1: Tree(sha1))
+        self.__blobs = ObjectCache(lambda sha1: Blob(self, sha1))
+        self.__trees = ObjectCache(lambda sha1: Tree(self, sha1))
         self.__commits = ObjectCache(lambda sha1: Commit(self, sha1))
         self.__default_index = None
         self.__default_worktree = None
@@ -384,7 +543,7 @@ class Repository(RunWithEnv):
     @property
     def current_branch_name(self):
         """Return the name of the current branch."""
-        return utils.strip_leading('refs/heads/', self.head_ref)
+        return utils.strip_prefix('refs/heads/', self.head_ref)
     @property
     def default_index(self):
         """An L{Index} object representing the default index file for the
@@ -422,33 +581,25 @@ class Repository(RunWithEnv):
     refs = property(lambda self: self.__refs)
     def cat_object(self, sha1):
         return self.run(['git', 'cat-file', '-p', sha1]).raw_output()
-    def rev_parse(self, rev):
+    def rev_parse(self, rev, discard_stderr = False):
         try:
             return self.get_commit(self.run(
                     ['git', 'rev-parse', '%s^{commit}' % rev]
-                    ).output_one_line())
+                    ).discard_stderr(discard_stderr).output_one_line())
         except run.RunException:
             raise RepositoryException('%s: No such revision' % rev)
+    def get_blob(self, sha1):
+        return self.__blobs[sha1]
     def get_tree(self, sha1):
         return self.__trees[sha1]
     def get_commit(self, sha1):
         return self.__commits[sha1]
-    def commit(self, commitdata):
-        c = ['git', 'commit-tree', commitdata.tree.sha1]
-        for p in commitdata.parents:
-            c.append('-p')
-            c.append(p.sha1)
-        env = {}
-        for p, v1 in ((commitdata.author, 'AUTHOR'),
-                       (commitdata.committer, 'COMMITTER')):
-            if p != None:
-                for attr, v2 in (('name', 'NAME'), ('email', 'EMAIL'),
-                                 ('date', 'DATE')):
-                    if getattr(p, attr) != None:
-                        env['GIT_%s_%s' % (v1, v2)] = str(getattr(p, attr))
-        sha1 = self.run(c, env = env).raw_input(commitdata.message
-                                                ).output_one_line()
-        return self.get_commit(sha1)
+    def get_object(self, type, sha1):
+        return { Blob.typename: self.get_blob,
+                 Tree.typename: self.get_tree,
+                 Commit.typename: self.get_commit }[type](sha1)
+    def commit(self, objectdata):
+        return objectdata.commit(self)
     @property
     def head_ref(self):
         try:
@@ -459,31 +610,13 @@ class Repository(RunWithEnv):
     def set_head_ref(self, ref, msg):
         self.run(['git', 'symbolic-ref', '-m', msg, 'HEAD', ref]).no_output()
     def simple_merge(self, base, ours, theirs):
-        """Given three L{Tree}s, tries to do an in-index merge with a
-        temporary index. Returns the result L{Tree}, or None if the
-        merge failed (due to conflicts)."""
-        assert isinstance(base, Tree)
-        assert isinstance(ours, Tree)
-        assert isinstance(theirs, Tree)
-
-        # Take care of the really trivial cases.
-        if base == ours:
-            return theirs
-        if base == theirs:
-            return ours
-        if ours == theirs:
-            return ours
-
         index = self.temp_index()
         try:
-            index.merge(base, ours, theirs)
-            try:
-                return index.write_tree()
-            except MergeException:
-                return None
+            result, index_tree = index.merge(base, ours, theirs)
         finally:
             index.delete()
-    def apply(self, tree, patch_text):
+        return result
+    def apply(self, tree, patch_text, quiet):
         """Given a L{Tree} and a patch, will either return the new L{Tree}
         that results when the patch is applied, or None if the patch
         couldn't be applied."""
@@ -494,7 +627,7 @@ class Repository(RunWithEnv):
         try:
             index.read_tree(tree)
             try:
-                index.apply(patch_text)
+                index.apply(patch_text, quiet)
                 return index.write_tree()
             except MergeException:
                 return None
@@ -512,12 +645,41 @@ class Repository(RunWithEnv):
         assert isinstance(t2, Tree)
         return self.run(['git', 'diff-tree', '-p'] + list(diff_opts)
                         + [t1.sha1, t2.sha1]).raw_output()
+    def diff_tree_files(self, t1, t2):
+        """Given two L{Tree}s C{t1} and C{t2}, iterate over all files for
+        which they differ. For each file, yield a tuple with the old
+        file mode, the new file mode, the old blob, the new blob, the
+        status, the old filename, and the new filename. Except in case
+        of a copy or a rename, the old and new filenames are
+        identical."""
+        assert isinstance(t1, Tree)
+        assert isinstance(t2, Tree)
+        i = iter(self.run(['git', 'diff-tree', '-r', '-z'] + [t1.sha1, t2.sha1]
+                          ).raw_output().split('\0'))
+        try:
+            while True:
+                x = i.next()
+                if not x:
+                    continue
+                omode, nmode, osha1, nsha1, status = x[1:].split(' ')
+                fn1 = i.next()
+                if status[0] in ['C', 'R']:
+                    fn2 = i.next()
+                else:
+                    fn2 = fn1
+                yield (omode, nmode, self.get_blob(osha1),
+                       self.get_blob(nsha1), status, fn1, fn2)
+        except StopIteration:
+            pass
 
 class MergeException(exception.StgException):
     """Exception raised when a merge fails for some reason."""
 
 class MergeConflictException(MergeException):
     """Exception raised when a merge fails due to conflicts."""
+    def __init__(self, conflicts):
+        MergeException.__init__(self)
+        self.conflicts = conflicts
 
 class Index(RunWithEnv):
     """Represents a git index file."""
@@ -535,30 +697,81 @@ class Index(RunWithEnv):
     def read_tree(self, tree):
         self.run(['git', 'read-tree', tree.sha1]).no_output()
     def write_tree(self):
+        """Write the index contents to the repository.
+        @return: The resulting L{Tree}
+        @rtype: L{Tree}"""
         try:
             return self.__repository.get_tree(
                 self.run(['git', 'write-tree']).discard_stderr(
                     ).output_one_line())
         except run.RunException:
             raise MergeException('Conflicting merge')
-    def is_clean(self):
+    def is_clean(self, tree):
+        """Check whether the index is clean relative to the given treeish."""
         try:
-            self.run(['git', 'update-index', '--refresh']).discard_output()
+            self.run(['git', 'diff-index', '--quiet', '--cached', tree.sha1]
+                    ).discard_output()
         except run.RunException:
             return False
         else:
             return True
-    def merge(self, base, ours, theirs):
-        """In-index merge, no worktree involved."""
-        self.run(['git', 'read-tree', '-m', '-i', '--aggressive',
-                  base.sha1, ours.sha1, theirs.sha1]).no_output()
-    def apply(self, patch_text):
+    def apply(self, patch_text, quiet):
         """In-index patch application, no worktree involved."""
         try:
-            self.run(['git', 'apply', '--cached']
-                     ).raw_input(patch_text).no_output()
+            r = self.run(['git', 'apply', '--cached']).raw_input(patch_text)
+            if quiet:
+                r = r.discard_stderr()
+            r.no_output()
         except run.RunException:
             raise MergeException('Patch does not apply cleanly')
+    def apply_treediff(self, tree1, tree2, quiet):
+        """Apply the diff from C{tree1} to C{tree2} to the index."""
+        # Passing --full-index here is necessary to support binary
+        # files. It is also sufficient, since the repository already
+        # contains all involved objects; in other words, we don't have
+        # to use --binary.
+        self.apply(self.__repository.diff_tree(tree1, tree2, ['--full-index']),
+                   quiet)
+    def merge(self, base, ours, theirs, current = None):
+        """Use the index (and only the index) to do a 3-way merge of the
+        L{Tree}s C{base}, C{ours} and C{theirs}. The merge will either
+        succeed (in which case the first half of the return value is
+        the resulting tree) or fail cleanly (in which case the first
+        half of the return value is C{None}).
+
+        If C{current} is given (and not C{None}), it is assumed to be
+        the L{Tree} currently stored in the index; this information is
+        used to avoid having to read the right tree (either of C{ours}
+        and C{theirs}) into the index if it's already there. The
+        second half of the return value is the tree now stored in the
+        index, or C{None} if unknown. If the merge succeeded, this is
+        often the merge result."""
+        assert isinstance(base, Tree)
+        assert isinstance(ours, Tree)
+        assert isinstance(theirs, Tree)
+        assert current == None or isinstance(current, Tree)
+
+        # Take care of the really trivial cases.
+        if base == ours:
+            return (theirs, current)
+        if base == theirs:
+            return (ours, current)
+        if ours == theirs:
+            return (ours, current)
+
+        if current == theirs:
+            # Swap the trees. It doesn't matter since merging is
+            # symmetric, and will allow us to avoid the read_tree()
+            # call below.
+            ours, theirs = theirs, ours
+        if current != ours:
+            self.read_tree(ours)
+        try:
+            self.apply_treediff(base, theirs, quiet = True)
+            result = self.write_tree()
+            return (result, result)
+        except MergeException:
+            return (None, ours)
     def delete(self):
         if os.path.isfile(self.__filename):
             os.remove(self.__filename)
@@ -576,6 +789,7 @@ class Worktree(object):
     def __init__(self, directory):
         self.__directory = directory
     env = property(lambda self: { 'GIT_WORK_TREE': '.' })
+    env_in_cwd = property(lambda self: { 'GIT_WORK_TREE': self.directory })
     directory = property(lambda self: self.__directory)
 
 class CheckoutException(exception.StgException):
@@ -592,7 +806,12 @@ class IndexAndWorktree(RunWithEnvCwd):
     index = property(lambda self: self.__index)
     env = property(lambda self: utils.add_dict(self.__index.env,
                                                self.__worktree.env))
+    env_in_cwd = property(lambda self: self.__worktree.env_in_cwd)
     cwd = property(lambda self: self.__worktree.directory)
+    def checkout_hard(self, tree):
+        assert isinstance(tree, Tree)
+        self.run(['git', 'read-tree', '--reset', '-u', tree.sha1]
+                 ).discard_output()
     def checkout(self, old_tree, new_tree):
         # TODO: Optionally do a 3-way instead of doing nothing when we
         # have a problem. Or maybe we should stash changes in a patch?
@@ -605,7 +824,7 @@ class IndexAndWorktree(RunWithEnvCwd):
                      ).discard_output()
         except run.RunException:
             raise CheckoutException('Index/workdir dirty')
-    def merge(self, base, ours, theirs):
+    def merge(self, base, ours, theirs, interactive = False):
         assert isinstance(base, Tree)
         assert isinstance(ours, Tree)
         assert isinstance(theirs, Tree)
@@ -615,17 +834,53 @@ class IndexAndWorktree(RunWithEnvCwd):
                          env = { 'GITHEAD_%s' % base.sha1: 'ancestor',
                                  'GITHEAD_%s' % ours.sha1: 'current',
                                  'GITHEAD_%s' % theirs.sha1: 'patched'})
-            r.discard_output()
+            r.returns([0, 1])
+            output = r.output_lines()
+            if r.exitcode:
+                # There were conflicts
+                if interactive:
+                    self.mergetool()
+                else:
+                    conflicts = [l for l in output if l.startswith('CONFLICT')]
+                    raise MergeConflictException(conflicts)
         except run.RunException, e:
-            if r.exitcode == 1:
-                raise MergeConflictException()
-            else:
-                raise MergeException('Index/worktree dirty')
-    def changed_files(self):
-        return self.run(['git', 'diff-files', '--name-only']).output_lines()
-    def update_index(self, files):
-        self.run(['git', 'update-index', '--remove', '-z', '--stdin']
-                 ).input_nulterm(files).discard_output()
+            raise MergeException('Index/worktree dirty')
+    def mergetool(self, files = ()):
+        """Invoke 'git mergetool' on the current IndexAndWorktree to resolve
+        any outstanding conflicts. If 'not files', all the files in an
+        unmerged state will be processed."""
+        self.run(['git', 'mergetool'] + list(files)).returns([0, 1]).run()
+        # check for unmerged entries (prepend 'CONFLICT ' for consistency with
+        # merge())
+        conflicts = ['CONFLICT ' + f for f in self.index.conflicts()]
+        if conflicts:
+            raise MergeConflictException(conflicts)
+    def changed_files(self, tree, pathlimits = []):
+        """Return the set of files in the worktree that have changed with
+        respect to C{tree}. The listing is optionally restricted to
+        those files that match any of the path limiters given.
+
+        The path limiters are relative to the current working
+        directory; the returned file names are relative to the
+        repository root."""
+        assert isinstance(tree, Tree)
+        return set(self.run_in_cwd(
+                ['git', 'diff-index', tree.sha1, '--name-only', '-z', '--']
+                + list(pathlimits)).raw_output().split('\0')[:-1])
+    def update_index(self, paths):
+        """Update the index with files from the worktree. C{paths} is an
+        iterable of paths relative to the root of the repository."""
+        cmd = ['git', 'update-index', '--remove']
+        self.run(cmd + ['-z', '--stdin']
+                 ).input_nulterm(paths).discard_output()
+    def worktree_clean(self):
+        """Check whether the worktree is clean relative to index."""
+        try:
+            self.run(['git', 'update-index', '--refresh']).discard_output()
+        except run.RunException:
+            return False
+        else:
+            return True
 
 class Branch(object):
     """Represents a Git branch."""
@@ -673,3 +928,8 @@ class Branch(object):
         repository.run(['git', 'branch', create_at.sha1]).discard_output()
 
         return cls(repository, name)
+
+def diffstat(diff):
+    """Return the diffstat of the supplied diff."""
+    return run.Run('git', 'apply', '--stat', '--summary'
+                   ).raw_input(diff).raw_output()