Create a git.Branch class as ancestor of stack.Stack
[stgit] / stgit / lib / git.py
1 """A Python class hierarchy wrapping a git repository and its
2 contents."""
3
4 import os, os.path, re
5 from datetime import datetime, timedelta, tzinfo
6
7 from stgit import exception, run, utils
8 from stgit.config import config
9
class Immutable(object):
    """I{Immutable} objects cannot be modified once created. Any
    modification methods will return a new object, leaving the
    original object as it was.

    The reason for this is that we want to be able to represent git
    objects, which are immutable, and want to be able to create new
    git objects that are just slight modifications of other git
    objects. (Such as, for example, modifying the commit message of a
    commit object while leaving the rest of it intact. This involves
    creating a whole new commit object that's exactly like the old one
    except for the commit message.)

    The L{Immutable} class doesn't actually enforce immutability --
    that is up to the individual immutable subclasses. It just serves
    as documentation."""
26
class RepositoryException(exception.StgException):
    """Base class for all exceptions due to failed L{Repository}
    operations. It is raised directly when no git repository can be
    found or when a revision cannot be resolved, and it is the parent
    of L{DetachedHeadException}."""
30
class BranchException(exception.StgException):
    """Exception raised by failed L{Branch} operations, such as looking
    up a branch that does not exist or creating one that already
    exists."""
33
class DateException(exception.StgException):
    """Exception raised when a date+time string could not be parsed."""
    def __init__(self, string, type):
        """Build the error message.

        @param string: The text that failed to parse
        @param type: Human-readable name of what the text was expected
            to be (the callers in this module pass C{'date'} or
            C{'time zone'})"""
        message = '"%s" is not a valid %s' % (string, type)
        exception.StgException.__init__(self, message)
39
class DetachedHeadException(RepositoryException):
    """Exception raised when HEAD is detached (that is, there is no
    current branch)."""
    def __init__(self):
        """Initialise the exception with its fixed message."""
        message = 'Not on any branch'
        RepositoryException.__init__(self, message)
45
class Repr(object):
    """Utility class that defines C{__repr__} in terms of C{__str__}."""
    def __repr__(self):
        # Delegate to str() so subclasses only have to implement __str__.
        return str(self)
50
class NoValue(object):
    """Sentinel meaning "no argument was supplied". The class object
    itself is used as the marker, so it can never collide with a real
    argument value (including C{None})."""
55
def make_defaults(defaults):
    """Return a resolver function C{d(val, attr, default_fun)} bound to
    the given C{defaults} object.

    The resolver picks, in order of preference:
      1. C{val}, if it is anything other than L{NoValue};
      2. C{getattr(defaults, attr)}, if C{defaults} is not L{NoValue};
      3. the result of calling C{default_fun()} (C{None} by default).

    @param defaults: Object to fall back on for missing values, or
        L{NoValue} if there is none
    @return: The resolver function"""
    def d(val, attr, default_fun = lambda: None):
        # NoValue is a sentinel, so test identity. Using != would invoke
        # the value's own comparison methods, which may be overridden
        # and give the wrong answer.
        if val is not NoValue:
            return val
        if defaults is not NoValue:
            return getattr(defaults, attr)
        return default_fun()
    return d
65
class TimeZone(tzinfo, Repr):
    """A simple time zone class for static offsets from UTC. (We have to
    define our own since Python's standard library doesn't define any
    time zone classes.)"""
    def __init__(self, tzstring):
        """Parse an offset of the form C{+HHMM} or C{+HH:MM} (sign
        mandatory).

        @raise DateException: If C{tzstring} does not have that form"""
        match = re.match(r'^([+-])(\d{2}):?(\d{2})$', tzstring)
        if match is None:
            raise DateException(tzstring, 'time zone')
        sign_char, hours, minutes = match.groups()
        # '+' -> 1, '-' -> -1; applied to both components of the offset.
        sign = int(sign_char + '1')
        try:
            offset = timedelta(hours = sign*int(hours),
                               minutes = sign*int(minutes))
        except OverflowError:
            raise DateException(tzstring, 'time zone')
        self.__offset = offset
        self.__name = tzstring
    def utcoffset(self, dt):
        return self.__offset
    def tzname(self, dt):
        return self.__name
    def dst(self, dt):
        # Static offsets never observe daylight saving time.
        return timedelta(0)
    def __str__(self):
        return self.__name
89
class Date(Immutable, Repr):
    """Represents a timestamp used in git commits."""
    def __init__(self, datestring):
        """Parse C{datestring}, which must be either in git's native
        format (seconds since the epoch followed by a UTC offset) or in
        the form C{YYYY-MM-DD HH:MM:SS +ZZZZ}.

        @raise DateException: If the string matches neither format or
            describes an invalid date"""
        # git's native format.
        git_match = re.match(r'^(\d+)\s+([+-]\d\d:?\d\d)$', datestring)
        if git_match:
            seconds, zone = git_match.groups()
            try:
                self.__time = datetime.fromtimestamp(int(seconds),
                                                     TimeZone(zone))
            except ValueError:
                raise DateException(datestring, 'date')
            return

        # ISO-like format.
        iso_match = re.match(
            r'^(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2}):(\d{2})\s+'
            + r'([+-]\d\d:?\d\d)$', datestring)
        if iso_match:
            groups = iso_match.groups()
            try:
                fields = [int(g) for g in groups[:6]]
                # Positional datetime arguments: year .. second, then
                # microsecond (0) and tzinfo.
                self.__time = datetime(*(fields + [0, TimeZone(groups[6])]))
            except ValueError:
                raise DateException(datestring, 'date')
            return

        raise DateException(datestring, 'date')
    def __str__(self):
        return self.isoformat()
    def isoformat(self):
        """Human-friendly ISO 8601 format."""
        naive = self.__time.replace(tzinfo = None)
        return '%s %s' % (naive.isoformat(' '), self.__time.tzinfo)
    @classmethod
    def maybe(cls, datestring):
        """Return a new object initialized with the argument if it contains a
        value (otherwise, just return the argument)."""
        if datestring in (None, NoValue):
            return datestring
        return cls(datestring)
129
class Person(Immutable, Repr):
    """Represents an author or committer in a git commit object. Contains
    name, email and timestamp."""
    def __init__(self, name = NoValue, email = NoValue,
                 date = NoValue, defaults = NoValue):
        """Create a person. Every argument left as L{NoValue} is taken
        from the attribute of the same name on C{defaults}, or becomes
        C{None} if no C{defaults} object is given.

        @param date: A L{Date} object (or C{None})"""
        d = make_defaults(defaults)
        self.__name = d(name, 'name')
        self.__email = d(email, 'email')
        self.__date = d(date, 'date')
        assert isinstance(self.__date, Date) or self.__date in [None, NoValue]
    name = property(lambda self: self.__name)
    email = property(lambda self: self.__email)
    date = property(lambda self: self.__date)
    def set_name(self, name):
        """Return a copy of this person with a different name."""
        return type(self)(name = name, defaults = self)
    def set_email(self, email):
        """Return a copy of this person with a different email."""
        return type(self)(email = email, defaults = self)
    def set_date(self, date):
        """Return a copy of this person with a different date."""
        return type(self)(date = date, defaults = self)
    def __str__(self):
        return '%s <%s> %s' % (self.name, self.email, self.date)
    @classmethod
    def parse(cls, s):
        """Parse a string of the form C{Name <email> seconds +ZZZZ} (as
        found in the author and committer lines of a commit object)."""
        m = re.match(r'^([^<]*)<([^>]*)>\s+(\d+\s+[+-]\d{4})$', s)
        assert m
        name = m.group(1).strip()
        email = m.group(2)
        date = Date(m.group(3))
        return cls(name, email, date)

    # The cache attributes below use a single leading underscore on
    # purpose. A double-underscore attribute is name-mangled where it is
    # assigned (cls.__user becomes cls._Person__user) but not inside the
    # string passed to hasattr(), so the check would never find it and
    # the value would be recomputed on every call.
    @classmethod
    def user(cls):
        """Return the person given by the C{user.name} and
        C{user.email} configuration values. Computed once and then
        cached on the class."""
        if not hasattr(cls, '_cached_user'):
            cls._cached_user = cls(name = config.get('user.name'),
                                   email = config.get('user.email'))
        return cls._cached_user
    @classmethod
    def author(cls):
        """Return the default author: the C{GIT_AUTHOR_NAME},
        C{GIT_AUTHOR_EMAIL} and C{GIT_AUTHOR_DATE} environment
        variables where set, falling back on L{user}. Computed once and
        then cached on the class."""
        if not hasattr(cls, '_cached_author'):
            cls._cached_author = cls(
                name = os.environ.get('GIT_AUTHOR_NAME', NoValue),
                email = os.environ.get('GIT_AUTHOR_EMAIL', NoValue),
                date = Date.maybe(os.environ.get('GIT_AUTHOR_DATE', NoValue)),
                defaults = cls.user())
        return cls._cached_author
    @classmethod
    def committer(cls):
        """Return the default committer: the C{GIT_COMMITTER_NAME},
        C{GIT_COMMITTER_EMAIL} and C{GIT_COMMITTER_DATE} environment
        variables where set, falling back on L{user}. Computed once and
        then cached on the class."""
        if not hasattr(cls, '_cached_committer'):
            cls._cached_committer = cls(
                name = os.environ.get('GIT_COMMITTER_NAME', NoValue),
                email = os.environ.get('GIT_COMMITTER_EMAIL', NoValue),
                date = Date.maybe(
                    os.environ.get('GIT_COMMITTER_DATE', NoValue)),
                defaults = cls.user())
        return cls._cached_committer
184
class Tree(Immutable, Repr):
    """Represents a git tree object."""
    def __init__(self, sha1):
        self.__sha1 = sha1
    @property
    def sha1(self):
        """The object name (sha1) of the tree."""
        return self.__sha1
    def __str__(self):
        return 'Tree<%s>' % (self.sha1,)
192
class CommitData(Immutable, Repr):
    """Represents the actual data contents of a git commit object."""
    def __init__(self, tree = NoValue, parents = NoValue, author = NoValue,
                 committer = NoValue, message = NoValue, defaults = NoValue):
        """Create commit data. Every argument left as L{NoValue} is
        taken from the attribute of the same name on C{defaults}. With
        no C{defaults}, author and committer come from
        L{Person.author} and L{Person.committer} and the rest become
        C{None}."""
        d = make_defaults(defaults)
        self.__tree = d(tree, 'tree')
        self.__parents = d(parents, 'parents')
        self.__author = d(author, 'author', Person.author)
        self.__committer = d(committer, 'committer', Person.committer)
        self.__message = d(message, 'message')
    tree = property(lambda self: self.__tree)
    parents = property(lambda self: self.__parents)
    @property
    def parent(self):
        """The single parent commit. Only valid when there is exactly
        one parent."""
        assert len(self.__parents) == 1
        return self.__parents[0]
    author = property(lambda self: self.__author)
    committer = property(lambda self: self.__committer)
    message = property(lambda self: self.__message)
    def set_tree(self, tree):
        return type(self)(tree = tree, defaults = self)
    def set_parents(self, parents):
        return type(self)(parents = parents, defaults = self)
    def add_parent(self, parent):
        """Return a copy with C{parent} appended to the parent list."""
        return type(self)(parents = list(self.parents or []) + [parent],
                          defaults = self)
    def set_parent(self, parent):
        return self.set_parents([parent])
    def set_author(self, author):
        return type(self)(author = author, defaults = self)
    def set_committer(self, committer):
        return type(self)(committer = committer, defaults = self)
    def set_message(self, message):
        return type(self)(message = message, defaults = self)
    def is_nochange(self):
        """True if there is exactly one parent and this commit's tree
        is the same as that parent's tree."""
        return len(self.parents) == 1 and self.tree == self.parent.data.tree
    def __str__(self):
        if self.tree is None:
            tree = None
        else:
            tree = self.tree.sha1
        if self.parents is None:
            parents = None
        else:
            parents = [p.sha1 for p in self.parents]
        return ('CommitData<tree: %s, parents: %s, author: %s,'
                ' committer: %s, message: "%s">'
                ) % (tree, parents, self.author, self.committer, self.message)
    @classmethod
    def parse(cls, repository, s):
        """Parse the text of a commit object (header lines, a blank
        line, then the message).

        The C{tree}, C{parent}, C{author} and C{committer} headers are
        interpreted. Any other header is skipped, since this class has
        no field to store it in.

        @param repository: Used to look up the tree and parent objects
        @param s: The commit object text
        @return: A new L{CommitData} object"""
        cd = cls(parents = [])
        lines = s.splitlines(True)
        for i, raw_line in enumerate(lines):
            if raw_line.startswith(' '):
                # Continuation of a multi-line header (e.g. a signature).
                # It must not be mistaken for a new header or, when it
                # holds nothing but the leading space, for the blank
                # line that ends the header section.
                continue
            line = raw_line.strip()
            if not line:
                return cd.set_message(''.join(lines[i+1:]))
            fields = line.split(None, 1)
            if len(fields) != 2:
                # A header without a value; none of the ones we handle.
                continue
            key, value = fields
            if key == 'tree':
                cd = cd.set_tree(repository.get_tree(value))
            elif key == 'parent':
                cd = cd.add_parent(repository.get_commit(value))
            elif key == 'author':
                cd = cd.set_author(Person.parse(value))
            elif key == 'committer':
                cd = cd.set_committer(Person.parse(value))
            # Other headers (encoding, gpgsig, mergetag, ...) are
            # deliberately ignored rather than treated as fatal.
        assert False, 'commit object has no header/message separator'
261
class Commit(Immutable, Repr):
    """Represents a git commit object. All the actual data contents of the
    commit object is stored in the L{data} member, which is a
    L{CommitData} object."""
    def __init__(self, repository, sha1):
        self.__sha1 = sha1
        self.__repository = repository
        # Filled in on first access to the data property.
        self.__data = None
    @property
    def sha1(self):
        """The object name (sha1) of the commit."""
        return self.__sha1
    @property
    def data(self):
        """The L{CommitData} of this commit, read from the repository
        and parsed the first time it is requested."""
        if self.__data is None:
            repository = self.__repository
            raw = repository.cat_object(self.sha1)
            self.__data = CommitData.parse(repository, raw)
        return self.__data
    def __str__(self):
        # Shows the cached data only; printing never triggers a load.
        return 'Commit<sha1: %s, data: %s>' % (self.sha1, self.__data)
280
class Refs(object):
    """Accessor for the refs stored in a git repository. Will
    transparently cache the values of all refs."""
    def __init__(self, repository):
        self.__repository = repository
        # Maps ref name -> sha1; None until the cache is first built.
        self.__refs = None
    def __cache_refs(self):
        """(Re-)Build the cache of all refs in the repository."""
        self.__refs = {}
        listing = self.__repository.run(['git', 'show-ref']).output_lines()
        for entry in listing:
            sha1, ref = re.match(r'^([0-9a-f]{40})\s+(\S+)$', entry).groups()
            self.__refs[ref] = sha1
    def get(self, ref):
        """Get the Commit the given ref points to. Throws KeyError if ref
        doesn't exist."""
        if self.__refs is None:
            self.__cache_refs()
        sha1 = self.__refs[ref]
        return self.__repository.get_commit(sha1)
    def exists(self, ref):
        """Check if the given ref exists."""
        try:
            self.get(ref)
        except KeyError:
            return False
        return True
    def set(self, ref, commit, msg):
        """Write the sha1 of the given Commit to the ref. The ref may or may
        not already exist."""
        if self.__refs is None:
            self.__cache_refs()
        # An all-zero old value is passed for refs that are not cached.
        previous = self.__refs.get(ref, '0'*40)
        wanted = commit.sha1
        if previous == wanted:
            return
        command = ['git', 'update-ref', '-m', msg, ref, wanted, previous]
        self.__repository.run(command).no_output()
        self.__refs[ref] = wanted
    def delete(self, ref):
        """Delete the given ref. Throws KeyError if ref doesn't exist."""
        if self.__refs is None:
            self.__cache_refs()
        command = ['git', 'update-ref', '-d', ref, self.__refs[ref]]
        self.__repository.run(command).no_output()
        del self.__refs[ref]
326
class ObjectCache(object):
    """Cache for Python objects, for making sure that we create only one
    Python object per git object. This reduces memory consumption and
    makes object comparison very cheap."""
    def __init__(self, create):
        """@param create: Function called with a name to build the
            object for that name the first time it is requested"""
        self.__objects = {}
        self.__create = create
    def __getitem__(self, name):
        """Return the object for C{name}, creating it on first use."""
        objects = self.__objects
        if name not in objects:
            objects[name] = self.__create(name)
        return objects[name]
    def __contains__(self, name):
        return name in self.__objects
    def __setitem__(self, name, val):
        """Store an object for a name that is not cached yet."""
        assert name not in self.__objects
        self.__objects[name] = val
343
class RunWithEnv(object):
    """Mixin providing L{run} for classes that define an C{env}
    attribute."""
    def run(self, args, env = {}):
        """Run the given command with an environment given by self.env.

        @type args: list of strings
        @param args: Command and argument vector
        @type env: dict
        @param env: Extra environment"""
        command = run.Run(*args)
        combined = utils.add_dict(self.env, env)
        return command.env(combined)
353
class RunWithEnvCwd(RunWithEnv):
    """Like L{RunWithEnv}, for classes that also define a C{cwd}
    attribute."""
    def run(self, args, env = {}):
        """Run the given command with an environment given by self.env, and
        current working directory given by self.cwd.

        @type args: list of strings
        @param args: Command and argument vector
        @type env: dict
        @param env: Extra environment"""
        command = RunWithEnv.run(self, args, env)
        return command.cwd(self.cwd)
364
class Repository(RunWithEnv):
    """Represents a git repository."""
    def __init__(self, directory):
        """@param directory: Path of the git directory"""
        self.__git_dir = directory
        self.__refs = Refs(self)
        self.__trees = ObjectCache(lambda sha1: Tree(sha1))
        self.__commits = ObjectCache(lambda sha1: Commit(self, sha1))
        # Created lazily by the default_* properties.
        self.__default_index = None
        self.__default_worktree = None
        self.__default_iw = None
    @property
    def env(self):
        """Environment that points git at this repository."""
        return { 'GIT_DIR': self.__git_dir }
    @classmethod
    def default(cls):
        """Return the default repository."""
        try:
            git_dir = run.Run('git', 'rev-parse', '--git-dir'
                              ).output_one_line()
            return cls(git_dir)
        except run.RunException:
            raise RepositoryException('Cannot find git repository')
    @property
    def current_branch_name(self):
        """Return the name of the current branch."""
        return utils.strip_leading('refs/heads/', self.head_ref)
    @property
    def default_index(self):
        """An L{Index} object representing the default index file for the
        repository."""
        if self.__default_index is None:
            index_file = os.environ.get('GIT_INDEX_FILE', None)
            if not index_file:
                index_file = os.path.join(self.__git_dir, 'index')
            self.__default_index = Index(self, index_file)
        return self.__default_index
    def temp_index(self):
        """Return an L{Index} object representing a new temporary index file
        for the repository."""
        # Handing Index a directory makes it pick a fresh file inside it.
        return Index(self, self.__git_dir)
    @property
    def default_worktree(self):
        """A L{Worktree} object representing the default work tree."""
        if self.__default_worktree is None:
            path = os.environ.get('GIT_WORK_TREE', None)
            if not path:
                cdup = run.Run('git', 'rev-parse', '--show-cdup'
                               ).output_lines() or ['.']
                assert len(cdup) == 1
                path = cdup[0]
            self.__default_worktree = Worktree(path)
        return self.__default_worktree
    @property
    def default_iw(self):
        """An L{IndexAndWorktree} object representing the default index and
        work tree for this repository."""
        if self.__default_iw is None:
            self.__default_iw = IndexAndWorktree(self.default_index,
                                                 self.default_worktree)
        return self.__default_iw
    @property
    def directory(self):
        """Path of the git directory."""
        return self.__git_dir
    @property
    def refs(self):
        """The L{Refs} accessor of this repository."""
        return self.__refs
    def cat_object(self, sha1):
        """Return the raw output of C{git cat-file -p} for the object."""
        return self.run(['git', 'cat-file', '-p', sha1]).raw_output()
    def rev_parse(self, rev):
        """Return the L{Commit} that C{rev} resolves to.

        @raise RepositoryException: If the revision cannot be resolved"""
        try:
            sha1 = self.run(['git', 'rev-parse', '%s^{commit}' % rev]
                            ).output_one_line()
            return self.get_commit(sha1)
        except run.RunException:
            raise RepositoryException('%s: No such revision' % rev)
    def get_tree(self, sha1):
        """Return the (cached) L{Tree} object with the given sha1."""
        return self.__trees[sha1]
    def get_commit(self, sha1):
        """Return the (cached) L{Commit} object with the given sha1."""
        return self.__commits[sha1]
    def commit(self, commitdata):
        """Create a commit object from the given L{CommitData} with
        C{git commit-tree} and return the resulting L{Commit}."""
        command = ['git', 'commit-tree', commitdata.tree.sha1]
        for parent in commitdata.parents:
            command.extend(['-p', parent.sha1])
        # Author and committer details are handed to git through the
        # GIT_AUTHOR_* / GIT_COMMITTER_* environment variables; anything
        # that is None is left for git to fill in.
        env = {}
        roles = ((commitdata.author, 'AUTHOR'),
                 (commitdata.committer, 'COMMITTER'))
        fields = (('name', 'NAME'), ('email', 'EMAIL'), ('date', 'DATE'))
        for person, role in roles:
            if person is None:
                continue
            for attr, suffix in fields:
                if getattr(person, attr) is not None:
                    env['GIT_%s_%s' % (role, suffix)] = str(
                        getattr(person, attr))
        sha1 = self.run(command, env = env).raw_input(commitdata.message
                                                      ).output_one_line()
        return self.get_commit(sha1)
    @property
    def head_ref(self):
        """The ref that HEAD points to.

        @raise DetachedHeadException: If C{git symbolic-ref} fails"""
        try:
            return self.run(['git', 'symbolic-ref', '-q', 'HEAD']
                            ).output_one_line()
        except run.RunException:
            raise DetachedHeadException()
    def set_head_ref(self, ref, msg):
        """Make HEAD point to the given ref, recording C{msg}."""
        self.run(['git', 'symbolic-ref', '-m', msg, 'HEAD', ref]).no_output()
    def simple_merge(self, base, ours, theirs):
        """Given three L{Tree}s, tries to do an in-index merge with a
        temporary index. Returns the result L{Tree}, or None if the
        merge failed (due to conflicts)."""
        for tree in (base, ours, theirs):
            assert isinstance(tree, Tree)

        # Take care of the really trivial cases.
        if base == ours:
            return theirs
        if base == theirs or ours == theirs:
            return ours

        scratch = self.temp_index()
        try:
            scratch.merge(base, ours, theirs)
            try:
                return scratch.write_tree()
            except MergeException:
                return None
        finally:
            scratch.delete()
    def apply(self, tree, patch_text):
        """Given a L{Tree} and a patch, will either return the new L{Tree}
        that results when the patch is applied, or None if the patch
        couldn't be applied."""
        assert isinstance(tree, Tree)
        if not patch_text:
            return tree
        scratch = self.temp_index()
        try:
            scratch.read_tree(tree)
            try:
                scratch.apply(patch_text)
                return scratch.write_tree()
            except MergeException:
                return None
        finally:
            scratch.delete()
    def diff_tree(self, t1, t2, diff_opts):
        """Given two L{Tree}s C{t1} and C{t2}, return the patch that takes
        C{t1} to C{t2}.

        @type diff_opts: list of strings
        @param diff_opts: Extra diff options
        @rtype: String
        @return: Patch text"""
        assert isinstance(t1, Tree)
        assert isinstance(t2, Tree)
        command = ['git', 'diff-tree', '-p'] + list(diff_opts)
        command = command + [t1.sha1, t2.sha1]
        return self.run(command).raw_output()
515
class MergeException(exception.StgException):
    """Exception raised when a merge fails for some reason. Also used
    in this module when a tree cannot be written from an index or a
    patch does not apply to one."""
518
class MergeConflictException(MergeException):
    """Exception raised when a merge fails due to conflicts."""
    # Nothing beyond MergeException; the distinct type lets callers
    # tell conflicts apart from other merge failures.
521
class Index(RunWithEnv):
    """Represents a git index file."""
    def __init__(self, repository, filename):
        """@param repository: The L{Repository} the index belongs to
        @param filename: Path of the index file. If it names a
            directory instead, a fresh temporary index file name inside
            that directory is used."""
        self.__repository = repository
        if not os.path.isdir(filename):
            self.__filename = filename
            return
        # Create a temp index in the given directory.
        unique = 'index.temp-%d-%x' % (os.getpid(), id(self))
        self.__filename = os.path.join(filename, unique)
        self.delete()
    @property
    def env(self):
        """The repository environment plus C{GIT_INDEX_FILE}."""
        return utils.add_dict(
            self.__repository.env, { 'GIT_INDEX_FILE': self.__filename })
    def read_tree(self, tree):
        """Run C{git read-tree} on the given tree with this index."""
        self.run(['git', 'read-tree', tree.sha1]).no_output()
    def write_tree(self):
        """Write the index contents as a tree and return the L{Tree}.

        @raise MergeException: If C{git write-tree} fails"""
        try:
            sha1 = self.run(['git', 'write-tree']).discard_stderr(
                ).output_one_line()
            return self.__repository.get_tree(sha1)
        except run.RunException:
            raise MergeException('Conflicting merge')
    def is_clean(self):
        """True if C{git update-index --refresh} succeeds."""
        try:
            self.run(['git', 'update-index', '--refresh']).discard_output()
        except run.RunException:
            return False
        return True
    def merge(self, base, ours, theirs):
        """In-index merge, no worktree involved."""
        command = ['git', 'read-tree', '-m', '-i', '--aggressive',
                   base.sha1, ours.sha1, theirs.sha1]
        self.run(command).no_output()
    def apply(self, patch_text):
        """In-index patch application, no worktree involved."""
        try:
            self.run(['git', 'apply', '--cached']
                     ).raw_input(patch_text).no_output()
        except run.RunException:
            raise MergeException('Patch does not apply cleanly')
    def delete(self):
        """Remove the index file, if it exists."""
        if os.path.isfile(self.__filename):
            os.remove(self.__filename)
    def conflicts(self):
        """The set of conflicting paths."""
        listing = self.run(['git', 'ls-files', '-z', '--unmerged']
                           ).raw_output()
        # Entries are NUL-terminated, so the last split element is
        # dropped. Each entry is "<info>\t<path>".
        entries = listing.split('\0')[:-1]
        return set(path for _info, path
                   in (entry.split('\t', 1) for entry in entries))
573
class Worktree(object):
    """Represents a git worktree (that is, a checked-out file tree)."""
    def __init__(self, directory):
        self.__directory = directory
    @property
    def env(self):
        """Environment for git commands that use this worktree."""
        return { 'GIT_WORK_TREE': '.' }
    @property
    def directory(self):
        """The directory given at construction."""
        return self.__directory
580
class CheckoutException(exception.StgException):
    """Exception raised when a checkout fails."""
    # Raised by IndexAndWorktree.checkout when git read-tree fails.
583
class IndexAndWorktree(RunWithEnvCwd):
    """Represents a git index and a worktree. Anything that an index or
    worktree can do on their own are handled by the L{Index} and
    L{Worktree} classes; this class concerns itself with the
    operations that require both."""
    def __init__(self, index, worktree):
        self.__index = index
        self.__worktree = worktree
    index = property(lambda self: self.__index)
    env = property(lambda self: utils.add_dict(self.__index.env,
                                               self.__worktree.env))
    cwd = property(lambda self: self.__worktree.directory)
    def checkout(self, old_tree, new_tree):
        """Update index and worktree from C{old_tree} to C{new_tree}
        using a two-tree C{git read-tree -u -m}.

        @raise CheckoutException: If git read-tree fails"""
        # TODO: Optionally do a 3-way instead of doing nothing when we
        # have a problem. Or maybe we should stash changes in a patch?
        assert isinstance(old_tree, Tree)
        assert isinstance(new_tree, Tree)
        try:
            self.run(['git', 'read-tree', '-u', '-m',
                      '--exclude-per-directory=.gitignore',
                      old_tree.sha1, new_tree.sha1]
                     ).discard_output()
        except run.RunException:
            raise CheckoutException('Index/workdir dirty')
    def merge(self, base, ours, theirs):
        """Three-way merge of L{Tree}s in the index and worktree, using
        C{git merge-recursive}.

        @raise MergeConflictException: If the command exits with status 1
        @raise MergeException: If the command fails in any other way"""
        assert isinstance(base, Tree)
        assert isinstance(ours, Tree)
        assert isinstance(theirs, Tree)
        # Build the command outside the try block so that "r" is always
        # bound when the handler inspects its exit code.
        r = self.run(['git', 'merge-recursive', base.sha1, '--', ours.sha1,
                      theirs.sha1],
                     env = { 'GITHEAD_%s' % base.sha1: 'ancestor',
                             'GITHEAD_%s' % ours.sha1: 'current',
                             'GITHEAD_%s' % theirs.sha1: 'patched'})
        try:
            r.discard_output()
        except run.RunException:
            if r.exitcode == 1:
                raise MergeConflictException()
            else:
                raise MergeException('Index/worktree dirty')
    def changed_files(self):
        """Return the output lines of C{git diff-files --name-only}."""
        return self.run(['git', 'diff-files', '--name-only']).output_lines()
    def update_index(self, files):
        """Feed the given file names to
        C{git update-index --remove -z --stdin}."""
        self.run(['git', 'update-index', '--remove', '-z', '--stdin']
                 ).input_nulterm(files).discard_output()
629
class Branch(object):
    """Represents a Git branch."""
    def __init__(self, repository, name):
        """Look up an existing branch.

        @param repository: The repository the branch lives in
        @param name: Branch name, without the C{refs/heads/} prefix
        @raise BranchException: If the branch does not exist"""
        self.__repository = repository
        self.__name = name
        try:
            # Accessed only for its side effect: the ref lookup raises
            # KeyError when the branch does not exist.
            self.head
        except KeyError:
            raise BranchException('%s: no such branch' % name)

    name = property(lambda self: self.__name)
    repository = property(lambda self: self.__repository)

    def __ref(self):
        return 'refs/heads/%s' % self.__name
    @property
    def head(self):
        """The commit the branch ref currently points to."""
        return self.__repository.refs.get(self.__ref())
    def set_head(self, commit, msg):
        """Point the branch ref at C{commit}, recording C{msg}."""
        self.__repository.refs.set(self.__ref(), commit, msg)

    def set_parent_remote(self, name):
        """Set the C{branch.<name>.remote} configuration value."""
        config.set('branch.%s.remote' % self.__name, name)
    def set_parent_branch(self, name):
        """Set the C{branch.<name>.merge} configuration value, but only
        if a remote is configured for the branch."""
        if config.get('branch.%s.remote' % self.__name):
            # Never set merge if remote is not set to avoid
            # possibly-erroneous lookups into 'origin'
            config.set('branch.%s.merge' % self.__name, name)

    @classmethod
    def create(cls, repository, name, create_at = None):
        """Create a new Git branch and return the corresponding
        L{Branch} object.

        @param create_at: Commit the new branch should point to. If
            omitted, no start point is passed to C{git branch}.
        @raise BranchException: If the branch already exists"""
        try:
            cls(repository, name)
        except BranchException:
            pass
        else:
            raise BranchException('%s: branch already exists' % name)

        # The branch name must be part of the command, and the start
        # point is only added (and only dereferenced) when one is given.
        cmd = ['git', 'branch', name]
        if create_at:
            cmd.append(create_at.sha1)
        repository.run(cmd).discard_output()

        # NOTE(review): the lookup below goes through repository.refs,
        # which caches ref values. The existence check above will
        # already have filled that cache, so with the Refs class in this
        # file the new ref is not visible until the cache is refreshed,
        # and Refs offers no public way to do that. TODO: confirm and
        # add a cache reset.
        return cls(repository, name)
674
675 return cls(repository, name)