Optimise 'push' to use git-apply instead of git-read-tree
[stgit] / stgit / git.py
1 """Python GIT interface
2 """
3
4 __copyright__ = """
5 Copyright (C) 2005, Catalin Marinas <catalin.marinas@gmail.com>
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License version 2 as
9 published by the Free Software Foundation.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 """
20
21 import sys, os, glob, popen2
22
23 from stgit.utils import *
24
class GitException(Exception):
    """Exception raised by this module when a git operation fails
    """
28
29
# Repository location: taken from the GIT_DIR environment variable when
# it is set, otherwise the '.git' directory relative to the current one
base_dir = os.environ.get('GIT_DIR', '.git')

# path of the HEAD link inside the repository directory
head_link = os.path.join(base_dir, 'HEAD')
37
38 #
39 # Classes
40 #
class Commit:
    """Handle the commit objects

    The object is built from the output of 'git-cat-file commit <id>':
    the header lines (tree, parent, author, committer) are parsed up to
    the first empty line and everything after it is kept as the log
    message.
    """
    def __init__(self, id_hash):
        """Read and parse the commit with the given id. The exception
        raised by _output_lines is propagated if the git command fails
        """
        self.__id_hash = id_hash
        # initialise every field so that the accessors never hit an
        # attribute that was not present in the commit header
        self.__tree = None
        self.__parents = []
        self.__author = None
        self.__committer = None

        lines = _output_lines('git-cat-file commit %s' % id_hash)

        # index of the first log line; stays at len(lines) (i.e. an
        # empty log) when no header/log separator is found
        log_start = len(lines)
        for idx, line in enumerate(lines):
            if line == '\n':
                log_start = idx + 1
                break
            field = line.strip().split(' ', 1)
            if len(field) != 2:
                # header keyword without a value, nothing to record
                continue
            key, value = field
            if key == 'tree':
                self.__tree = value
            elif key == 'parent':
                self.__parents.append(value)
            elif key == 'author':
                self.__author = value
            elif key == 'committer':
                self.__committer = value
        self.__log = ''.join(lines[log_start:])

    def get_id_hash(self):
        """Return the id this object was created with
        """
        return self.__id_hash

    def get_tree(self):
        """Return the tree id, or None if the header had no tree line
        """
        return self.__tree

    def get_parent(self):
        """Return the first parent id. Raises IndexError if the commit
        has no parents
        """
        return self.__parents[0]

    def get_parents(self):
        """Return the list of all the parent ids
        """
        return self.__parents

    def get_author(self):
        """Return the content of the author header line, or None
        """
        return self.__author

    def get_committer(self):
        """Return the content of the committer header line, or None
        """
        return self.__committer

    def get_log(self):
        """Return the log message (the text following the header)
        """
        return self.__log
84
# cache of Commit objects keyed by their id, used to avoid multiple
# calls to git for the same commit
__commits = {}

#
# Functions
#
def get_commit(id_hash):
    """Return the Commit object for the given id. The object is created
    on the first request and stored in the __commits dictionary; later
    requests for the same id get the stored object
    """
    if id_hash not in __commits:
        __commits[id_hash] = Commit(id_hash)
    return __commits[id_hash]
101
def get_conflicts():
    """Return the names listed in the 'conflicts' file of the repository
    directory (one per line, stripped), or None if there is no such file
    """
    path = os.path.join(base_dir, 'conflicts')
    if not os.path.isfile(path):
        return None

    conflicts_fd = open(path)
    entries = []
    for entry in conflicts_fd.readlines():
        entries.append(entry.strip())
    conflicts_fd.close()
    return entries
113
def _input(cmd, file_desc):
    """Run cmd and feed it, line by line, everything that can be read
    from file_desc. Raise GitException if the command exits with a
    non-zero status
    """
    child = popen2.Popen3(cmd)
    line = file_desc.readline()
    while line:
        child.tochild.write(line)
        line = file_desc.readline()
    child.tochild.close()
    status = child.wait()
    if status:
        raise GitException('%s failed' % str(cmd))
124
def _output(cmd):
    """Run cmd and return its whole standard output as a single string.
    Raise GitException if the command exits with a non-zero status
    """
    child = popen2.Popen3(cmd)
    output = child.fromchild.read()
    status = child.wait()
    if status:
        raise GitException('%s failed' % str(cmd))
    return output
131
def _output_one_line(cmd, file_desc = None):
    """Run cmd and return the first line of its output, stripped. When
    file_desc is given, its items are written to the command's standard
    input, which is then closed, before the output is read. Raise
    GitException if the command exits with a non-zero status
    """
    child = popen2.Popen3(cmd)
    if file_desc is not None:
        for chunk in file_desc:
            child.tochild.write(chunk)
        child.tochild.close()
    first_line = child.fromchild.readline().strip()
    status = child.wait()
    if status:
        raise GitException('%s failed' % str(cmd))
    return first_line
142
def _output_lines(cmd):
    """Run cmd and return its standard output as a list of lines (as
    given by readlines). Raise GitException if the command exits with a
    non-zero status
    """
    child = popen2.Popen3(cmd)
    output = child.fromchild.readlines()
    status = child.wait()
    if status:
        raise GitException('%s failed' % str(cmd))
    return output
149
def __run(cmd, args=None):
    """__run: runs cmd using spawnvp.

    Runs cmd using spawnvp. The shell is avoided so it won't mess up
    our arguments. cmd is split on whitespace to get the program name
    and its fixed options. If args is very large, the command is run
    multiple times; args is split xargs style (100 arguments at a
    time): cmd is passed on each invocation. Unlike xargs, returns
    immediately if any non-zero return code is received.

    Returns 0 if every invocation succeeded, otherwise the first
    non-zero value returned by spawnvp.
    """
    chunk_size = 100
    argv = cmd.split()

    # without extra arguments the command is run exactly once
    if not args:
        return os.spawnvp(os.P_WAIT, argv[0], argv)

    # iterate over len(args), not len(args)+1: the latter ran the
    # command one more time, without any of the extra arguments,
    # whenever len(args) was a multiple of the chunk size
    for start in range(0, len(args), chunk_size):
        status = os.spawnvp(os.P_WAIT, argv[0],
                            argv + args[start:start + chunk_size])
        if status:
            return status
    return 0
168
def __check_base_dir():
    """Return True if the repository directory (base_dir) exists and
    is a directory
    """
    exists = os.path.isdir(base_dir)
    return exists
171
def __tree_status(files = [], tree_id = 'HEAD', unknown = False,
                  noexclude = True):
    """Returns a list of pairs - [status, filename]

    The list contains, in this order: the files unknown to git (status
    '?', only when 'unknown' is set), the files recorded as conflicts
    (status 'C') and the entries reported by git-diff-index against
    tree_id which are not in the conflicts list.
    """
    os.system('git-update-index --refresh > /dev/null')

    status_list = []

    # files not known to git
    if unknown:
        if noexclude:
            exclude_opts = []
        else:
            exclude_opts = []
            for pattern in ['*.[ao]', '*.pyc', '.*', '*~', '#*',
                            'TAGS', 'tags']:
                exclude_opts.append('--exclude=%s' % pattern)
            exclude_opts.append('--exclude-per-directory=.gitignore')

            # per-repository exclude patterns, if any
            exclude_file = os.path.join(base_dir, 'info', 'exclude')
            if os.path.exists(exclude_file):
                exclude_opts.append('--exclude-from=%s' % exclude_file)

        ls_cmd = ['git-ls-files', '--others'] + exclude_opts
        for line in _output_lines(ls_cmd):
            status_list.append(('?', line.strip()))

    # files with conflicts
    conflicts = get_conflicts() or []
    for name in conflicts:
        status_list.append(('C', name))

    # everything else, skipping the files already listed as conflicts
    for line in _output_lines(['git-diff-index', '-r', tree_id] + files):
        # the status and the file name are tab-separated and follow the
        # fourth space of the line
        entry = tuple(line.rstrip().split(' ',4)[-1].split('\t',1))
        if entry[1] in conflicts:
            continue
        status_list.append(entry)

    return status_list
211
def local_changes():
    """Return true if the tree status list is not empty, i.e. there
    are local changes in the tree
    """
    if __tree_status():
        return True
    return False
216
def get_head():
    """Return the string stored in the HEAD link of the repository
    """
    head = read_string(head_link)
    return head
221
def get_head_file():
    """Return the base name of the file the HEAD symbolic link points
    to. Raise GitException if HEAD is not a symbolic link to an existing
    file
    """
    valid_link = os.path.islink(head_link) and os.path.isfile(head_link)
    if not valid_link:
        raise GitException('Invalid .git/HEAD link. Git tree not initialised?')

    target = os.readlink(head_link)
    return os.path.basename(target)
230
def __set_head(val):
    """Write the given value to the HEAD link of the repository
    """
    write_string(head_link, val)
235
def rev_parse(git_id):
    """Pass the string to git-rev-parse and return its stripped output
    (an SHA1 id)
    """
    output = _output(['git-rev-parse', git_id])
    return output.strip()
240
def add(names):
    """Add the given files to the index. Directories are walked
    recursively and only the regular files found in them are added.
    Raise GitException for a name which does not exist, which is neither
    a file nor a directory, or if git-update-index fails
    """
    # collect the normalised paths of all the files to add
    to_add = []
    for name in names:
        if not os.path.exists(name):
            raise GitException('Unknown file or directory: %s' % name)

        if os.path.isdir(name):
            # recursive search. We only add files
            for root, dirs, local_files in os.walk(name):
                for local_file in local_files:
                    path = os.path.join(root, local_file)
                    if os.path.isfile(path):
                        to_add.append(os.path.normpath(path))
        elif os.path.isfile(name):
            to_add.append(os.path.normpath(name))
        else:
            raise GitException('%s is not a file or directory' % name)

    if not to_add:
        return
    if __run('git-update-index --add --', to_add):
        raise GitException('Unable to add file')
264
def rm(files, force = False):
    """Remove the given files from the index

    Without 'force', every file must already be gone from the working
    directory (GitException is raised for the first one which still
    exists) and the index is updated with 'git-update-index --remove'.
    With 'force', no check is done and '--force-remove' is used.
    Nothing is run if the list of files is empty. The exit status of
    git-update-index is not checked.
    """
    if force:
        git_opt = '--force-remove'
    else:
        git_opt = '--remove'
        # refuse to drop from the index files which are still present
        for f in files:
            if os.path.exists(f):
                raise GitException('%s exists. Remove it first' % f)

    if files:
        __run('git-update-index %s --' % git_opt, files)
282
def update_cache(files = [], force = False):
    """Update the cache information for the given files

    Return False if the tree status reports nothing to update and True
    after the index was updated. Unless 'force' is set, GitException is
    raised when an entry has a status other than M, N, A or D. It is
    also raised when one of the git-update-index invocations fails.
    """
    cache_files = __tree_status(files)

    # everything is up-to-date
    if not cache_files:
        return False

    # any status other than the ones handled below stops the update
    if not force:
        for entry in cache_files:
            if entry[0] not in ['M', 'N', 'A', 'D']:
                raise GitException('Updating cache failed: unresolved conflicts')

    # sort the file names by the kind of update they need
    add_files = []
    rm_files = []
    m_files = []
    for entry in cache_files:
        status = entry[0]
        if status in ['N', 'A']:
            add_files.append(entry[1])
        elif status == 'D':
            rm_files.append(entry[1])
        elif status == 'M':
            m_files.append(entry[1])

    if add_files:
        if __run('git-update-index --add --', add_files) != 0:
            raise GitException('Failed git-update-index --add')
    if rm_files:
        if __run('git-update-index --force-remove --', rm_files) != 0:
            raise GitException('Failed git-update-index --rm')
    if m_files:
        if __run('git-update-index --', m_files) != 0:
            raise GitException('Failed git-update-index')

    return True
310
def _dquote_escape(value):
    """Escape the characters which are special inside a double-quoted
    shell string (backslash, double quote, dollar and backquote)
    """
    # the backslash must be handled first, otherwise the backslashes
    # added for the other characters would get escaped again
    for char in ['\\', '"', '$', '`']:
        value = value.replace(char, '\\' + char)
    return value

def commit(message, files = [], parents = [], allowempty = False,
           cache_update = True, tree_id = None,
           author_name = None, author_email = None, author_date = None,
           committer_name = None, committer_email = None):
    """Commit the current tree to repository

    message      -- the commit log; a trailing newline is added if missing
    files        -- passed to update_cache when the cache is updated
    parents      -- ids of the parent commits (one '-p' option each)
    allowempty   -- do not fail if update_cache reports no changes
    cache_update -- update the cache before committing; only done when
                    the list of parents is not empty
    tree_id      -- tree to commit. When None, the index is written with
                    git-write-tree and HEAD is set to the new commit;
                    when given, HEAD is left untouched
    author_*, committer_* -- when set, exported to git-commit-tree via
                    the corresponding GIT_* environment variables

    Return the id of the new commit. Raise GitException if there are no
    changes to commit (and allowempty is not set) or if a git command
    fails.
    """
    # Get the tree status
    if cache_update and parents != []:
        changes = update_cache(files)
        if not changes and not allowempty:
            raise GitException('No changes to commit')

    # get the commit message
    if message[-1:] != '\n':
        message += '\n'

    # write the index to repository; HEAD only follows the new commit
    # if the tree was generated here
    must_switch = tree_id is None
    if must_switch:
        tree_id = _output_one_line('git-write-tree')

    # the commit. The command line is interpreted by the shell, hence
    # the values placed between double quotes have to be escaped
    cmd = ''
    for var, value in [('GIT_AUTHOR_NAME', author_name),
                       ('GIT_AUTHOR_EMAIL', author_email),
                       ('GIT_AUTHOR_DATE', author_date),
                       ('GIT_COMMITTER_NAME', committer_name),
                       ('GIT_COMMITTER_EMAIL', committer_email)]:
        if value:
            cmd += '%s="%s" ' % (var, _dquote_escape(value))
    cmd += 'git-commit-tree %s' % tree_id

    # get the parents
    for p in parents:
        cmd += ' -p %s' % p

    commit_id = _output_one_line(cmd, message)
    if must_switch:
        __set_head(commit_id)

    return commit_id
357
def apply_diff(rev1, rev2):
    """Pipe the diff between rev1 and rev2 into 'git-apply --index' and
    return True if the pipeline succeeded, False otherwise. No exception
    is raised since this is only used for fast-pushing a patch: when it
    fails, the pushing falls back to the three-way merge.
    """
    pipeline = 'git-diff-tree -p %s %s | git-apply --index 2> /dev/null' \
               % (rev1, rev2)
    status = os.system(pipeline)
    return status == 0
366
def merge(base, head1, head2):
    """Perform a 3-way merge between base, head1 and head2 into the
    local tree

    The three trees are first read with 'git-read-tree -u -m', then
    git-merge-index runs gitmergeonefile.py on all the entries of the
    index. GitException is raised if either of the two steps fails.
    """
    if __run('git-read-tree -u -m', [base, head1, head2]) != 0:
        raise GitException('git-read-tree failed (local changes maybe?)')

    # this can fail if there are conflicts. The message names the
    # command actually run (it used to mention git-merge-cache)
    if os.system('git-merge-index -o -q gitmergeonefile.py -a') != 0:
        raise GitException('git-merge-index failed (possible conflicts)')
377
def status(files = [], modified = False, new = False, deleted = False,
           conflict = False, unknown = False, noexclude = False):
    """Print the tree status

    Without any filter, every entry is printed as '<status> <name>'.
    When at least one of modified/new/deleted/conflict/unknown is set,
    only the names of the entries with a matching status are printed.
    """
    entries = __tree_status(files, unknown = True, noexclude = noexclude)
    show_all = not (modified or new or deleted or conflict or unknown)

    if show_all:
        for entry in entries:
            print('%s %s' % (entry[0], entry[1]))
        return

    # status letters selected by the filters
    wanted = []
    for enabled, letters in [(modified, ['M']), (new, ['A', 'N']),
                             (deleted, ['D']), (conflict, ['C']),
                             (unknown, ['?'])]:
        if enabled:
            wanted += letters

    for entry in entries:
        if entry[0] in wanted:
            print('%s' % entry[1])
405
def diff(files = [], rev1 = 'HEAD', rev2 = None, out_fd = None):
    """Generate the diff between rev1 and rev2 (git-diff-tree) or, if
    rev2 is not given, between rev1 and the refreshed index/tree
    (git-diff-index). The diff is written to out_fd when given,
    otherwise it is returned as a string
    """
    if rev2:
        diff_cmd = ['git-diff-tree', '-p', rev1, rev2]
    else:
        os.system('git-update-index --refresh > /dev/null')
        diff_cmd = ['git-diff-index', '-p', rev1]

    text = _output(diff_cmd + files)

    if not out_fd:
        return text
    out_fd.write(text)
420
def diffstat(files = [], rev1 = 'HEAD', rev2 = None):
    """Return the output of 'git-apply --stat' (trailing white space
    removed) for the diff between rev1 and rev2. Raise GitException if
    git-apply fails
    """
    child = popen2.Popen3('git-apply --stat')
    # the diff is written straight to the standard input of git-apply
    diff(files, rev1, rev2, child.tochild)
    child.tochild.close()
    stat = child.fromchild.read().rstrip()
    status = child.wait()
    if status:
        raise GitException('git.diffstat failed')
    return stat
432
def files(rev1, rev2):
    """Return the files modified between rev1 and rev2 as a string with
    one '<status> <name>' entry per line
    """
    entries = []
    for line in _output_lines('git-diff-tree -r %s %s' % (rev1, rev2)):
        # status and file name come after the fourth space, tab-separated
        fields = line.rstrip().split(' ',4)[-1].split('\t',1)
        entries.append('%s %s' % tuple(fields))

    return '\n'.join(entries).rstrip()
442
def barefiles(rev1, rev2):
    """Return the names of the files modified between rev1 and rev2,
    one per line and without the status information
    """
    names = []
    for line in _output_lines('git-diff-tree -r %s %s' % (rev1, rev2)):
        # the file name is what follows the tab in the last field
        fields = line.rstrip().split(' ',4)[-1].split('\t',1)
        names.append('%s' % fields[-1])

    return '\n'.join(names).rstrip()
452
def checkout(files = [], tree_id = None, force = False):
    """Check out the given files, or all of them if the list is empty.
    When tree_id is given, it is read into the index first. 'force'
    adds the '-f' option to git-checkout-index. Raise GitException if
    one of the git commands fails
    """
    if tree_id:
        if __run('git-read-tree -m', [tree_id]) != 0:
            raise GitException('Failed git-read-tree -m %s' % tree_id)

    cmd_parts = ['git-checkout-index', '-q', '-u']
    if force:
        cmd_parts.append('-f')
    if len(files) == 0:
        # no explicit list, check out everything
        cmd_parts.append('-a')
    else:
        cmd_parts.append('--')

    status = __run(' '.join(cmd_parts), files)
    if status != 0:
        raise GitException('Failed git-checkout-index')
469
def switch(tree_id):
    """Move the tree from the current HEAD to the given id with
    'git-read-tree -u -m' and then set HEAD to it. Raise GitException
    if git-read-tree fails
    """
    current = get_head()
    status = __run('git-read-tree -u -m', [current, tree_id])
    if status != 0:
        raise GitException('git-read-tree failed (local changes maybe?)')

    __set_head(tree_id)
477
def reset(tree_id = None):
    """Revert the tree changes relative to the given tree_id. It removes
    any local changes

    tree_id defaults to the current HEAD. The tree is force-checked
    out, HEAD is set to tree_id and the files which were added relative
    to tree_id are deleted from the working directory.
    """
    if not tree_id:
        tree_id = get_head()

    # The status must be collected before the checkout changes the
    # index. Only the added files need removing afterwards: files
    # reported as deleted ('D') are restored by the forced checkout
    # and must be kept
    cache_files = __tree_status(tree_id = tree_id)
    added_files = [x[1] for x in cache_files if x[0] in ['N', 'A']]

    checkout(tree_id = tree_id, force = True)
    __set_head(tree_id)

    # checkout doesn't remove files
    for name in added_files:
        # the file may have been added to the index and then deleted
        if os.path.exists(name):
            os.remove(name)
493
def pull(repository = 'origin', refspec = None):
    """Pull changes from the remote repository by running 'git pull'
    with the repository and, if given, the refspec. Raise GitException
    if the command fails
    """
    if refspec:
        pull_args = [repository, refspec]
    else:
        pull_args = [repository]

    status = __run('git pull', pull_args)
    if status != 0:
        raise GitException('Failed "git pull %s"' % repository)
504
def apply_patch(filename = None):
    """Apply a patch onto the current index with 'git-apply --index'.
    The patch is read from the given file or, without a file name, from
    the standard input. There must not be any local changes in the
    tree, otherwise the command fails
    """
    os.system('git-update-index --refresh > /dev/null')

    if not filename:
        _input('git-apply --index', sys.stdin)
        return

    status = __run('git-apply --index', [filename])
    if status != 0:
        raise GitException('Patch does not apply cleanly')
516
def clone(repository, local_dir):
    """Clone a remote repository into local_dir by running the
    'git clone' script. Raise GitException if the command fails
    """
    status = __run('git clone', [repository, local_dir])
    if status != 0:
        raise GitException('Failed "git clone %s %s"' \
              % (repository, local_dir))