# commit.py
# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
#
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
from gitdb.typ import ObjectType
from tree import Tree
from cStringIO import StringIO
import base
from gitdb.util import (
hex_to_bin,
Actor,
)
from util import (
Traversable,
Serializable,
altz_to_utctz_str,
parse_actor_and_date
)
import sys
__all__ = ('Commit', )
class Commit(base.Object, Traversable, Serializable):
"""Wraps a git Commit object.
This class will act lazily on some of its attributes and will query the
value on demand only if it involves calling the git binary."""
# ENVIRONMENT VARIABLES
# read when creating new commits
env_author_date = "GIT_AUTHOR_DATE"
env_committer_date = "GIT_COMMITTER_DATE"
# CONFIGURATION KEYS
conf_encoding = 'i18n.commitencoding'
# INVARIANTS
default_encoding = "UTF-8"
# object configuration
type = ObjectType.commit
type_id = ObjectType.commit_id
__slots__ = ("tree",
"author", "authored_date", "author_tz_offset",
"committer", "committed_date", "committer_tz_offset",
"message", "parents", "encoding")
_id_attribute_ = "binsha"
def __init__(self, odb, binsha, tree=None, author=None, authored_date=None, author_tz_offset=None,
committer=None, committed_date=None, committer_tz_offset=None,
message=None, parents=None, encoding=None):
"""Instantiate a new Commit. All keyword arguments taking None as default will
be implicitly set on first query.
:param binsha: 20 byte sha1
:param parents: tuple( Commit, ... )
is a tuple of commit ids or actual Commits
:param tree: Tree
Tree object
:param author: Actor
is the author string ( will be implicitly converted into an Actor object )
:param authored_date: int_seconds_since_epoch
is the authored DateTime - use time.gmtime() to convert it into a
different format
:param author_tz_offset: int_seconds_west_of_utc
is the timezone that the authored_date is in
:param committer: Actor
is the committer string
:param committed_date: int_seconds_since_epoch
is the committed DateTime - use time.gmtime() to convert it into a
different format
:param committer_tz_offset: int_seconds_west_of_utc
is the timezone that the authored_date is in
:param message: string
is the commit message
:param encoding: string
encoding of the message, defaults to UTF-8
:param parents:
List or tuple of Commit objects which are our parent(s) in the commit
dependency graph
:return: git.Commit
:note: Timezone information is in the same format and in the same sign
as what time.altzone returns. The sign is inverted compared to git's
UTC timezone."""
super(Commit,self).__init__(odb, binsha)
if tree is not None:
assert isinstance(tree, Tree), "Tree needs to be a Tree instance, was %s" % type(tree)
if tree is not None:
self.tree = tree
if author is not None:
self.author = author
if authored_date is not None:
self.authored_date = authored_date
if author_tz_offset is not None:
self.author_tz_offset = author_tz_offset
if committer is not None:
self.committer = committer
if committed_date is not None:
self.committed_date = committed_date
if committer_tz_offset is not None:
self.committer_tz_offset = committer_tz_offset
if message is not None:
self.message = message
if parents is not None:
self.parents = parents
if encoding is not None:
self.encoding = encoding
@classmethod
def _get_intermediate_items(cls, commit):
return commit.parents
def _set_cache_(self, attr):
if attr in Commit.__slots__:
# read the data in a chunk, its faster - then provide a file wrapper
binsha, typename, self.size, stream = self.odb.stream(self.binsha)
self._deserialize(StringIO(stream.read()))
else:
super(Commit, self)._set_cache_(attr)
# END handle attrs
@property
def summary(self):
""":return: First line of the commit message"""
return self.message.split('\n', 1)[0]
@classmethod
def _iter_from_process_or_stream(cls, odb, proc_or_stream):
"""Parse out commit information into a list of Commit objects
We expect one-line per commit, and parse the actual commit information directly
from our lighting fast object database
:param proc: git-rev-list process instance - one sha per line
:return: iterator returning Commit objects"""
stream = proc_or_stream
if not hasattr(stream,'readline'):
stream = proc_or_stream.stdout
readline = stream.readline
while True:
line = readline()
if not line:
break
hexsha = line.strip()
if len(hexsha) > 40:
# split additional information, as returned by bisect for instance
hexsha, rest = line.split(None, 1)
# END handle extra info
assert len(hexsha) == 40, "Invalid line: %s" % hexsha
yield cls(odb, hex_to_bin(hexsha))
# END for each line in stream
#{ Serializable Implementation
def _serialize(self, stream):
write = stream.write
write("tree %s\n" % self.tree)
for p in self.parents:
write("parent %s\n" % p)
a = self.author
aname = a.name
if isinstance(aname, unicode):
aname = aname.encode(self.encoding)
# END handle unicode in name
c = self.committer
fmt = "%s %s <%s> %s %s\n"
write(fmt % ("author", aname, a.email,
self.authored_date,
altz_to_utctz_str(self.author_tz_offset)))
# encode committer
aname = c.name
if isinstance(aname, unicode):
aname = aname.encode(self.encoding)
# END handle unicode in name
write(fmt % ("committer", aname, c.email,
self.committed_date,
altz_to_utctz_str(self.committer_tz_offset)))
if self.encoding != self.default_encoding:
write("encoding %s\n" % self.encoding)
write("\n")
# write plain bytes, be sure its encoded according to our encoding
if isinstance(self.message, unicode):
write(self.message.encode(self.encoding))
else:
write(self.message)
# END handle encoding
return self
def _deserialize(self, stream):
""":param from_rev_list: if true, the stream format is coming from the rev-list command
Otherwise it is assumed to be a plain data stream from our object"""
readline = stream.readline
self.tree = Tree(self.odb, hex_to_bin(readline().split()[1]), Tree.tree_id<<12, '')
self.parents = list()
next_line = None
while True:
parent_line = readline()
if not parent_line.startswith('parent'):
next_line = parent_line
break
# END abort reading parents
self.parents.append(type(self)(self.odb, hex_to_bin(parent_line.split()[-1])))
# END for each parent line
self.parents = tuple(self.parents)
self.author, self.authored_date, self.author_tz_offset = parse_actor_and_date(next_line)
self.committer, self.committed_date, self.committer_tz_offset = parse_actor_and_date(readline())
# now we can have the encoding line, or an empty line followed by the optional
# message.
self.encoding = self.default_encoding
# read encoding or empty line to separate message
enc = readline()
enc = enc.strip()
if enc:
self.encoding = enc[enc.find(' ')+1:]
# now comes the message separator
readline()
# END handle encoding
# decode the authors name
try:
self.author.name = self.author.name.decode(self.encoding)
except UnicodeDecodeError:
print >> sys.stderr, "Failed to decode author name '%s' using encoding %s" % (self.author.name, self.encoding)
# END handle author's encoding
# decode committer name
try:
self.committer.name = self.committer.name.decode(self.encoding)
except UnicodeDecodeError:
print >> sys.stderr, "Failed to decode committer name '%s' using encoding %s" % (self.committer.name, self.encoding)
# END handle author's encoding
# a stream from our data simply gives us the plain message
# The end of our message stream is marked with a newline that we strip
self.message = stream.read()
try:
self.message = self.message.decode(self.encoding)
except UnicodeDecodeError:
print >> sys.stderr, "Failed to decode message '%s' using encoding %s" % (self.message, self.encoding)
# END exception handling
return self
#} END serializable implementation