To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.
root / extra / fast-export / hg-fast-export.py @ 1545:f81fcbde7eaf
History | View | Annotate | Download (15.2 KB)
| 1 |
#!/usr/bin/env python
|
|---|---|
| 2 |
|
| 3 |
# Copyright (c) 2007, 2008 Rocco Rutte <pdmef@gmx.net> and others.
|
| 4 |
# License: MIT <http://www.opensource.org/licenses/mit-license.php>
|
| 5 |
|
| 6 |
from mercurial import node |
| 7 |
from hg2git import setup_repo,fixup_user,get_branch,get_changeset |
| 8 |
from hg2git import load_cache,save_cache,get_git_sha1,set_default_branch,set_origin_name |
| 9 |
from optparse import OptionParser |
| 10 |
import re |
| 11 |
import sys |
| 12 |
import os |
| 13 |
|
| 14 |
if sys.platform == "win32":
    # Windows opens sys.stdout in text mode, so each LF (\n) written would be
    # converted to CRLF (\r\n). git cannot cope with that, so switch stdout
    # to binary mode using the platform-specific msvcrt call.
    import msvcrt
    msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)

# Matches a "Signed-off-by: <who>" line of a log message; group 1 is <who>.
sob_re = re.compile('^Signed-[Oo]ff-[Bb]y: (.+)$')
# Emit a 'checkpoint' command after this many commits; 0 disables checkpoints.
cfg_checkpoint_count = 0
# Print a progress message each time this many file contents were written.
cfg_export_boundary = 1000
|
| 28 |
|
| 29 |
def gitmode(flags):
    """Map Mercurial file flags to a git file mode string.

    'l' wins over 'x'; anything without either flag is a plain file:
      'l' -> '120000', 'x' -> '100755', otherwise '100644'.
    """
    if 'l' in flags:
        return '120000'
    if 'x' in flags:
        return '100755'
    return '100644'
| 31 |
|
| 32 |
def wr_no_nl(msg=''):
    """Write msg to stdout without a trailing newline.

    An empty (falsy) msg results in no write at all.
    """
    if not msg:
        return
    sys.stdout.write(msg)
| 35 |
|
| 36 |
def wr(msg=''):
    """Write msg (if non-empty) to stdout, then always a single newline."""
    wr_no_nl(msg)
    newline = '\n'
    sys.stdout.write(newline)
|
| 40 |
|
| 41 |
def checkpoint(count):
    """Increment the command counter and emit a 'checkpoint' when one is due.

    A checkpoint (plus a blank line) is written only when
    cfg_checkpoint_count is positive and evenly divides the new count.
    Returns the incremented count.
    """
    count += 1
    due = cfg_checkpoint_count > 0 and count % cfg_checkpoint_count == 0
    if due:
        sys.stderr.write("Checkpoint after %d commits\n" % count)
        wr('checkpoint')
        wr()
    return count
|
| 48 |
|
| 49 |
def revnum_to_revref(rev, old_marks):
    """Convert an hg revnum to a git-fast-import rev reference.

    Returns the entry stored in old_marks for rev when there is a truthy
    one (an SHA1), otherwise the mark ':<rev+1>'.
    """
    known = old_marks.get(rev)
    if known:
        return known
    return ':%d' % (rev + 1)
| 53 |
|
| 54 |
def file_mismatch(f1, f2):
    """Return True when the hex forms of the two file nodes differ."""
    left_hex = node.hex(f1)
    right_hex = node.hex(f2)
    return left_hex != right_hex
|
| 57 |
|
| 58 |
def split_dict(dleft, dright, l=None, c=None, r=None, match=file_mismatch):
    """Compare two manifests and collect added, changed and removed files.

    dleft  -- our manifest (needs keys(), get(), item access and flags())
    dright -- the parent's manifest (same interface)
    l      -- list receiving files present only in dleft
    c      -- list receiving files present in both whose contents differ
              according to match() or whose git mode differs
    r      -- list receiving files present only in dright
    match  -- callable(left_node, right_node) -> True if the contents differ

    The lists passed in are extended in place, so results can be
    accumulated over several parents. Omitted lists start out fresh on
    every call; the previous `[]` defaults were shared between calls and
    kept growing.

    Returns the tuple (l, c, r).
    """
    if l is None:
        l = []
    if c is None:
        c = []
    if r is None:
        r = []

    for name in dleft.keys():
        theirs = dright.get(name, None)
        if theirs is None:
            # we have the file but our parent hasn't: add to left set
            l.append(name)
        elif (match(dleft[name], theirs)
              or gitmode(dleft.flags(name)) != gitmode(dright.flags(name))):
            # both have it but checksum or mode mismatch: add to center set
            c.append(name)

    for name in dright.keys():
        # changes were already handled above; only look for files the
        # parent has but we don't: add to right set
        if dleft.get(name, None) is None:
            r.append(name)

    return l, c, r
|
| 75 |
|
| 76 |
def get_filechanges(repo, revision, parents, mleft):
    """Collect added/changed/removed files of mleft against every parent.

    Negative parent revisions are skipped. For each remaining parent its
    manifest is fetched via repo.changectx(p).manifest() and compared with
    mleft through split_dict(), accumulating into the same three lists.
    Returns the three lists, each sorted.
    """
    added, changed, removed = [], [], []
    for parent in parents:
        if parent >= 0:
            parent_manifest = repo.changectx(parent).manifest()
            added, changed, removed = split_dict(
                mleft, parent_manifest, added, changed, removed)
    for names in (added, changed, removed):
        names.sort()
    return added, changed, removed
|
| 87 |
|
| 88 |
def get_author(logmessage, committer, authors):
    """Derive the git author from trailing Signed-off-by lines.

    git distinguishes between the author and the committer of a patch, so
    this tries to find the author in the log message:

    Starting at the end of the message, blank lines are skipped. From the
    first non-blank line upwards, every consecutive line matching sob_re
    is consumed; the topmost one of that run wins and its name is passed
    through fixup_user() together with the authors table.

    Only this trailing run is considered, because a line in the middle of
    a message may accidentally start with "Signed-off-by: foo" and must
    not be picked up.

    If no trailing Signed-off-by line exists, committer is returned.
    """
    lines = logmessage.split('\n')

    # skip blank lines at the tail
    idx = len(lines) - 1
    while idx >= 0 and not lines[idx].strip():
        idx -= 1

    # walk the trailing run of sob lines upwards, remembering the topmost
    topmost = None
    while idx >= 0:
        hit = sob_re.match(lines[idx])
        if hit is None:
            break
        topmost = hit
        idx -= 1

    if topmost is None:
        return committer
    return fixup_user(topmost.group(1), authors)
|
| 124 |
|
| 125 |
def export_file_contents(ctx, manifest, files, hgtags, encoding=''):
    """Write an inline 'M' command plus data for every file in files.

    ctx      -- change context; file data comes from ctx.filectx(f).data()
    manifest -- manifest queried for each file's flags (-> git mode)
    files    -- file names to export
    hgtags   -- when false, a file named ".hgtags" is skipped
    encoding -- if set, file names are decoded with it and re-encoded as
                utf8 before being written

    Progress goes to stderr every cfg_export_boundary written files, and
    once more at the end when there were more than that many files.
    """
    total = len(files)
    written = 0
    for path in files:
        # Skip .hgtags files. They only get us in trouble.
        if path == ".hgtags" and not hgtags:
            sys.stderr.write('Skip %s\n' % (path))
            continue
        data = ctx.filectx(path).data()
        if not encoding:
            git_path = path
        else:
            git_path = path.decode(encoding).encode('utf8')
        mode = gitmode(manifest.flags(path))
        wr('M %s inline %s' % (mode, strip_leading_slash(git_path)))
        # the length is taken from the data itself ("had some trouble with size()")
        wr('data %d' % len(data))
        wr(data)
        written += 1
        if written % cfg_export_boundary == 0:
            sys.stderr.write('Exported %d/%d files\n' % (written, total))
    if total > cfg_export_boundary:
        sys.stderr.write('Exported %d/%d files\n' % (written, total))
| 147 |
|
| 148 |
def sanitize_name(name, what="branch"):
    """Sanitize a ref name roughly according to git-check-ref-format(1).

    Steps, in order:
      1. '[', space, '~', '^', ':', '?', backslash, '*' and '..' become '_'
      2. a trailing '/' or '.' becomes '_'
      3. a leading '.' of each '/'-separated component becomes '_'
      4. runs of '_' collapse into a single '_'

    name -- the name to sanitize
    what -- label used in the warning message (e.g. "branch" or "tag")

    Returns the sanitized name; a warning is written to stderr whenever it
    differs from the input. Empty names and empty path components (from a
    leading or doubled '/') pass through unchanged instead of raising
    IndexError.
    """
    def dot(part):
        # slice instead of index so that empty components are harmless
        if part[:1] == '.':
            return '_' + part[1:]
        return part

    # raw string, with '[' escaped inside the class, matches the same set
    # of characters as before without invalid-escape/nested-set warnings
    n = re.sub(r'([\[ ~^:?\\*]|\.\.)', '_', name)
    if n[-1:] in ('/', '.'):
        n = n[:-1] + '_'
    n = '/'.join(map(dot, n.split('/')))
    n = re.sub('_+', '_', n)

    if n != name:
        sys.stderr.write('Warning: sanitized %s [%s] to [%s]\n' % (what, name, n))
    return n
|
| 166 |
|
| 167 |
def strip_leading_slash(filename):
    """Return filename without one leading '/', if it has one.

    Only a single slash is removed. An empty filename is returned
    unchanged (indexing filename[0] used to raise IndexError for it).
    """
    if filename[:1] == '/':
        return filename[1:]
    return filename
|
| 171 |
|
| 172 |
def export_commit(ui, repo, revision, old_marks, max, count, authors,
                  branchesmap, sob, brmap, hgtags, notes, encoding='', fn_encoding=''):
    """Write the git-fast-import commands for one hg revision.

    ui, repo    -- handed to get_changeset(); repo also supplies parents,
                   status and the change context
    revision    -- hg revision number; its mark is ':<revision+1>'
    old_marks   -- lookup used by revnum_to_revref() for parent references
    max         -- total shown in the progress message only
    count       -- running command counter, see checkpoint()
    authors     -- author mapping handed to get_changeset()/get_author()
    branchesmap -- optional branch renames, applied before sanitizing
    sob         -- if true, emit an 'author' line derived from
                   Signed-off-by lines
    brmap       -- cache of already sanitized branch names (updated here)
    hgtags      -- whether .hgtags files are exported
    notes       -- whether a git note with the hg hash is generated
    encoding    -- handed to get_changeset()
    fn_encoding -- if set, file names are converted from it to utf8

    Returns the updated command counter.
    """
    def get_branchname(name):
        # brmap memoizes the result so each branch is sanitized only once
        if name in brmap:
            return brmap[name]
        n = sanitize_name(branchesmap.get(name, name))
        brmap[name] = n
        return n

    (revnode, _, user, (time, timezone), _, desc, branch, _) = \
        get_changeset(ui, repo, revision, authors, encoding)

    branch = get_branchname(branch)

    parents = [p for p in repo.changelog.parentrevs(revision) if p >= 0]

    # a parentless revision other than r0 starts a fresh history
    if not parents and revision != 0:
        wr('reset refs/heads/%s' % branch)

    wr('commit refs/heads/%s' % branch)
    wr('mark :%d' % (revision + 1))
    if sob:
        wr('author %s %d %s' % (get_author(desc, user, authors), time, timezone))
    wr('committer %s %d %s' % (user, time, timezone))
    # +1: wr(desc) below appends a newline that is counted as part of the data
    wr('data %d' % (len(desc) + 1))
    wr(desc)
    wr()

    ctx = repo.changectx(str(revision))
    man = ctx.manifest()
    added, changed, removed, delta_kind = [], [], [], ''

    if not parents:
        # first revision: feed in full manifest
        added = sorted(man.keys())
        delta_kind = 'full'
    else:
        wr('from %s' % revnum_to_revref(parents[0], old_marks))
        if len(parents) == 1:
            # later non-merge revision: with exactly one parent just take
            # the changes from status without expensively comparing
            # checksums
            f = repo.status(repo.lookup(parents[0]), revnode)[:3]
            # index 0 is used as "changed", 1 as "added", 2 as "removed"
            added, changed, removed = f[1], f[0], f[2]
            delta_kind = 'simple delta'
        else:  # a merge with two parents
            wr('merge %s' % revnum_to_revref(parents[1], old_marks))
            # comparing checksums is expensive for many files, so only do
            # it for merges where it is needed due to hg's revlog logic
            added, changed, removed = get_filechanges(repo, revision, parents, man)
            delta_kind = 'thorough delta'

    sys.stderr.write('%s: Exporting %s revision %d/%d with %d/%d/%d added/changed/removed files\n' %
                     (branch, delta_kind, revision + 1, max, len(added), len(changed), len(removed)))

    if fn_encoding:
        removed = [r.decode(fn_encoding).encode('utf8') for r in removed]

    removed = [strip_leading_slash(x) for x in removed]

    # explicit loop: map() used for side effects is lazy on newer Pythons
    for path in removed:
        wr('D %s' % path)
    export_file_contents(ctx, man, added, hgtags, fn_encoding)
    export_file_contents(ctx, man, changed, hgtags, fn_encoding)
    wr()

    count = checkpoint(count)
    count = generate_note(user, time, timezone, revision, ctx, count, notes)
    return count
|
| 241 |
|
| 242 |
def generate_note(user, time, timezone, revision, ctx, count, notes):
    """Attach the hg hash of a revision as a git note in refs/notes/hg.

    Does nothing and returns count unchanged when notes is false.
    Otherwise a notes commit is written whose inline note for mark
    ':<revision+1>' holds ctx.hex(), and the result of checkpoint(count)
    is returned.
    """
    if notes:
        wr('commit refs/notes/hg')
        wr('committer %s %d %s' % (user, time, timezone))
        wr('data 0')
        wr('N inline :%d' % (revision + 1))
        hg_hash = ctx.hex()
        wr('data %d' % (len(hg_hash)))
        # the hash itself carries no newline; the following wr() ends the line
        wr_no_nl(hg_hash)
        wr()
        return checkpoint(count)
    return count
|
| 254 |
|
| 255 |
def export_tags(ui, repo, old_marks, mapping_cache, count, authors, tagsmap):
    """Write a 'reset refs/tags/<tag>' for every exportable hg tag.

    Tags come from repo.tagslist(). Each name is remapped through tagsmap
    and sanitized; 'tip' is skipped, as are tags whose node is not in
    mapping_cache. Returns the updated command counter.
    """
    for tag, tagnode in repo.tagslist():
        # Remap the tag name
        tag = sanitize_name(tagsmap.get(tag, tag), "tag")
        # ignore latest revision
        if tag == 'tip':
            continue
        hexnode = tagnode.encode('hex_codec')
        # ignore tags to nodes that are missing (ie, 'in the future')
        if hexnode not in mapping_cache:
            sys.stderr.write('Tag %s refers to unseen node %s\n' % (tag, hexnode))
            continue

        rev = int(mapping_cache[hexnode])

        ref = revnum_to_revref(rev, old_marks)
        if ref is None:
            sys.stderr.write('Failed to find reference for creating tag'
                             ' %s at r%d\n' % (tag, rev))
            continue
        sys.stderr.write('Exporting tag [%s] at [hg r%d] [git %s]\n' % (tag, rev, ref))
        wr('reset refs/tags/%s' % tag)
        wr('from %s' % ref)
        wr()
        count = checkpoint(count)
    return count
|
| 280 |
|
| 281 |
def load_mapping(name, filename):
    """Read a 'key = value' mapping file into a dict.

    name     -- label used in the "Loaded N <name>" message on stderr
    filename -- path of the mapping file

    Blank lines and lines starting with '#' are ignored. Every other line
    must look like 'key=value' (the key may not contain '='; spaces around
    either part are stripped); lines that do not are reported on stderr
    with their line number and skipped. A missing file yields an empty
    dict without any message.

    Returns the dict of loaded entries.
    """
    cache = {}
    if not os.path.exists(filename):
        return cache

    lre = re.compile('^([^=]+)[ ]*=[ ]*(.+)$')
    loaded = 0
    # 'with' makes sure the file is closed even if reading fails half-way
    with open(filename, 'r') as f:
        for lineno, line in enumerate(f, 1):
            line = line.strip()
            if line == '' or line[0] == '#':
                continue
            m = lre.match(line)
            if m is None:
                sys.stderr.write('Invalid file format in [%s], line %d\n' % (filename, lineno))
                continue
            cache[m.group(1).strip()] = m.group(2).strip()
            loaded += 1

    sys.stderr.write('Loaded %d %s\n' % (loaded, name))
    return cache
|
| 304 |
|
| 305 |
def branchtip(repo, heads):
    '''Return the tipmost head in heads that is not marked closed.

    Heads are tried from last to first; a head counts as closed when
    'close' is in element 5 of its changelog entry. If every head is
    closed, the last head is returned.
    '''
    fallback = heads[-1]
    for head in reversed(heads):
        if 'close' not in repo.changelog.read(head)[5]:
            return head
    return fallback
|
| 313 |
|
| 314 |
def verify_heads(ui, repo, cache, force):
    """Check that the git heads still match the cached state.

    For the tip of every hg branch (see branchtip()), the git SHA1 of the
    corresponding git branch is compared with the value stored in cache.
    Afterwards every repository head is checked so that no branch has
    more than one head.

    Each problem is reported on stderr. Returns False at the first
    problem unless force is set; returns True otherwise.
    """
    tips = {}
    for bn, heads in repo.branchmap().iteritems():
        tips[bn] = branchtip(repo, heads)
    ordered = [(-repo.changelog.rev(n), n, t) for t, n in tips.items()]
    ordered.sort()

    # get list of hg's branches to verify, don't take all git has
    for _, _, b in ordered:
        b = get_branch(b)
        sha1 = get_git_sha1(b)
        c = cache.get(b)
        if sha1 == c:
            continue
        sys.stderr.write('Error: Branch [%s] modified outside hg-fast-export:'
                         '\n%s (repo) != %s (cache)\n' % (b, sha1, c))
        if not force:
            return False

    # verify that branch has exactly one head
    seen = {}
    for h in repo.heads():
        (_, _, _, _, _, _, branch, _) = get_changeset(ui, repo, h)
        if seen.get(branch, False):
            sys.stderr.write('Error: repository has at least one unnamed head: hg r%s\n' %
                             repo.changelog.rev(h))
            if not force:
                return False
        seen[branch] = True

    return True
| 342 |
|
| 343 |
def hg2git(repourl, m, marksfile, mappingfile, headsfile, tipfile,
           authors={}, branchesmap={}, tagsmap={},
           sob=False, force=False, hgtags=False, notes=False, encoding='', fn_encoding=''):
    """Export an hg repository as a git-fast-import stream on stdout.

    repourl     -- repository to open via setup_repo()
    m           -- maximum hg revision to export; negative means up to tip
    marksfile, mappingfile, headsfile, tipfile
                -- cache files read with load_cache(); tipfile and
                   mappingfile are written back with save_cache()
    authors, branchesmap, tagsmap
                -- optional rename mappings
    sob, hgtags, notes, encoding, fn_encoding
                -- passed through to export_commit()
    force       -- passed to verify_heads() to ignore validation errors

    Returns 1 if verify_heads() fails, 0 otherwise.
    """
    # NOTE(review): the dict defaults are shared between calls; nothing in
    # this function modifies them -- confirm the helpers don't either.
    limit = int(m)

    old_marks = load_cache(marksfile, lambda s: int(s) - 1)
    mapping_cache = load_cache(mappingfile)
    heads_cache = load_cache(headsfile)
    state_cache = load_cache(tipfile)

    ui, repo = setup_repo(repourl)

    if not verify_heads(ui, repo, heads_cache, force):
        return 1

    # fall back to len(repo) where the changelog has no count() method
    try:
        tip = repo.changelog.count()
    except AttributeError:
        tip = len(repo)

    # resume where the previous run stopped
    first = int(state_cache.get('tip', 0))
    if limit < 0 or limit > tip:
        last = tip
    else:
        last = limit

    # (re)build the node -> revision mapping for everything up to last
    for rev in range(0, last):
        (revnode, _, _, _, _, _, _, _) = get_changeset(ui, repo, rev, authors)
        mapping_cache[revnode.encode('hex_codec')] = str(rev)

    commands = 0
    brmap = {}
    for rev in range(first, last):
        commands = export_commit(ui, repo, rev, old_marks, last, commands, authors,
                                 branchesmap, sob, brmap, hgtags, notes,
                                 encoding, fn_encoding)

    state_cache['tip'] = last
    state_cache['repo'] = repourl
    save_cache(tipfile, state_cache)
    save_cache(mappingfile, mapping_cache)

    commands = export_tags(ui, repo, old_marks, mapping_cache, commands, authors, tagsmap)

    sys.stderr.write('Issued %d commands\n' % commands)

    return 0
| 389 |
|
| 390 |
if __name__ == '__main__':
    def bail(parser, opt):
        """Report a missing mandatory option, print the help and exit with 2."""
        sys.stderr.write('Error: No %s option given\n' % opt)
        parser.print_help()
        sys.exit(2)

    parser = OptionParser()

    parser.add_option("-m", "--max", type="int", dest="max",
                      help="Maximum hg revision to import")
    parser.add_option("--mapping", dest="mappingfile",
                      help="File to read last run's hg-to-git SHA1 mapping")
    parser.add_option("--marks", dest="marksfile",
                      help="File to read git-fast-import's marks from")
    parser.add_option("--heads", dest="headsfile",
                      help="File to read last run's git heads from")
    parser.add_option("--status", dest="statusfile",
                      help="File to read status from")
    parser.add_option("-r", "--repo", dest="repourl",
                      help="URL of repo to import")
    parser.add_option("-s", action="store_true", dest="sob",
                      default=False, help="Enable parsing Signed-off-by lines")
    parser.add_option("--hgtags", action="store_true", dest="hgtags",
                      default=False, help="Enable exporting .hgtags files")
    parser.add_option("-A", "--authors", dest="authorfile",
                      help="Read authormap from AUTHORFILE")
    parser.add_option("-B", "--branches", dest="branchesfile",
                      help="Read branch map from BRANCHESFILE")
    parser.add_option("-T", "--tags", dest="tagsfile",
                      help="Read tags map from TAGSFILE")
    parser.add_option("-f", "--force", action="store_true", dest="force",
                      default=False, help="Ignore validation errors by force")
    parser.add_option("-M", "--default-branch", dest="default_branch",
                      help="Set the default branch")
    parser.add_option("-o", "--origin", dest="origin_name",
                      help="use <name> as namespace to track upstream")
    parser.add_option("--hg-hash", action="store_true", dest="notes",
                      default=False, help="Annotate commits with the hg hash as git notes in the hg namespace")
    parser.add_option("-e", dest="encoding",
                      help="Assume commit and author strings retrieved from Mercurial are encoded in <encoding>")
    parser.add_option("--fe", dest="fn_encoding",
                      help="Assume file names from Mercurial are encoded in <filename_encoding>")

    (options, args) = parser.parse_args()

    # -1 means "no limit" to hg2git()
    if options.max is None:
        m = -1
    else:
        m = options.max

    # mandatory options, checked in this order
    required = (
        (options.marksfile, '--marks'),
        (options.mappingfile, '--mapping'),
        (options.headsfile, '--heads'),
        (options.statusfile, '--status'),
        (options.repourl, '--repo'),
    )
    for value, flag in required:
        if value is None:
            bail(parser, flag)

    # optional rename mappings default to empty dicts
    a = {}
    if options.authorfile is not None:
        a = load_mapping('authors', options.authorfile)

    b = {}
    if options.branchesfile is not None:
        b = load_mapping('branches', options.branchesfile)

    t = {}
    if options.tagsfile is not None:
        t = load_mapping('tags', options.tagsfile)

    if options.default_branch is not None:
        set_default_branch(options.default_branch)

    if options.origin_name is not None:
        set_origin_name(options.origin_name)

    encoding = ''
    if options.encoding is not None:
        encoding = options.encoding

    # file names use the general encoding unless --fe overrides it
    fn_encoding = encoding
    if options.fn_encoding is not None:
        fn_encoding = options.fn_encoding

    sys.exit(hg2git(options.repourl, m, options.marksfile, options.mappingfile,
                    options.headsfile, options.statusfile,
                    authors=a, branchesmap=b, tagsmap=t,
                    sob=options.sob, force=options.force, hgtags=options.hgtags,
                    notes=options.notes, encoding=encoding, fn_encoding=fn_encoding))