Mercurial > hg > soundsoftware-site
changeset 1565:4e21f91ad4ff
Merge from live branch
author | Chris Cannam |
---|---|
date | Thu, 04 Feb 2016 08:47:09 +0000 |
parents | 404aa68d4227 (current diff) 53c879bb2f7f (diff) |
children | fb03674bdde1 |
files | |
diffstat | 21 files changed, 2032 insertions(+), 18 deletions(-) [+] |
line wrap: on
line diff
--- a/Gemfile Thu Sep 11 12:46:20 2014 +0100 +++ b/Gemfile Thu Feb 04 08:47:09 2016 +0000 @@ -1,6 +1,6 @@ source 'https://rubygems.org' -gem "rails", "3.2.19" +gem "rails", "~> 3.2.22" gem "rake", "~> 10.1.1" gem "jquery-rails", "~> 2.0.2" gem "coderay", "~> 1.1.0"
--- a/app/controllers/attachments_controller.rb Thu Sep 11 12:46:20 2014 +0100 +++ b/app/controllers/attachments_controller.rb Thu Feb 04 08:47:09 2016 +0000 @@ -52,18 +52,18 @@ end def download - # cc: formerly this happened only if "@attachment.container.is_a?(Version)" - # or Project. Not good for us, we want to tally all downloads [by humans] - if not user_is_search_bot? - @attachment.increment_download - end - if stale?(:etag => @attachment.digest) # images are sent inline send_file @attachment.diskfile, :filename => filename_for_content_disposition(@attachment.filename), :type => detect_content_type(@attachment), :disposition => (@attachment.image? ? 'inline' : 'attachment') end + + # cc: formerly this happened only if "@attachment.container.is_a?(Version)" + # or Project. Not good for us, we want to tally all downloads [by humans] + if not user_is_search_bot? + @attachment.increment_download + end end def thumbnail
--- a/app/models/repository.rb Thu Sep 11 12:46:20 2014 +0100 +++ b/app/models/repository.rb Thu Feb 04 08:47:09 2016 +0000 @@ -321,7 +321,23 @@ elsif committer.strip =~ /^([^<]+)(<(.*)>)?$/ username, email = $1.strip, $3 u = User.find_by_login(username) - u ||= User.find_by_mail(email) unless email.blank? + if u.nil? + if email.blank? + if username.strip =~ /^([^ ]+) ([^ ]+)$/ + first, last = $1, $2 + uu = User.where(:firstname => first, :lastname => last) + if uu.empty? + logger.warn "find_committer_user: found no user with name matching #{username}, ignoring" + elsif uu.length == 1 + u = uu.first + else + logger.warn "find_committer_user: found more than one (#{uu.length}) results for user named #{username}, ignoring" + end + end + else + u = User.find_by_mail(email) + end + end user = u end @found_committer_users[committer] = user
--- a/app/views/projects/explore.html.erb Thu Sep 11 12:46:20 2014 +0100 +++ b/app/views/projects/explore.html.erb Thu Feb 04 08:47:09 2016 +0000 @@ -11,12 +11,12 @@ <h2><%= l(:label_explore_projects) %></h2> <div class="threecolumnleft"> -<% cache(:action => 'explore', :action_suffix => 'tags') do %> <div class="tags box"> <h3><%=l(:label_project_tags_all)%></h3> + <% cache(:action => 'explore', :action_suffix => 'tags') do %> <%= render :partial => 'projects/tagcloud' %> + <% end %> </div> -<% end %> </div> <div class="threecolumnright"> @@ -28,33 +28,33 @@ </div> <div class="threecolumnleft"> - <% cache(:action => 'explore', :action_suffix => 'busy_institutions') do %> <div class="institutions box"> <h3><%=l(:label_institutions_busy)%></h3> + <% cache(:action => 'explore', :action_suffix => 'busy_institutions') do %> <%= render :partial => 'activities/busy_institution' %> - <%= link_to l(:label_overall_activity), { :controller => 'activities', :action => 'index' }, :class => 'more' %> + <% end %> + <%= link_to l(:label_overall_activity), { :controller => 'activities', :action => 'index' }, :class => 'more' %> </div> - <% end %> </div> <div class="threecolumnright"> - <% cache(:action => 'explore', :action_suffix => 'busy_projects') do %> <div class="projects box"> <h3><%=l(:label_projects_busy)%></h3> + <% cache(:action => 'explore', :action_suffix => 'busy_projects') do %> <%= render :partial => 'activities/busy' %> + <% end %> <%= link_to l(:label_overall_activity), { :controller => 'activities', :action => 'index' }, :class => 'more' %> </div> - <% end %> </div> <div class="threecolumnmid"> - <% cache(:action => 'explore', :action_suffix => 'mature_projects') do %> <div class="projects box"> <h3><%=l(:label_projects_mature)%></h3> + <% cache(:action => 'explore', :action_suffix => 'mature_projects') do %> <%= render :partial => 'projects/mature' %> + <% end %> <%= link_to l(:label_projects_more), { :controller => 'projects' }, :class => 'more' %> </div> - <% 
end %> </div> <% html_title(l(:label_explore_projects)) -%>
--- a/config/locales/de.yml Thu Sep 11 12:46:20 2014 +0100 +++ b/config/locales/de.yml Thu Feb 04 08:47:09 2016 +0000 @@ -993,7 +993,6 @@ label_in: an label_today: heute label_all_time: gesamter Zeitraum ->>>>>>> other label_yesterday: gestern mail_body_account_activation_request: "Ein neuer Benutzer (%{value}) hat sich registriert. Sein Konto wartet auf Ihre Genehmigung:"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extra/fast-export/.gitignore Thu Feb 04 08:47:09 2016 +0000 @@ -0,0 +1,4 @@ +svn-archive +svn-fast-export +*.pyc +.dotest
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extra/fast-export/Makefile Thu Feb 04 08:47:09 2016 +0000 @@ -0,0 +1,14 @@ +SVN ?= /usr/local/svn +APR_INCLUDES ?= /usr/include/apr-1.0 +CFLAGS += -I${APR_INCLUDES} -I${SVN}/include/subversion-1 -pipe -O2 -std=c99 +LDFLAGS += -L${SVN}/lib -lsvn_fs-1 -lsvn_repos-1 + +all: svn-fast-export svn-archive + +svn-fast-export: svn-fast-export.c +svn-archive: svn-archive.c + +.PHONY: clean + +clean: + rm -rf svn-fast-export svn-archive
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extra/fast-export/README.md Thu Feb 04 08:47:09 2016 +0000 @@ -0,0 +1,104 @@ +hg-fast-export.(sh|py) - mercurial to git converter using git-fast-import +========================================================================= + +Legal +----- + +Most hg-* scripts are licensed under the [MIT license] +(http://www.opensource.org/licenses/mit-license.php) and were written +by Rocco Rutte <pdmef@gmx.net> with hints and help from the git list and +\#mercurial on freenode. hg-reset.py is licensed under GPLv2 since it +copies some code from the mercurial sources. + +The current maintainer is Frej Drejhammar <frej.drejhammar@gmail.com>. + +Usage +----- + +Using hg-fast-export is quite simple for a mercurial repository <repo>: + +``` +mkdir repo-git # or whatever +cd repo-git +git init +hg-fast-export.sh -r <repo> +``` + +Please note that hg-fast-export does not automatically check out the +newly imported repository. You probably want to follow up the import +with a `git checkout`-command. + +Incremental imports to track hg repos are supported, too. + +Using hg-reset it is quite simple within a git repository that is +hg-fast-export'ed from mercurial: + +``` +hg-reset.sh -R <revision> +``` + +will give hints on which branches need adjustment for starting over +again. + +When a mercurial repository does not use utf-8 for encoding author +strings and commit messages the `-e <encoding>` command line option +can be used to force fast-export to convert incoming meta data from +<encoding> to utf-8. This encoding option is also applied to file names. + +In some locales Mercurial uses different encodings for commit messages +and file names. In that case, you can use `--fe <encoding>` command line +option which overrides the -e option for file names. 
+ +As mercurial appears to be much less picky about the syntax of the +author information than git, an author mapping file can be given to +hg-fast-export to fix up malformed author strings. The file is +specified using the -A option. The file should contain lines of the +form `FromAuthor=ToAuthor`. The example authors.map below will +translate `User <garbage<user@example.com>` to `User <user@example.com>`. + +``` +-- Start of authors.map -- +User <garbage<user@example.com>=User <user@example.com> +-- End of authors.map -- +``` + +Tag and Branch Naming +--------------------- + +As Git and Mercurial differ in what is a valid branch and tag +name the -B and -T options allow a mapping file to be specified to +rename branches and tags (respectively). The syntax of the mapping +file is the same as for the author mapping. + +Notes/Limitations +----------------- + +hg-fast-export supports multiple branches but only named branches with +exactly one head each. Otherwise commits to the tip of these heads +within the branch will get flattened into merge commits. + +As each git-fast-import run creates a new pack file, it may be +required to repack the repository quite often for incremental imports +(especially when importing a small number of changesets per +incremental import). + +The way the hg API and remote access protocol is designed it is not +possible to use hg-fast-export on remote repositories +(http/ssh). First clone the repository, then convert it. + +Design +------ + +hg-fast-export.py was designed in a way that doesn't require a 2-pass +mechanism or any prior repository analysis: it just feeds what it +finds into git-fast-import. This also implies that it heavily relies +on strictly linear ordering of changesets from hg, i.e. its +append-only storage model so that changesets hg-fast-export already +saw never get modified. 
+ +Submitting Patches +------------------ + +Please use the issue-tracker at github +https://github.com/frej/fast-export to report bugs and submit +patches.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extra/fast-export/hg-fast-export.py Thu Feb 04 08:47:09 2016 +0000 @@ -0,0 +1,474 @@ +#!/usr/bin/env python + +# Copyright (c) 2007, 2008 Rocco Rutte <pdmef@gmx.net> and others. +# License: MIT <http://www.opensource.org/licenses/mit-license.php> + +from mercurial import node +from hg2git import setup_repo,fixup_user,get_branch,get_changeset +from hg2git import load_cache,save_cache,get_git_sha1,set_default_branch,set_origin_name +from optparse import OptionParser +import re +import sys +import os + +if sys.platform == "win32": + # On Windows, sys.stdout is initially opened in text mode, which means that + # when a LF (\n) character is written to sys.stdout, it will be converted + # into CRLF (\r\n). That makes git blow up, so use this platform-specific + # code to change the mode of sys.stdout to binary. + import msvcrt + msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY) + +# silly regex to catch Signed-off-by lines in log message +sob_re=re.compile('^Signed-[Oo]ff-[Bb]y: (.+)$') +# insert 'checkpoint' command after this many commits or none at all if 0 +cfg_checkpoint_count=0 +# write some progress message every this many file contents written +cfg_export_boundary=1000 + +def gitmode(flags): + return 'l' in flags and '120000' or 'x' in flags and '100755' or '100644' + +def wr_no_nl(msg=''): + if msg: + sys.stdout.write(msg) + +def wr(msg=''): + wr_no_nl(msg) + sys.stdout.write('\n') + #map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n')) + +def checkpoint(count): + count=count+1 + if cfg_checkpoint_count>0 and count%cfg_checkpoint_count==0: + sys.stderr.write("Checkpoint after %d commits\n" % count) + wr('checkpoint') + wr() + return count + +def revnum_to_revref(rev, old_marks): + """Convert an hg revnum to a git-fast-import rev reference (an SHA1 + or a mark)""" + return old_marks.get(rev) or ':%d' % (rev+1) + +def file_mismatch(f1,f2): + """See if two revisions of a file are not equal.""" + return 
node.hex(f1)!=node.hex(f2) + +def split_dict(dleft,dright,l=[],c=[],r=[],match=file_mismatch): + """Loop over our repository and find all changed and missing files.""" + for left in dleft.keys(): + right=dright.get(left,None) + if right==None: + # we have the file but our parent hasn't: add to left set + l.append(left) + elif match(dleft[left],right) or gitmode(dleft.flags(left))!=gitmode(dright.flags(left)): + # we have it but checksums mismatch: add to center set + c.append(left) + for right in dright.keys(): + left=dleft.get(right,None) + if left==None: + # if parent has file but we don't: add to right set + r.append(right) + # change is already handled when comparing child against parent + return l,c,r + +def get_filechanges(repo,revision,parents,mleft): + """Given some repository and revision, find all changed/deleted files.""" + l,c,r=[],[],[] + for p in parents: + if p<0: continue + mright=repo.changectx(p).manifest() + l,c,r=split_dict(mleft,mright,l,c,r) + l.sort() + c.sort() + r.sort() + return l,c,r + +def get_author(logmessage,committer,authors): + """As git distincts between author and committer of a patch, try to + extract author by detecting Signed-off-by lines. + + This walks from the end of the log message towards the top skipping + empty lines. Upon the first non-empty line, it walks all Signed-off-by + lines upwards to find the first one. For that (if found), it extracts + authorship information the usual way (authors table, cleaning, etc.) + + If no Signed-off-by line is found, this defaults to the committer. + + This may sound stupid (and it somehow is), but in log messages we + accidentially may have lines in the middle starting with + "Signed-off-by: foo" and thus matching our detection regex. 
Prevent + that.""" + + loglines=logmessage.split('\n') + i=len(loglines) + # from tail walk to top skipping empty lines + while i>=0: + i-=1 + if len(loglines[i].strip())==0: continue + break + if i>=0: + # walk further upwards to find first sob line, store in 'first' + first=None + while i>=0: + m=sob_re.match(loglines[i]) + if m==None: break + first=m + i-=1 + # if the last non-empty line matches our Signed-Off-by regex: extract username + if first!=None: + r=fixup_user(first.group(1),authors) + return r + return committer + +def export_file_contents(ctx,manifest,files,hgtags,encoding=''): + count=0 + max=len(files) + for file in files: + # Skip .hgtags files. They only get us in trouble. + if not hgtags and file == ".hgtags": + sys.stderr.write('Skip %s\n' % (file)) + continue + d=ctx.filectx(file).data() + if encoding: + filename=file.decode(encoding).encode('utf8') + else: + filename=file + wr('M %s inline %s' % (gitmode(manifest.flags(file)), + strip_leading_slash(filename))) + wr('data %d' % len(d)) # had some trouble with size() + wr(d) + count+=1 + if count%cfg_export_boundary==0: + sys.stderr.write('Exported %d/%d files\n' % (count,max)) + if max>cfg_export_boundary: + sys.stderr.write('Exported %d/%d files\n' % (count,max)) + +def sanitize_name(name,what="branch"): + """Sanitize input roughly according to git-check-ref-format(1)""" + + def dot(name): + if name[0] == '.': return '_'+name[1:] + return name + + n=name + p=re.compile('([[ ~^:?\\\\*]|\.\.)') + n=p.sub('_', n) + if n[-1] in ('/', '.'): n=n[:-1]+'_' + n='/'.join(map(dot,n.split('/'))) + p=re.compile('_+') + n=p.sub('_', n) + + if n!=name: + sys.stderr.write('Warning: sanitized %s [%s] to [%s]\n' % (what,name,n)) + return n + +def strip_leading_slash(filename): + if filename[0] == '/': + return filename[1:] + return filename + +def export_commit(ui,repo,revision,old_marks,max,count,authors, + branchesmap,sob,brmap,hgtags,notes,encoding='',fn_encoding=''): + def get_branchname(name): + if 
brmap.has_key(name): + return brmap[name] + n=sanitize_name(branchesmap.get(name,name)) + brmap[name]=n + return n + + (revnode,_,user,(time,timezone),files,desc,branch,_)=get_changeset(ui,repo,revision,authors,encoding) + + branch=get_branchname(branch) + + parents = [p for p in repo.changelog.parentrevs(revision) if p >= 0] + + if len(parents)==0 and revision != 0: + wr('reset refs/heads/%s' % branch) + + wr('commit refs/heads/%s' % branch) + wr('mark :%d' % (revision+1)) + if sob: + wr('author %s %d %s' % (get_author(desc,user,authors),time,timezone)) + wr('committer %s %d %s' % (user,time,timezone)) + wr('data %d' % (len(desc)+1)) # wtf? + wr(desc) + wr() + + ctx=repo.changectx(str(revision)) + man=ctx.manifest() + added,changed,removed,type=[],[],[],'' + + if len(parents) == 0: + # first revision: feed in full manifest + added=man.keys() + added.sort() + type='full' + else: + wr('from %s' % revnum_to_revref(parents[0], old_marks)) + if len(parents) == 1: + # later non-merge revision: feed in changed manifest + # if we have exactly one parent, just take the changes from the + # manifest without expensively comparing checksums + f=repo.status(repo.lookup(parents[0]),revnode)[:3] + added,changed,removed=f[1],f[0],f[2] + type='simple delta' + else: # a merge with two parents + wr('merge %s' % revnum_to_revref(parents[1], old_marks)) + # later merge revision: feed in changed manifest + # for many files comparing checksums is expensive so only do it for + # merges where we really need it due to hg's revlog logic + added,changed,removed=get_filechanges(repo,revision,parents,man) + type='thorough delta' + + sys.stderr.write('%s: Exporting %s revision %d/%d with %d/%d/%d added/changed/removed files\n' % + (branch,type,revision+1,max,len(added),len(changed),len(removed))) + + if fn_encoding: + removed=[r.decode(fn_encoding).encode('utf8') for r in removed] + + removed=[strip_leading_slash(x) for x in removed] + + map(lambda r: wr('D %s' % r),removed) + 
export_file_contents(ctx,man,added,hgtags,fn_encoding) + export_file_contents(ctx,man,changed,hgtags,fn_encoding) + wr() + + count=checkpoint(count) + count=generate_note(user,time,timezone,revision,ctx,count,notes) + return count + +def generate_note(user,time,timezone,revision,ctx,count,notes): + if not notes: + return count + wr('commit refs/notes/hg') + wr('committer %s %d %s' % (user,time,timezone)) + wr('data 0') + wr('N inline :%d' % (revision+1)) + hg_hash=ctx.hex() + wr('data %d' % (len(hg_hash))) + wr_no_nl(hg_hash) + wr() + return checkpoint(count) + +def export_tags(ui,repo,old_marks,mapping_cache,count,authors,tagsmap): + l=repo.tagslist() + for tag,node in l: + # Remap the branch name + tag=sanitize_name(tagsmap.get(tag,tag),"tag") + # ignore latest revision + if tag=='tip': continue + # ignore tags to nodes that are missing (ie, 'in the future') + if node.encode('hex_codec') not in mapping_cache: + sys.stderr.write('Tag %s refers to unseen node %s\n' % (tag, node.encode('hex_codec'))) + continue + + rev=int(mapping_cache[node.encode('hex_codec')]) + + ref=revnum_to_revref(rev, old_marks) + if ref==None: + sys.stderr.write('Failed to find reference for creating tag' + ' %s at r%d\n' % (tag,rev)) + continue + sys.stderr.write('Exporting tag [%s] at [hg r%d] [git %s]\n' % (tag,rev,ref)) + wr('reset refs/tags/%s' % tag) + wr('from %s' % ref) + wr() + count=checkpoint(count) + return count + +def load_mapping(name, filename): + cache={} + if not os.path.exists(filename): + return cache + f=open(filename,'r') + l=0 + a=0 + lre=re.compile('^([^=]+)[ ]*=[ ]*(.+)$') + for line in f.readlines(): + l+=1 + line=line.strip() + if line=='' or line[0]=='#': + continue + m=lre.match(line) + if m==None: + sys.stderr.write('Invalid file format in [%s], line %d\n' % (filename,l)) + continue + # put key:value in cache, key without ^: + cache[m.group(1).strip()]=m.group(2).strip() + a+=1 + f.close() + sys.stderr.write('Loaded %d %s\n' % (a, name)) + return cache + +def 
branchtip(repo, heads): + '''return the tipmost branch head in heads''' + tip = heads[-1] + for h in reversed(heads): + if 'close' not in repo.changelog.read(h)[5]: + tip = h + break + return tip + +def verify_heads(ui,repo,cache,force): + branches={} + for bn, heads in repo.branchmap().iteritems(): + branches[bn] = branchtip(repo, heads) + l=[(-repo.changelog.rev(n), n, t) for t, n in branches.items()] + l.sort() + + # get list of hg's branches to verify, don't take all git has + for _,_,b in l: + b=get_branch(b) + sha1=get_git_sha1(b) + c=cache.get(b) + if sha1!=c: + sys.stderr.write('Error: Branch [%s] modified outside hg-fast-export:' + '\n%s (repo) != %s (cache)\n' % (b,sha1,c)) + if not force: return False + + # verify that branch has exactly one head + t={} + for h in repo.heads(): + (_,_,_,_,_,_,branch,_)=get_changeset(ui,repo,h) + if t.get(branch,False): + sys.stderr.write('Error: repository has at least one unnamed head: hg r%s\n' % + repo.changelog.rev(h)) + if not force: return False + t[branch]=True + + return True + +def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile, + authors={},branchesmap={},tagsmap={}, + sob=False,force=False,hgtags=False,notes=False,encoding='',fn_encoding=''): + _max=int(m) + + old_marks=load_cache(marksfile,lambda s: int(s)-1) + mapping_cache=load_cache(mappingfile) + heads_cache=load_cache(headsfile) + state_cache=load_cache(tipfile) + + ui,repo=setup_repo(repourl) + + if not verify_heads(ui,repo,heads_cache,force): + return 1 + + try: + tip=repo.changelog.count() + except AttributeError: + tip=len(repo) + + min=int(state_cache.get('tip',0)) + max=_max + if _max<0 or max>tip: + max=tip + + for rev in range(0,max): + (revnode,_,_,_,_,_,_,_)=get_changeset(ui,repo,rev,authors) + mapping_cache[revnode.encode('hex_codec')] = str(rev) + + + c=0 + brmap={} + for rev in range(min,max): + c=export_commit(ui,repo,rev,old_marks,max,c,authors,branchesmap, + sob,brmap,hgtags,notes,encoding,fn_encoding) + + state_cache['tip']=max 
+ state_cache['repo']=repourl + save_cache(tipfile,state_cache) + save_cache(mappingfile,mapping_cache) + + c=export_tags(ui,repo,old_marks,mapping_cache,c,authors,tagsmap) + + sys.stderr.write('Issued %d commands\n' % c) + + return 0 + +if __name__=='__main__': + def bail(parser,opt): + sys.stderr.write('Error: No %s option given\n' % opt) + parser.print_help() + sys.exit(2) + + parser=OptionParser() + + parser.add_option("-m","--max",type="int",dest="max", + help="Maximum hg revision to import") + parser.add_option("--mapping",dest="mappingfile", + help="File to read last run's hg-to-git SHA1 mapping") + parser.add_option("--marks",dest="marksfile", + help="File to read git-fast-import's marks from") + parser.add_option("--heads",dest="headsfile", + help="File to read last run's git heads from") + parser.add_option("--status",dest="statusfile", + help="File to read status from") + parser.add_option("-r","--repo",dest="repourl", + help="URL of repo to import") + parser.add_option("-s",action="store_true",dest="sob", + default=False,help="Enable parsing Signed-off-by lines") + parser.add_option("--hgtags",action="store_true",dest="hgtags", + default=False,help="Enable exporting .hgtags files") + parser.add_option("-A","--authors",dest="authorfile", + help="Read authormap from AUTHORFILE") + parser.add_option("-B","--branches",dest="branchesfile", + help="Read branch map from BRANCHESFILE") + parser.add_option("-T","--tags",dest="tagsfile", + help="Read tags map from TAGSFILE") + parser.add_option("-f","--force",action="store_true",dest="force", + default=False,help="Ignore validation errors by force") + parser.add_option("-M","--default-branch",dest="default_branch", + help="Set the default branch") + parser.add_option("-o","--origin",dest="origin_name", + help="use <name> as namespace to track upstream") + parser.add_option("--hg-hash",action="store_true",dest="notes", + default=False,help="Annotate commits with the hg hash as git notes in the hg namespace") + 
parser.add_option("-e",dest="encoding", + help="Assume commit and author strings retrieved from Mercurial are encoded in <encoding>") + parser.add_option("--fe",dest="fn_encoding", + help="Assume file names from Mercurial are encoded in <filename_encoding>") + + (options,args)=parser.parse_args() + + m=-1 + if options.max!=None: m=options.max + + if options.marksfile==None: bail(parser,'--marks') + if options.mappingfile==None: bail(parser,'--mapping') + if options.headsfile==None: bail(parser,'--heads') + if options.statusfile==None: bail(parser,'--status') + if options.repourl==None: bail(parser,'--repo') + + a={} + if options.authorfile!=None: + a=load_mapping('authors', options.authorfile) + + b={} + if options.branchesfile!=None: + b=load_mapping('branches', options.branchesfile) + + t={} + if options.tagsfile!=None: + t=load_mapping('tags', options.tagsfile) + + if options.default_branch!=None: + set_default_branch(options.default_branch) + + if options.origin_name!=None: + set_origin_name(options.origin_name) + + encoding='' + if options.encoding!=None: + encoding=options.encoding + + fn_encoding=encoding + if options.fn_encoding!=None: + fn_encoding=options.fn_encoding + + sys.exit(hg2git(options.repourl,m,options.marksfile,options.mappingfile, + options.headsfile, options.statusfile, + authors=a,branchesmap=b,tagsmap=t, + sob=options.sob,force=options.force,hgtags=options.hgtags, + notes=options.notes,encoding=encoding,fn_encoding=fn_encoding))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extra/fast-export/hg-fast-export.sh Thu Feb 04 08:47:09 2016 +0000 @@ -0,0 +1,145 @@ +#!/bin/sh + +# Copyright (c) 2007, 2008 Rocco Rutte <pdmef@gmx.net> and others. +# License: MIT <http://www.opensource.org/licenses/mit-license.php> + +ROOT="$(dirname "$(which "$0")")" +REPO="" +PFX="hg2git" +SFX_MAPPING="mapping" +SFX_MARKS="marks" +SFX_HEADS="heads" +SFX_STATE="state" +GFI_OPTS="" +PYTHON=${PYTHON:-python} + +USAGE="[--quiet] [-r <repo>] [--force] [-m <max>] [-s] [--hgtags] [-A <file>] [-B <file>] [-T <file>] [-M <name>] [-o <name>] [--hg-hash] [-e <encoding>]" +LONG_USAGE="Import hg repository <repo> up to either tip or <max> +If <repo> is omitted, use last hg repository as obtained from state file, +GIT_DIR/$PFX-$SFX_STATE by default. + +Note: The argument order matters. + +Options: + --quiet Passed to git-fast-import(1) + -r <repo> Mercurial repository to import + --force Ignore validation errors when converting, and pass --force + to git-fast-import(1) + -m <max> Maximum revision to import + -s Enable parsing Signed-off-by lines + --hgtags Enable exporting .hgtags files + -A <file> Read author map from file + (Same as in git-svnimport(1) and git-cvsimport(1)) + -B <file> Read branch map from file + -T <file> Read tags map from file + -M <name> Set the default branch name (defaults to 'master') + -o <name> Use <name> as branch namespace to track upstream (eg 'origin') + --hg-hash Annotate commits with the hg hash as git notes in the + hg namespace. + -e <encoding> Assume commit and author strings retrieved from + Mercurial are encoded in <encoding> + --fe <filename_encoding> Assume filenames from Mercurial are encoded + in <filename_encoding> +" +case "$1" in + -h|--help) + echo "usage: $(basename "$0") $USAGE" + echo "" + echo "$LONG_USAGE" + exit 0 +esac +. 
"$(git --exec-path)/git-sh-setup" +cd_to_toplevel + +while case "$#" in 0) break ;; esac +do + case "$1" in + -r|--r|--re|--rep|--repo) + shift + REPO="$1" + ;; + --q|--qu|--qui|--quie|--quiet) + GFI_OPTS="$GFI_OPTS --quiet" + ;; + --force) + # pass --force to git-fast-import and hg-fast-export.py + GFI_OPTS="$GFI_OPTS --force" + break + ;; + -*) + # pass any other options down to hg2git.py + break + ;; + *) + break + ;; + esac + shift +done + +# for convenience: get default repo from state file +if [ x"$REPO" = x -a -f "$GIT_DIR/$PFX-$SFX_STATE" ] ; then + REPO="`grep '^:repo ' "$GIT_DIR/$PFX-$SFX_STATE" | cut -d ' ' -f 2`" + echo "Using last hg repository \"$REPO\"" +fi + +if [ -z "$REPO" ]; then + echo "no repo given, use -r flag" + exit 1 +fi + +# make sure we have a marks cache +if [ ! -f "$GIT_DIR/$PFX-$SFX_MARKS" ] ; then + touch "$GIT_DIR/$PFX-$SFX_MARKS" +fi + +# cleanup on exit +trap 'rm -f "$GIT_DIR/$PFX-$SFX_MARKS.old" "$GIT_DIR/$PFX-$SFX_MARKS.tmp"' 0 + +_err1= +_err2= +exec 3>&1 +{ read -r _err1 || :; read -r _err2 || :; } <<-EOT +$( + exec 4>&3 3>&1 1>&4 4>&- + { + _e1=0 + GIT_DIR="$GIT_DIR" $PYTHON "$ROOT/hg-fast-export.py" \ + --repo "$REPO" \ + --marks "$GIT_DIR/$PFX-$SFX_MARKS" \ + --mapping "$GIT_DIR/$PFX-$SFX_MAPPING" \ + --heads "$GIT_DIR/$PFX-$SFX_HEADS" \ + --status "$GIT_DIR/$PFX-$SFX_STATE" \ + "$@" 3>&- || _e1=$? + echo $_e1 >&3 + } | \ + { + _e2=0 + git fast-import $GFI_OPTS --export-marks="$GIT_DIR/$PFX-$SFX_MARKS.tmp" 3>&- || _e2=$? + echo $_e2 >&3 + } +) +EOT +exec 3>&- +[ "$_err1" = 0 -a "$_err2" = 0 ] || exit 1 + +# move recent marks cache out of the way... 
+if [ -f "$GIT_DIR/$PFX-$SFX_MARKS" ] ; then + mv "$GIT_DIR/$PFX-$SFX_MARKS" "$GIT_DIR/$PFX-$SFX_MARKS.old" +else + touch "$GIT_DIR/$PFX-$SFX_MARKS.old" +fi + +# ...to create a new merged one +cat "$GIT_DIR/$PFX-$SFX_MARKS.old" "$GIT_DIR/$PFX-$SFX_MARKS.tmp" \ +| uniq > "$GIT_DIR/$PFX-$SFX_MARKS" + +# save SHA1s of current heads for incremental imports +# and connectivity (plus sanity checking) +for head in `git branch | sed 's#^..##'` ; do + id="`git rev-parse refs/heads/$head`" + echo ":$head $id" +done > "$GIT_DIR/$PFX-$SFX_HEADS" + +# check diff with color: +# ( for i in `find . -type f | grep -v '\.git'` ; do diff -u $i $REPO/$i ; done | cdiff ) | less -r
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extra/fast-export/hg-reset.py Thu Feb 04 08:47:09 2016 +0000 @@ -0,0 +1,135 @@ +#!/usr/bin/env python + +# Copyright (c) 2007, 2008 Rocco Rutte <pdmef@gmx.net> and others. +# License: GPLv2 + +from mercurial import node +from hg2git import setup_repo,load_cache,get_changeset,get_git_sha1 +from optparse import OptionParser +import sys + +def heads(ui,repo,start=None,stop=None,max=None): + # this is copied from mercurial/revlog.py and differs only in + # accepting a max argument for xrange(startrev+1,...) defaulting + # to the original repo.changelog.count() + if start is None: + start = node.nullid + if stop is None: + stop = [] + if max is None: + max = repo.changelog.count() + stoprevs = dict.fromkeys([repo.changelog.rev(n) for n in stop]) + startrev = repo.changelog.rev(start) + reachable = {startrev: 1} + heads = {startrev: 1} + + parentrevs = repo.changelog.parentrevs + for r in xrange(startrev + 1, max): + for p in parentrevs(r): + if p in reachable: + if r not in stoprevs: + reachable[r] = 1 + heads[r] = 1 + if p in heads and p not in stoprevs: + del heads[p] + + return [(repo.changelog.node(r),str(r)) for r in heads] + +def get_branches(ui,repo,heads_cache,marks_cache,mapping_cache,max): + h=heads(ui,repo,max=max) + stale=dict.fromkeys(heads_cache) + changed=[] + unchanged=[] + for node,rev in h: + _,_,user,(_,_),_,desc,branch,_=get_changeset(ui,repo,rev) + del stale[branch] + git_sha1=get_git_sha1(branch) + cache_sha1=marks_cache.get(str(int(rev)+1)) + if git_sha1!=None and git_sha1==cache_sha1: + unchanged.append([branch,cache_sha1,rev,desc.split('\n')[0],user]) + else: + changed.append([branch,cache_sha1,rev,desc.split('\n')[0],user]) + changed.sort() + unchanged.sort() + return stale,changed,unchanged + +def get_tags(ui,repo,marks_cache,mapping_cache,max): + l=repo.tagslist() + good,bad=[],[] + for tag,node in l: + if tag=='tip': continue + rev=int(mapping_cache[node.encode('hex_codec')]) + 
cache_sha1=marks_cache.get(str(int(rev)+1)) + _,_,user,(_,_),_,desc,branch,_=get_changeset(ui,repo,rev) + if int(rev)>int(max): + bad.append([tag,branch,cache_sha1,rev,desc.split('\n')[0],user]) + else: + good.append([tag,branch,cache_sha1,rev,desc.split('\n')[0],user]) + good.sort() + bad.sort() + return good,bad + +def mangle_mark(mark): + return str(int(mark)-1) + +if __name__=='__main__': + def bail(parser,opt): + sys.stderr.write('Error: No option %s given\n' % opt) + parser.print_help() + sys.exit(2) + + parser=OptionParser() + + parser.add_option("--marks",dest="marksfile", + help="File to read git-fast-import's marks from") + parser.add_option("--mapping",dest="mappingfile", + help="File to read last run's hg-to-git SHA1 mapping") + parser.add_option("--heads",dest="headsfile", + help="File to read last run's git heads from") + parser.add_option("--status",dest="statusfile", + help="File to read status from") + parser.add_option("-r","--repo",dest="repourl", + help="URL of repo to import") + parser.add_option("-R","--revision",type=int,dest="revision", + help="Revision to reset to") + + (options,args)=parser.parse_args() + + if options.marksfile==None: bail(parser,'--marks option') + if options.mappingfile==None: bail(parser,'--mapping option') + if options.headsfile==None: bail(parser,'--heads option') + if options.statusfile==None: bail(parser,'--status option') + if options.repourl==None: bail(parser,'--repo option') + if options.revision==None: bail(parser,'-R/--revision') + + heads_cache=load_cache(options.headsfile) + marks_cache=load_cache(options.marksfile,mangle_mark) + state_cache=load_cache(options.statusfile) + mapping_cache = load_cache(options.mappingfile) + + l=int(state_cache.get('tip',options.revision)) + if options.revision+1>l: + sys.stderr.write('Revision is beyond last revision imported: %d>%d\n' % (options.revision,l)) + sys.exit(1) + + ui,repo=setup_repo(options.repourl) + + 
stale,changed,unchanged=get_branches(ui,repo,heads_cache,marks_cache,mapping_cache,options.revision+1) + good,bad=get_tags(ui,repo,marks_cache,mapping_cache,options.revision+1) + + print "Possibly stale branches:" + map(lambda b: sys.stdout.write('\t%s\n' % b),stale.keys()) + + print "Possibly stale tags:" + map(lambda b: sys.stdout.write('\t%s on %s (r%s)\n' % (b[0],b[1],b[3])),bad) + + print "Unchanged branches:" + map(lambda b: sys.stdout.write('\t%s (r%s)\n' % (b[0],b[2])),unchanged) + + print "Unchanged tags:" + map(lambda b: sys.stdout.write('\t%s on %s (r%s)\n' % (b[0],b[1],b[3])),good) + + print "Reset branches in '%s' to:" % options.headsfile + map(lambda b: sys.stdout.write('\t:%s %s\n\t\t(r%s: %s: %s)\n' % (b[0],b[1],b[2],b[4],b[3])),changed) + + print "Reset ':tip' in '%s' to '%d'" % (options.statusfile,options.revision)
#!/bin/sh

# Copyright (c) 2007, 2008 Rocco Rutte <pdmef@gmx.net> and others.
# License: MIT <http://www.opensource.org/licenses/mit-license.php>
#
# Thin wrapper around hg-reset.py: resolves the per-repository cache
# files kept under GIT_DIR and forwards everything else verbatim.

ROOT="`dirname $0`"
REPO=""
PFX="hg2git"
SFX_MARKS="marks"
SFX_MAPPING="mapping"
SFX_HEADS="heads"
SFX_STATE="state"
QUIET=""
PYTHON=${PYTHON:-python}

USAGE="[-r <repo>] -R <rev>"
LONG_USAGE="Print SHA1s of latest changes per branch up to <rev> useful
to reset import and restart at <rev>.
If <repo> is omitted, use last hg repository as obtained from state file,
GIT_DIR/$PFX-$SFX_STATE by default.

Options:
  -R Hg revision to reset to
  -r Mercurial repository to use
"

. "$(git --exec-path)/git-sh-setup"
cd_to_toplevel

# Consume a leading -r/--repo option; any other argument or option is
# left in place and handed down to hg-reset.py untouched.
while [ $# -gt 0 ]
do
  case "$1" in
  -r|--r|--re|--rep|--repo)
    shift
    REPO="$1"
    shift
    ;;
  *)
    break
    ;;
  esac
done

# for convenience: get default repo from state file
if [ x"$REPO" = x -a -f "$GIT_DIR/$PFX-$SFX_STATE" ] ; then
  REPO="`grep '^:repo ' "$GIT_DIR/$PFX-$SFX_STATE" | cut -d ' ' -f 2`"
  echo "Using last hg repository \"$REPO\""
fi

# make sure we have a marks cache
if [ ! -f "$GIT_DIR/$PFX-$SFX_MARKS" ] ; then
  touch "$GIT_DIR/$PFX-$SFX_MARKS"
fi

GIT_DIR="$GIT_DIR" $PYTHON "$ROOT/hg-reset.py" \
  --repo "$REPO" \
  --marks "$GIT_DIR/$PFX-$SFX_MARKS" \
  --mapping "$GIT_DIR/$PFX-$SFX_MAPPING" \
  --heads "$GIT_DIR/$PFX-$SFX_HEADS" \
  --status "$GIT_DIR/$PFX-$SFX_STATE" \
  "$@"

exit $?
#!/usr/bin/env python
# extra/fast-export/hg2git.py
#
# Copyright (c) 2007, 2008 Rocco Rutte <pdmef@gmx.net> and others.
# License: MIT <http://www.opensource.org/licenses/mit-license.php>
#
# Shared helpers for the hg-fast-export / hg-reset tools.  (Python 2.)

from mercurial import hg,util,ui,templatefilters
import re
import os
import sys

# default git branch name
cfg_master='master'
# default origin name
origin_name=''
# regex to see if the user field already carries an email address
user_re=re.compile('([^<]+) (<[^>]*>)$')
# regex to strip surrounding double quotes from user names
user_clean_re=re.compile('^["]([^"]+)["]$')

def set_default_branch(name):
    """Override the branch name substituted for hg's 'default'."""
    global cfg_master
    cfg_master = name

def set_origin_name(name):
    """Set an origin prefix so branches are emitted as 'origin/branch'."""
    global origin_name
    origin_name = name

def setup_repo(url):
    """Open the hg repository at 'url' non-interactively; returns (ui, repo)."""
    try:
        myui = ui.ui(interactive=False)
    except TypeError:
        # newer Mercurial removed the 'interactive' keyword argument
        myui = ui.ui()
        myui.setconfig('ui', 'interactive', 'off')
    return myui, hg.repository(myui, url)

def fixup_user(user,authors):
    """Normalise an hg committer string into git's 'Name <mail>' form,
    applying the optional 'authors' mapping table first."""
    user = user.strip("\"")
    if authors is not None:
        # if we have an authors table, try to get mapping
        # by defaulting to the current value of 'user'
        user = authors.get(user, user)
    name, mail, m = '', '', user_re.match(user)
    if m is None:
        # if we don't have 'Name <mail>' syntax, extract name
        # and mail from hg helpers. this seems to work pretty well.
        # if email doesn't contain @, replace it with devnull@localhost
        name = templatefilters.person(user)
        mail = '<%s>' % util.email(user)
        if '@' not in mail:
            mail = '<devnull@localhost>'
    else:
        # if we have 'Name <mail>' syntax, everything is fine :)
        name, mail = m.group(1), m.group(2)

    # remove any silly quoting from username
    m2 = user_clean_re.match(name)
    if m2 is not None:
        name = m2.group(1)
    return '%s %s' % (name, mail)

def get_branch(name):
    """Map an hg branch name onto the git branch name to emit."""
    # 'HEAD' is the result of a bug in mutt's cvs->hg conversion,
    # other CVS imports may need it, too
    if name == 'HEAD' or name == 'default' or name == '':
        name = cfg_master
    if origin_name:
        return origin_name + '/' + name
    return name

def get_changeset(ui,repo,revision,authors={},encoding=''):
    """Read changeset 'revision' and return (node, manifest, user,
    (time, tz), files, desc, branch, extra), with the committer
    normalised and the timezone rendered as a git-style '+HHMM'."""
    node = repo.lookup(revision)
    (manifest, user, (time, timezone), files, desc, extra) = repo.changelog.read(node)
    if encoding:
        user = user.decode(encoding).encode('utf8')
        desc = desc.decode(encoding).encode('utf8')
    # hg stores the offset in seconds west of UTC; git wants +HHMM east
    tz = "%+03d%02d" % (-timezone / 3600, ((-timezone % 3600) / 60))
    branch = get_branch(extra.get('branch', 'master'))
    return (node, manifest, fixup_user(user, authors), (time, tz), files, desc, branch, extra)

def mangle_key(key):
    """Default (identity) key transform for load_cache."""
    return key

def load_cache(filename,get_key=mangle_key):
    """Load a ':key value' cache file into a dict, applying 'get_key' to
    each key.  A missing file yields an empty dict; malformed lines are
    reported on stderr and skipped."""
    cache = {}
    if not os.path.exists(filename):
        return cache
    f = open(filename, 'r')
    l = 0
    for line in f.readlines():
        l += 1
        fields = line.split(' ')
        if not len(fields) == 2 or fields[0][0] != ':':
            sys.stderr.write('Invalid file format in [%s], line %d\n' % (filename, l))
            continue
        # put key:value in cache, key without leading ':'
        cache[get_key(fields[0][1:])] = fields[1].split('\n')[0]
    f.close()
    return cache

def save_cache(filename,cache):
    """Write 'cache' in the ':key value' format read by load_cache."""
    f = open(filename, 'w+')
    # plain loop instead of the original map()-for-side-effects
    for k in cache.keys():
        f.write(':%s %s\n' % (str(k), str(cache.get(k))))
    f.close()

def get_git_sha1(name,type='heads'):
    """Return the 40-char SHA1 of refs/<type>/<name> in the current git
    repository, or None if the ref does not exist."""
    try:
        # use git-rev-parse to support packed refs
        cmd = "git rev-parse --verify refs/%s/%s 2>%s" % (type, name, os.devnull)
        p = os.popen(cmd)
        l = p.readline()
        p.close()
        # readline() returns '' at EOF, never None
        if not l:
            return None
        return l[0:40]
    except IOError:
        return None
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extra/fast-export/svn-archive.c Thu Feb 04 08:47:09 2016 +0000 @@ -0,0 +1,240 @@ +/* + * svn-archive.c + * ---------- + * Walk through a given revision of a local Subversion repository and export + * all of the contents as a tarfile. + * + * Author: Chris Lee <clee@kde.org> + * License: MIT <http://www.opensource.org/licenses/mit-license.php> + */ + +#define _XOPEN_SOURCE +#include <unistd.h> +#include <string.h> +#include <stdio.h> +#include <time.h> + +#ifndef PATH_MAX +#define PATH_MAX 4096 +#endif + +#include <apr_general.h> +#include <apr_strings.h> +#include <apr_getopt.h> +#include <apr_lib.h> + +#include <svn_types.h> +#include <svn_pools.h> +#include <svn_repos.h> +#include <svn_fs.h> + +#undef SVN_ERR +#define SVN_ERR(expr) SVN_INT_ERR(expr) +#define apr_sane_push(arr, contents) *(char **)apr_array_push(arr) = contents + +#define TRUNK "/trunk" + +static time_t archive_time; + +time_t get_epoch(char *svn_date) +{ + struct tm tm = {0}; + char *date = malloc(strlen(svn_date) * sizeof(char *)); + strncpy(date, svn_date, strlen(svn_date) - 8); + strptime(date, "%Y-%m-%dT%H:%M:%S", &tm); + free(date); + return mktime(&tm); +} + +int tar_header(apr_pool_t *pool, char *path, char *node, size_t f_size) +{ + char buf[512]; + unsigned int i, checksum; + svn_boolean_t is_dir; + + memset(buf, 0, sizeof(buf)); + + if ((strlen(path) == 0) && (strlen(node) == 0)) { + return 0; + } + + if (strlen(node) == 0) { + is_dir = 1; + } else { + is_dir = 0; + } + + if (strlen(path) == 0) { + strncpy(buf, apr_psprintf(pool, "%s", node), 99); + } else if (strlen(path) + strlen(node) < 100) { + strncpy(buf, apr_psprintf(pool, "%s/%s", path+1, node), 99); + } else { + fprintf(stderr, "really long file path...\n"); + strncpy(&buf[0], node, 99); + strncpy(&buf[345], path+1, 154); + } + + strncpy(&buf[100], apr_psprintf(pool, "%07o", (is_dir ? 
0755 : 0644)), 7); + strncpy(&buf[108], apr_psprintf(pool, "%07o", 1000), 7); + strncpy(&buf[116], apr_psprintf(pool, "%07o", 1000), 7); + strncpy(&buf[124], apr_psprintf(pool, "%011lo", f_size), 11); + strncpy(&buf[136], apr_psprintf(pool, "%011lo", archive_time), 11); + strncpy(&buf[156], (is_dir ? "5" : "0"), 1); + strncpy(&buf[257], "ustar ", 8); + strncpy(&buf[265], "clee", 31); + strncpy(&buf[297], "clee", 31); + // strncpy(&buf[329], apr_psprintf(pool, "%07o", 0), 7); + // strncpy(&buf[337], apr_psprintf(pool, "%07o", 0), 7); + + strncpy(&buf[148], " ", 8); + checksum = 0; + for (i = 0; i < sizeof(buf); i++) { + checksum += buf[i]; + } + strncpy(&buf[148], apr_psprintf(pool, "%07o", checksum & 0x1fffff), 7); + + fwrite(buf, sizeof(char), sizeof(buf), stdout); + + return 0; +} + +int tar_footer() +{ + char block[1024]; + memset(block, 0, sizeof(block)); + fwrite(block, sizeof(char), sizeof(block), stdout); +} + +int dump_blob(svn_fs_root_t *root, char *prefix, char *path, char *node, apr_pool_t *pool) +{ + char *full_path, buf[512]; + apr_size_t len; + svn_stream_t *stream; + svn_filesize_t stream_length; + + full_path = apr_psprintf(pool, "%s%s/%s", prefix, path, node); + + SVN_ERR(svn_fs_file_length(&stream_length, root, full_path, pool)); + SVN_ERR(svn_fs_file_contents(&stream, root, full_path, pool)); + + tar_header(pool, path, node, stream_length); + + do { + len = sizeof(buf); + memset(buf, '\0', sizeof(buf)); + SVN_ERR(svn_stream_read(stream, buf, &len)); + fwrite(buf, sizeof(char), sizeof(buf), stdout); + } while (len == sizeof(buf)); + + return 0; +} + +int dump_tree(svn_fs_root_t *root, char *prefix, char *path, apr_pool_t *pool) +{ + const void *key; + void *val; + char *node, *subpath, *full_path; + + apr_pool_t *subpool; + apr_hash_t *dir_entries; + apr_hash_index_t *i; + + svn_boolean_t is_dir; + + tar_header(pool, path, "", 0); + + SVN_ERR(svn_fs_dir_entries(&dir_entries, root, apr_psprintf(pool, "%s/%s", prefix, path), pool)); + + subpool = 
svn_pool_create(pool); + + for (i = apr_hash_first(pool, dir_entries); i; i = apr_hash_next(i)) { + svn_pool_clear(subpool); + apr_hash_this(i, &key, NULL, &val); + node = (char *)key; + + subpath = apr_psprintf(subpool, "%s/%s", path, node); + full_path = apr_psprintf(subpool, "%s%s", prefix, subpath); + + svn_fs_is_dir(&is_dir, root, full_path, subpool); + + if (is_dir) { + dump_tree(root, prefix, subpath, subpool); + } else { + dump_blob(root, prefix, path, node, subpool); + } + } + + svn_pool_destroy(subpool); + + return 0; +} + +int crawl_filesystem(char *repos_path, char *root_path, apr_pool_t *pool) +{ + char *path; + + apr_hash_t *props; + apr_hash_index_t *i; + + svn_repos_t *repos; + svn_fs_t *fs; + svn_string_t *svndate; + svn_revnum_t youngest_rev, export_rev; + svn_fs_root_t *fs_root; + + SVN_ERR(svn_fs_initialize(pool)); + SVN_ERR(svn_repos_open(&repos, repos_path, pool)); + if ((fs = svn_repos_fs(repos)) == NULL) + return -1; + SVN_ERR(svn_fs_youngest_rev(&youngest_rev, fs, pool)); + + export_rev = youngest_rev; + + SVN_ERR(svn_fs_revision_root(&fs_root, fs, export_rev, pool)); + SVN_ERR(svn_fs_revision_proplist(&props, fs, export_rev, pool)); + + svndate = apr_hash_get(props, "svn:date", APR_HASH_KEY_STRING); + archive_time = get_epoch((char *)svndate->data); + + fprintf(stderr, "Exporting archive of r%ld... 
\n", export_rev); + + dump_tree(fs_root, root_path, "", pool); + + tar_footer(); + + fprintf(stderr, "done!\n"); + + return 0; +} + +int main(int argc, char *argv[]) +{ + apr_pool_t *pool; + apr_getopt_t *options; + + apr_getopt_option_t long_options[] = { + { "help", 'h', 0 }, + { "prefix", 'p', 0 }, + { "basename", 'b', 0 }, + { "revision", 'r', 0 }, + { NULL, 0, 0 } + }; + + if (argc < 2) { + fprintf(stderr, "usage: %s REPOS_PATH [prefix]\n", argv[0]); + return -1; + } + + if (apr_initialize() != APR_SUCCESS) { + fprintf(stderr, "You lose at apr_initialize().\n"); + return -1; + } + + pool = svn_pool_create(NULL); + + crawl_filesystem(argv[1], (argc == 3 ? argv[2] : TRUNK), pool); + + apr_terminate(); + + return 0; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extra/fast-export/svn-fast-export.c Thu Feb 04 08:47:09 2016 +0000 @@ -0,0 +1,187 @@ +/* + * svn-fast-export.c + * ---------- + * Walk through each revision of a local Subversion repository and export it + * in a stream that git-fast-import can consume. + * + * Author: Chris Lee <clee@kde.org> + * License: MIT <http://www.opensource.org/licenses/mit-license.php> + */ + +#define _XOPEN_SOURCE +#include <unistd.h> +#include <string.h> +#include <stdio.h> +#include <time.h> + +#ifndef PATH_MAX +#define PATH_MAX 4096 +#endif + +#include <apr_lib.h> +#include <apr_getopt.h> +#include <apr_general.h> + +#include <svn_fs.h> +#include <svn_repos.h> +#include <svn_pools.h> +#include <svn_types.h> + +#undef SVN_ERR +#define SVN_ERR(expr) SVN_INT_ERR(expr) +#define apr_sane_push(arr, contents) *(char **)apr_array_push(arr) = contents + +#define TRUNK "/trunk/" + +time_t get_epoch(char *svn_date) +{ + struct tm tm = {0}; + char *date = malloc(strlen(svn_date) * sizeof(char *)); + strncpy(date, svn_date, strlen(svn_date) - 8); + strptime(date, "%Y-%m-%dT%H:%M:%S", &tm); + free(date); + return mktime(&tm); +} + +int dump_blob(svn_fs_root_t *root, char *full_path, apr_pool_t *pool) +{ + apr_size_t len; + svn_stream_t *stream, *outstream; + svn_filesize_t stream_length; + + SVN_ERR(svn_fs_file_length(&stream_length, root, full_path, pool)); + SVN_ERR(svn_fs_file_contents(&stream, root, full_path, pool)); + + fprintf(stdout, "data %lu\n", stream_length); + fflush(stdout); + + SVN_ERR(svn_stream_for_stdout(&outstream, pool)); + SVN_ERR(svn_stream_copy(stream, outstream, pool)); + + fprintf(stdout, "\n"); + fflush(stdout); + + return 0; +} + +int export_revision(svn_revnum_t rev, svn_fs_t *fs, apr_pool_t *pool) +{ + unsigned int mark; + const void *key; + void *val; + char *path, *file_change; + apr_pool_t *revpool; + apr_hash_t *changes, *props; + apr_hash_index_t *i; + apr_array_header_t *file_changes; + svn_string_t *author, 
*committer, *svndate, *svnlog; + svn_boolean_t is_dir; + svn_fs_root_t *fs_root; + svn_fs_path_change_t *change; + + fprintf(stderr, "Exporting revision %ld... ", rev); + + SVN_ERR(svn_fs_revision_root(&fs_root, fs, rev, pool)); + SVN_ERR(svn_fs_paths_changed(&changes, fs_root, pool)); + SVN_ERR(svn_fs_revision_proplist(&props, fs, rev, pool)); + + revpool = svn_pool_create(pool); + + file_changes = apr_array_make(pool, apr_hash_count(changes), sizeof(char *)); + mark = 1; + for (i = apr_hash_first(pool, changes); i; i = apr_hash_next(i)) { + svn_pool_clear(revpool); + apr_hash_this(i, &key, NULL, &val); + path = (char *)key; + change = (svn_fs_path_change_t *)val; + + SVN_ERR(svn_fs_is_dir(&is_dir, fs_root, path, revpool)); + + if (is_dir || strncmp(TRUNK, path, strlen(TRUNK))) { + continue; + } + + if (change->change_kind == svn_fs_path_change_delete) { + apr_sane_push(file_changes, (char *)svn_string_createf(pool, "D %s", path + strlen(TRUNK))->data); + } else { + apr_sane_push(file_changes, (char *)svn_string_createf(pool, "M 644 :%u %s", mark, path + strlen(TRUNK))->data); + fprintf(stdout, "blob\nmark :%u\n", mark++); + dump_blob(fs_root, (char *)path, revpool); + } + } + + if (file_changes->nelts == 0) { + fprintf(stderr, "skipping.\n"); + svn_pool_destroy(revpool); + return 0; + } + + author = apr_hash_get(props, "svn:author", APR_HASH_KEY_STRING); + if (svn_string_isempty(author)) + author = svn_string_create("nobody", pool); + svndate = apr_hash_get(props, "svn:date", APR_HASH_KEY_STRING); + svnlog = apr_hash_get(props, "svn:log", APR_HASH_KEY_STRING); + + fprintf(stdout, "commit refs/heads/master\n"); + fprintf(stdout, "committer %s <%s@localhost> %ld -0000\n", author->data, author->data, get_epoch((char *)svndate->data)); + fprintf(stdout, "data %d\n", svnlog->len); + fputs(svnlog->data, stdout); + fprintf(stdout, "\n"); + fputs(apr_array_pstrcat(pool, file_changes, '\n'), stdout); + fprintf(stdout, "\n\n"); + fflush(stdout); + + 
svn_pool_destroy(revpool); + + fprintf(stderr, "done!\n"); + + return 0; +} + +int crawl_revisions(char *repos_path) +{ + apr_pool_t *pool, *subpool; + svn_fs_t *fs; + svn_repos_t *repos; + svn_revnum_t youngest_rev, min_rev, max_rev, rev; + + pool = svn_pool_create(NULL); + + SVN_ERR(svn_fs_initialize(pool)); + SVN_ERR(svn_repos_open(&repos, repos_path, pool)); + if ((fs = svn_repos_fs(repos)) == NULL) + return -1; + SVN_ERR(svn_fs_youngest_rev(&youngest_rev, fs, pool)); + + min_rev = 1; + max_rev = youngest_rev; + + subpool = svn_pool_create(pool); + for (rev = min_rev; rev <= max_rev; rev++) { + svn_pool_clear(subpool); + export_revision(rev, fs, subpool); + } + + svn_pool_destroy(pool); + + return 0; +} + +int main(int argc, char *argv[]) +{ + if (argc != 2) { + fprintf(stderr, "usage: %s REPOS_PATH\n", argv[0]); + return -1; + } + + if (apr_initialize() != APR_SUCCESS) { + fprintf(stderr, "You lose at apr_initialize().\n"); + return -1; + } + + crawl_revisions(argv[1]); + + apr_terminate(); + + return 0; +}
#!/usr/bin/python
#
# svn-fast-export.py
# ----------
# Walk through each revision of a local Subversion repository and export it
# in a stream that git-fast-import can consume.
#
# Author: Chris Lee <clee@kde.org>
# License: MIT <http://www.opensource.org/licenses/mit-license.php>

trunk_path = '/trunk/'
branches_path = '/branches/'
tags_path = '/tags/'

first_rev = 1
final_rev = 0

import sys, os.path
from optparse import OptionParser
from time import mktime, strptime
from svn.fs import svn_fs_file_length, svn_fs_file_contents, svn_fs_is_dir, svn_fs_revision_root, svn_fs_youngest_rev, svn_fs_revision_proplist, svn_fs_paths_changed
from svn.core import svn_pool_create, svn_pool_clear, svn_pool_destroy, svn_stream_for_stdout, svn_stream_copy, svn_stream_close, run_app
from svn.repos import svn_repos_open, svn_repos_fs

# single-letter codes indexed by svn_fs_path_change_kind
ct_short = ['M', 'A', 'D', 'R', 'X']

def dump_file_blob(root, full_path, pool):
    """Write one file's contents to stdout as a fast-import 'data' chunk."""
    stream_length = svn_fs_file_length(root, full_path, pool)
    stream = svn_fs_file_contents(root, full_path, pool)
    sys.stdout.write("data %s\n" % stream_length)
    sys.stdout.flush()
    ostream = svn_stream_for_stdout(pool)
    svn_stream_copy(stream, ostream, pool)
    svn_stream_close(ostream)
    sys.stdout.write("\n")


def export_revision(rev, repo, fs, pool):
    """Emit one svn revision as a git-fast-import commit on
    refs/heads/master; revisions touching nothing under trunk_path
    are skipped."""
    sys.stderr.write("Exporting revision %s... " % rev)

    revpool = svn_pool_create(pool)
    svn_pool_clear(revpool)

    # Open a root object representing the youngest (HEAD) revision.
    root = svn_fs_revision_root(fs, rev, revpool)

    # And the list of what changed in this revision.
    changes = svn_fs_paths_changed(root, revpool)

    i = 1
    marks = {}
    file_changes = []

    for path, change_type in changes.iteritems():
        if svn_fs_is_dir(root, path, revpool):
            continue

        if not path.startswith(trunk_path):
            # We don't handle branches. Or tags. Yet.
            pass
        else:
            # change-kind code, computed only for paths we actually
            # export (the original computed it for every path)
            c_t = ct_short[change_type.change_kind]
            if c_t == 'D':
                file_changes.append("D %s" % path.replace(trunk_path, ''))
            else:
                marks[i] = path.replace(trunk_path, '')
                file_changes.append("M 644 :%s %s" % (i, marks[i]))
                sys.stdout.write("blob\nmark :%s\n" % i)
                dump_file_blob(root, path, revpool)
                i += 1

    # Get the commit author and message
    props = svn_fs_revision_proplist(fs, rev, revpool)

    # 'in' replaces the deprecated dict.has_key()
    if 'svn:author' in props:
        author = "%s <%s@localhost>" % (props['svn:author'], props['svn:author'])
    else:
        author = 'nobody <nobody@localhost>'

    if len(file_changes) == 0:
        svn_pool_destroy(revpool)
        sys.stderr.write("skipping.\n")
        return

    # strip the ".uuuuuuZ" fraction + zone designator from svn:date
    svndate = props['svn:date'][0:-8]
    commit_time = mktime(strptime(svndate, '%Y-%m-%dT%H:%M:%S'))
    sys.stdout.write("commit refs/heads/master\n")
    sys.stdout.write("committer %s %s -0000\n" % (author, int(commit_time)))
    sys.stdout.write("data %s\n" % len(props['svn:log']))
    sys.stdout.write(props['svn:log'])
    sys.stdout.write("\n")
    sys.stdout.write('\n'.join(file_changes))
    sys.stdout.write("\n\n")

    svn_pool_destroy(revpool)

    sys.stderr.write("done!\n")

    #if rev % 1000 == 0:
    #    sys.stderr.write("gc: %s objects\n" % len(gc.get_objects()))
    #    sleep(5)


def crawl_revisions(pool, repos_path):
    """Open the repository at REPOS_PATH, and recursively crawl all its
    revisions."""
    global final_rev

    # Open the repository at REPOS_PATH, and get a reference to its
    # versioning filesystem.
    repos_obj = svn_repos_open(repos_path, pool)
    fs_obj = svn_repos_fs(repos_obj)

    # Query the current youngest revision.
    youngest_rev = svn_fs_youngest_rev(fs_obj, pool)

    # NOTE(review): this local shadows the module-level first_rev; both
    # are 1, so behaviour is unchanged.
    first_rev = 1
    if final_rev == 0:
        final_rev = youngest_rev
    for rev in xrange(first_rev, final_rev + 1):
        export_revision(rev, repos_obj, fs_obj, pool)


if __name__ == '__main__':
    usage = '%prog [options] REPOS_PATH'
    parser = OptionParser()
    parser.set_usage(usage)
    parser.add_option('-f', '--final-rev', help='Final revision to import',
                      dest='final_rev', metavar='FINAL_REV', type='int')
    parser.add_option('-t', '--trunk-path', help='Path in repo to /trunk',
                      dest='trunk_path', metavar='TRUNK_PATH')
    parser.add_option('-b', '--branches-path', help='Path in repo to /branches',
                      dest='branches_path', metavar='BRANCHES_PATH')
    parser.add_option('-T', '--tags-path', help='Path in repo to /tags',
                      dest='tags_path', metavar='TAGS_PATH')
    (options, args) = parser.parse_args()

    if options.trunk_path is not None:
        trunk_path = options.trunk_path
    if options.branches_path is not None:
        branches_path = options.branches_path
    if options.tags_path is not None:
        tags_path = options.tags_path
    if options.final_rev is not None:
        final_rev = options.final_rev

    if len(args) != 1:
        parser.print_help()
        sys.exit(2)

    # Canonicalize (enough for Subversion, at least) the repository path.
    repos_path = os.path.normpath(args[0])
    if repos_path == '.':
        repos_path = ''

    # Call the app-wrapper, which takes care of APR initialization/shutdown
    # and the creation and cleanup of our top-level memory pool.
    run_app(crawl_revisions, repos_path)
#!/usr/bin/env ruby

# Create authormap files for hg repos based on the changeset & project
# member info available to Redmine.
#
# We have a set of hg repos in a given directory:
#
#   /var/hg/repo_1
#   /var/hg/repo_2
#   /var/hg/repo_3
#
# and we want to produce authormap files in another directory:
#
#   /var/repo-export/authormap/authormap_repo_1
#   /var/repo-export/authormap/authormap_repo_2
#   /var/repo-export/authormap/authormap_repo_3
#
# This script does that, if given the two directory names as arguments
# to the -s and -o options. In the above example:
#
#   ./script/rails runner -e production extra/soundsoftware/create-repo-authormaps.rb -s /var/hg -o /var/repo-export/authormap
#
# Note that this script will overwrite any existing authormap
# files. (That's why the output files are given an authormap_ prefix,
# so we're less likely to clobber something else if the user gets the
# arguments wrong.)

require 'getoptlong'

opts = GetoptLong.new(
  ['--scm-dir', '-s', GetoptLong::REQUIRED_ARGUMENT],
  ['--out-dir', '-o', GetoptLong::REQUIRED_ARGUMENT],
  ['--environment', '-e', GetoptLong::OPTIONAL_ARGUMENT]
)

$repos_base = ''
$out_base = ''

def usage
  puts "See source code for supported options"
  exit
end

begin
  opts.each do |opt, arg|
    case opt
    when '--scm-dir' then $repos_base = arg.dup
    when '--out-dir' then $out_base = arg.dup
    end
  end
rescue StandardError
  # GetoptLong has already reported the problem; just exit non-zero.
  # (Narrowed from the original bare rescue.)
  exit 1
end

usage if $repos_base.empty? || $out_base.empty?

unless File.directory?($repos_base)
  puts "input directory '#{$repos_base}' doesn't exist"
  exit 1
end

unless File.directory?($out_base)
  puts "output directory '#{$out_base}' doesn't exist"
  exit 1
end

# Project.find(:all) is the deprecated Rails 2 form; .all is equivalent
# on Rails 3.2 and returns an array (possibly empty, never nil -- the
# original nil? check could not fire).
projects = Project.all

if projects.empty?
  puts 'No projects found'
  exit 1
end

projects.each do |proj|

  # Only public projects are exported.
  next unless proj.is_public

  next unless proj.respond_to?(:repository)

  repo = proj.repository
  # to_s also guards against a repository record with a nil url, which
  # previously raised NoMethodError
  next if repo.nil? || repo.url.to_s.empty?

  repo_url = repo.url.gsub(/^file:\/*/, "/")
  if repo_url != File.join($repos_base, proj.identifier)
    puts "Project #{proj.identifier} has repo in unsupported location #{repo_url}, skipping"
    next
  end

  # committers yields [committer-string, user-id] pairs.
  committers = repo.committers

  authormap = ""
  committers.each do |c, uid|

    # Some of our repos have broken email addresses in them: e.g. one
    # changeset has a committer name of the form
    #
    #   NAME <name <NAME <name@example.com">
    #
    # I don't know how it got like that... If the committer has more
    # than one '<' in it, truncate it just before the first one, and
    # then look up the author name again.
    #
    if c =~ /<.*</ then
      # So this is a completely pathological case
      user = User.find_by_id uid
      if user.nil? then
        # because the given committer is bogus, we must write something in the map
        name = c.sub(/\s*<.*$/, "")
        authormap << "#{c}=#{name} <unknown@example.com>\n"
      else
        authormap << "#{c}=#{user.name} <#{user.mail}>\n"
      end
    elsif not c =~ /[^<]+<.*@.*>/ then
      # This is the "normal" case that needs work, where a user has
      # their name in the commit but no email address
      user = User.find_by_id uid
      authormap << "#{c}=#{user.name} <#{user.mail}>\n" unless user.nil?
    end
  end

  File.open(File.join($out_base, "authormap_#{proj.identifier}"), "w") do |f|
    f.puts(authormap)
  end

end
#!/bin/bash

# Mirror every exportable Mercurial repository as a git repository,
# using hg-fast-export with authormaps generated from Redmine data.

set -e

progdir=$(dirname "$0")
case "$progdir" in
    /*) ;;
    *) progdir="$(pwd)/$progdir" ;;
esac

rails_scriptdir="$progdir/../../script"
rails="$rails_scriptdir/rails"

if [ ! -x "$rails" ]; then
    echo "Expected to find rails executable at $rails"
    exit 2
fi

fastexport="$progdir/../fast-export/hg-fast-export.sh"
if [ ! -x "$fastexport" ]; then
    echo "Expected to find hg-fast-export.sh executable at $fastexport"
    exit 2
fi

environment="$1"
hgdir="$2"
gitdir="$3"

if [ -z "$hgdir" ] || [ -z "$gitdir" ]; then
    echo "Usage: $0 <environment> <hgdir> <gitdir>"
    echo "  where"
    echo "  - environment is the Rails environment (development or production)"
    echo "  - hgdir is the directory containing project Mercurial repositories"
    echo "  - gitdir is the directory in which output git repositories are to be"
    echo "    created or updated"
    exit 2
fi

if [ ! -d "$hgdir" ]; then
    echo "Mercurial repository directory $hgdir not found"
    exit 1
fi

if [ ! -d "$gitdir" ]; then
    echo "Target git repository dir $gitdir not found (please create at least the empty directory)"
    exit 1
fi

set -u

authordir="$gitdir/__AUTHORMAPS"
mkdir -p "$authordir"

wastedir="$gitdir/__WASTE"
mkdir -p "$wastedir"

echo
echo "$0 starting at $(date)"

echo "Extracting author maps..."

# Delete any existing authormap files, because we want to ensure we
# don't have an authormap for any project that was exportable but has
# become non-exportable (e.g. has gone private)
#
# Bug fix: the glob must be outside the quotes -- the original
# rm -f "$authordir/*" looked for a file literally named '*' and so
# never deleted anything, leaving stale authormaps (and hence stale
# git mirrors) behind.
rm -f "$authordir"/*

"$rails" runner -e "$environment" "$progdir/create-repo-authormaps.rb" \
    -s "$hgdir" -o "$authordir"

for hgrepo in "$hgdir"/*; do

    if [ ! -d "$hgrepo/.hg" ]; then
        echo "Directory $hgrepo does not appear to be a Mercurial repo, skipping"
        continue
    fi

    reponame=$(basename "$hgrepo")
    authormap="$authordir/authormap_$reponame"

    git_repodir="$gitdir/$reponame"

    if [ ! -f "$authormap" ]; then
        echo "No authormap file was created for repo $reponame, skipping"

        # If there is no authormap file, then we should not have a git
        # mirror -- this is a form of access control, not just an
        # optimisation (authormap files are expected to exist for all
        # exportable projects, even if empty). So if a git mirror
        # exists, we move it away
        if [ -d "$git_repodir" ]; then
            mv "$git_repodir" "$wastedir/$(date +%s).$reponame"
        fi

        continue
    fi

    if [ ! -d "$git_repodir" ]; then
        git init --bare "$git_repodir"
    fi

    echo
    echo "About to run fast export for repo $reponame..."

    (
        cd "$git_repodir"

        # Force is necessary because git-fast-import (or git) can't handle
        # branches having more than one head ("Error: repository has at
        # least one unnamed head"), which happens from time to time in
        # valid Hg repos.  With --force apparently it will just pick one
        # of the two heads arbitrarily, which is also alarming but is
        # more likely to be useful
        "$fastexport" --quiet -r "$hgrepo" --hgtags -A "$authormap" --force

        git update-server-info
    )

    echo "Fast export done"

done

echo "$0 finishing at $(date)"
#!/bin/bash
#
# Convert an Hg repo with subrepos into a new repo in which the
# subrepo contents are included in the main repo. The history of the
# original and its subrepos is retained.
#
# Note that this script invokes itself, in order to handle nested
# subrepos.
#
# While this does work, I'm not convinced it's entirely a good
# idea. The history ends up a bit of a mess, and if it's a preliminary
# to converting to git (which is one obvious reason to do this), the
# history ends up even messier after that conversion.

set -ex

repo="$1"
target="$2"
target_subdir="$3"   # optional: set on recursive invocations
revision="$4"        # optional: subrepo revision to clone

if [ -z "$repo" ] || [ -z "$target" ]; then
    echo "usage: $0 <repo-url> <target-dir> [<target-subdir> <revision>]"
    exit 2
fi

set -u

myname="$0"
mydir=$(dirname "$myname")

reponame=$(basename "$repo")
tmpdir="/tmp/flatten_$$"
mkdir -p "$tmpdir"
# Fix: single quotes defer expansion to trap time and keep the path
# quoted inside the executed command (the original expanded it unquoted
# when the trap was installed).
trap 'rm -rf "$tmpdir"' 0

filemap="$tmpdir/filemap"
tmprepo="$tmpdir/tmprepo"
subtmp="$tmpdir/subtmp"

if [ -n "$revision" ]; then
    hg clone -r "$revision" "$repo" "$tmprepo"
else
    hg clone "$repo" "$tmprepo"
fi

# Print "dir,uri" lines for each subrepo declared in .hgsub (if any).
read_sub() {
    if [ -f "$tmprepo/.hgsub" ]; then
        cat "$tmprepo/.hgsub" | sed 's/ *= */,/'
    fi
}

# Build a 'hg convert' filemap that drops the subrepo metadata files,
# excludes the subrepo directories themselves, and (on recursive calls)
# renames everything into the target subdirectory.
( echo "exclude .hgsub"
  echo "exclude .hgsubstate"
  read_sub | while IFS=, read dir uri; do
      echo "exclude $dir"
  done
  if [ -n "$target_subdir" ]; then
      echo "rename . $target_subdir"
  fi
) > "$filemap"

hg convert --filemap "$filemap" "$tmprepo" "$target"
( cd "$target"
  hg update
)

# Now flatten each subrepo (recursively) and merge it into the target.
read_sub | while IFS=, read dir uri; do
    rm -rf "$subtmp"
    # NOTE(review): 'dir' is used unescaped inside a grep regex; paths
    # containing regex metacharacters would mismatch -- confirm none do.
    revision=$(grep ' '"$dir"'$' "$tmprepo/.hgsubstate" | awk '{ print $1; }')
    if [ -n "$target_subdir" ]; then
        "$myname" "$tmprepo/$dir" "$subtmp" "$target_subdir/$dir" "$revision"
    else
        "$myname" "$tmprepo/$dir" "$subtmp" "$dir" "$revision"
    fi
    ( cd "$target"
      hg pull -f "$subtmp" &&
      hg merge --tool internal:local &&
      hg commit -m "Merge former subrepo $dir"
    )
done
# Print out an authormap file for hg-to-git conversion using
# hg-fast-export
#
# Invoke with the project identifier as argument, e.g.
#
#   ./script/rails runner -e production extra/soundsoftware/get-repo-authormap.rb soundsoftware-site

proj_ident = ARGV.last
proj = Project.find_by_identifier(proj_ident)

# Guard added: the original crashed with NoMethodError on an unknown
# identifier or a project with no repository.
if proj.nil?
  STDERR.puts "No project found with identifier '#{proj_ident}'"
  exit 1
end

repo = Repository.where(:project_id => proj.id).first
if repo.nil?
  STDERR.puts "Project '#{proj_ident}' has no repository"
  exit 1
end

csets = Changeset.where(:repository_id => repo.id)
committers = csets.map { |c| c.committer }.sort.uniq

committers.each do |c|
  # Only committers without a usable 'Name <user@host>' form need a
  # mapping entry; resolve them through Redmine's committer lookup.
  unless c =~ /[^<]+<.*@.*>/
    u = repo.find_committer_user(c)
    print "#{c}=#{u.name} <#{u.mail}>\n" unless u.nil?
  end
end