annotate sworduploader.py @ 3:c1918aa337c4

v0.2 small bug fixes
author Marco Fabiani <marco.fabiani@eecs.qmul.ac.uk>
date Wed, 28 Mar 2012 17:48:59 +0100
parents c72d6b5d58bc
children 96d62e78ac9f
rev   line source
marco@0 1 #!/usr/bin/env python
marco@1 2
marco@3 3 """ SWORD2 DSpace bulk uploader - v0.2
marco@1 4
marco@1 5 A python script to submit large numbers of files to a SWORD2-compatible repository, specifically DSpace 1.8x.
marco@1 6 Built on the SWORD2 python client library: https://bitbucket.org/beno/python-sword2/overview
marco@1 7
marco@1 8 Dependencies:
marco@1 9
marco@1 10 - python 2.X
marco@1 11
marco@1 12 - sword2 library: https://bitbucket.org/beno/python-sword2/src
marco@1 13
marco@1 14 -----------------------------------
marco@1 15 Copyright 2012 Marco Fabiani
marco@1 16 Copyright 2012 Queen Mary, University of London
marco@1 17 -----------------------------------
marco@1 18 """
marco@0 19
marco@0 20 import argparse, getpass, zipfile,os,sys
marco@0 21 from sword2 import *
marco@0 22
# Service document used when --servicedoc is not given on the command line.
# Kept in one place so the help text and the fallback cannot drift apart.
DEFAULT_SERVICEDOC = "http://c4dm.eecs.qmul.ac.uk/smdmrd-test/swordv2/servicedocument"

# Parse arguments
# Every option uses nargs=1 (or "+"), so argparse stores its value as a list;
# options that were not supplied are left as None.
parser = argparse.ArgumentParser(
    description="Bulk upload to DSpace using SWORDv2.",
    epilog="If the submission is created successfully, it will remain open to be completed with the necessary metadata and licenses, using the DSpace web interface. The submission can be found in the \"My Account -> Submissions\" section of the user's area.")
parser.add_argument(
    "data", type=str, nargs=1,
    help="Accepts: METSDSpaceSIP packages, zip files, directories, single files. NOTE: METSDSpaceSIP packages are only accepted by Collections with a workflow!")
parser.add_argument(
    "--username", dest="user_name", type=str, nargs=1,
    help="DSpace username.")
parser.add_argument(
    "--title", dest="title", type=str, nargs=1,
    help="Title (ignored for METS packages).")
parser.add_argument(
    "--author", dest="author", type=str, nargs="+",
    help="Author(s) (ignored for METS packages). Accepts multiple entries in the format \"Surname, Name\"")
parser.add_argument(
    "--date", dest="date", type=str, nargs=1,
    help="Date of creation (string) (ignored for METS packages).")
parser.add_argument(
    "--servicedoc", dest="dspaceurl", type=str, nargs=1,
    help="Url of the SWORDv2 service document (default: " + DEFAULT_SERVICEDOC + ")")

args = parser.parse_args()

# Path of the file, directory or zip/METS package to submit.
data = args.data[0]

# URL of the SWORDv2 service document to connect to.
if args.dspaceurl is None:
    dspaceurl = DEFAULT_SERVICEDOC
else:
    dspaceurl = args.dspaceurl[0]
marco@0 39
marco@0 40
marco@0 41 try:
marco@0 42 # Connect to SWORD server
marco@0 43 attempts = 3 # Number of attempts left to connect to server
marco@0 44 connected = False
marco@0 45 while attempts>0 and not connected:
marco@0 46 print "Connecting to SWORD server. Remaining attempts: ", attempts
marco@0 47 # Try to login, get service document
marco@0 48 # Get username and password
marco@0 49 if args.user_name == None:
marco@0 50 user_name = raw_input("DSpace Username: ")
marco@0 51 else:
marco@0 52 user_name = args.user_name[0]
marco@0 53 print "DSpace Username: ",user_name
marco@0 54 user_pass = getpass.getpass("DSpace password:")
marco@0 55 # Connect to the server
marco@0 56 c = Connection(dspaceurl, user_name=user_name, user_pass=user_pass,keep_history=False)
marco@3 57
marco@0 58 # Get service document
marco@0 59 try:
marco@0 60 c.get_service_document()
marco@3 61 except: # Server error
marco@3 62 print "Server unreachable!"
marco@3 63 break
marco@3 64 if c.sd != None:
marco@3 65 connected = True
marco@3 66 else:
marco@0 67 attempts-=1
marco@0 68 print "Incorrect username and/or password"
marco@3 69
marco@0 70
marco@0 71 if connected:
marco@0 72 # List available collections
marco@0 73 print "------------------------"
marco@0 74 print "Welcome to the ",c.workspaces[0][0], "repository"
marco@0 75 print "Available Collections: "
marco@0 76 numColl = len(c.workspaces[0][1])
marco@0 77 for ctr in range(numColl):
marco@0 78 coll = c.workspaces[0][1][ctr]
marco@0 79 print ctr+1,":",coll.title
marco@0 80 # Select a collection to deposit into
marco@0 81 sel = -1
marco@0 82 while (sel<=0 or sel>numColl):
marco@0 83 sel = input("Select a Collection to submit your files into: ")
marco@0 84 collection = c.workspaces[0][1][sel-1]
marco@0 85 print "Selected Collection: ",collection.title
marco@0 86
marco@0 87 # Create a submission: build the zip files
marco@0 88 temp = True # delete the zip file at the end of the upload
marco@0 89 if zipfile.is_zipfile(data):
marco@0 90 zipf = data
marco@0 91 temp = False
marco@0 92 elif os.path.isfile(data): # This is a single file
marco@0 93 dataname = os.path.basename(data)
marco@0 94 zipf = os.path.splitext(dataname)[0]+".zip"
marco@0 95 myzip = zipfile.ZipFile(zipf, "w")
marco@3 96 myzip.write(data,os.path.basename(data))
marco@0 97 myzip.close()
marco@3 98 elif os.path.isdir(data): # This is a directory, zip all the files and maintain the structure, but start from the base only...
marco@0 99 dataname = os.path.basename(os.path.normpath(data))
marco@0 100 zipf = dataname+".zip"
marco@0 101 myzip = zipfile.ZipFile(zipf, "w")
marco@0 102 # get the directory structure
marco@3 103 print "Creating a zip archive for submission..."
marco@0 104 for root, dirs, files in os.walk(data):
marco@0 105 for name in files:
marco@3 106 myzip.write(os.path.join(root,name),
marco@3 107 os.path.relpath(os.path.join(root,name),data))
marco@0 108 myzip.close()
marco@0 109 else:
marco@0 110 print "Couldn't find the data."
marco@0 111 sys.exit()
marco@0 112
marco@0 113 #Check if this is a METSDSpaceSIP: see if there is a mets.xml file in the zip
marco@0 114 myzip = zipfile.ZipFile(zipf)
marco@0 115 if "mets.xml" in myzip.namelist():
marco@0 116 packaging = "http://purl.org/net/sword/package/METSDSpaceSIP"
marco@0 117 type = "METS"
marco@0 118 else:
marco@0 119 packaging = "http://purl.org/net/sword/package/SimpleZip"
marco@0 120 type = "SimpleZip"
marco@0 121
marco@0 122 print "------------------------"
marco@0 123 print "This is a ",type," submission"
marco@3 124 print "Uploading files..."
marco@0 125 myzip.close()
marco@0 126
marco@0 127 payload = open(zipf, "rb")
marco@0 128 try:
marco@0 129 receipt_dep = c.create(col_iri = collection.href,
marco@0 130 payload = payload,
marco@0 131 filename = zipf,
marco@0 132 mimetype = "application/zip",
marco@0 133 packaging = packaging)
marco@0 134 print type, " submission successful."
marco@0 135 if type == "SimpleZip":
marco@0 136 # If some of the additional arguments for author, title, date etc. have been specified, update the metadata
marco@0 137 if (args.title != None) or (args.author != None) or (args.date != None):
marco@0 138 entry = Entry()
marco@0 139 print "------------------------"
marco@0 140 print "Updating with additional metadata"
marco@0 141 if args.title != None:
marco@0 142 entry.add_fields(dcterms_title = args.title[0])
marco@0 143 if args.author != None:
marco@0 144 for creator in args.author:
marco@0 145 entry.add_fields(dcterms_creator=creator)
marco@0 146 if args.date != None:
marco@0 147 entry.add_fields(dcterms_created = args.date[0])
marco@0 148 try:
marco@0 149 receipt_update = c.update(dr = receipt_dep , metadata_entry = entry, in_progress = True) # in_progress is True: we don't want to close the submission
marco@0 150 print "Additional metadata updated successfully."
marco@0 151 except:
marco@0 152 print "Server error"
marco@0 153 print "------------------------"
marco@0 154 print "You will find this submission in the \"Submissions\" list in your DSpace account. To complete/edit it with metadata and licenses, click on the title and then on \"Resume\"."
marco@0 155 except:
marco@0 156 print "Error! Couldn't submit the file!"
marco@0 157 if type == "METS": # Just guessing: not sure this is the problem...
marco@0 158 print "To submit a METS package, the collection MUST have a workflow!"
marco@0 159 payload.close()
marco@3 160 if temp:
marco@3 161 os.remove(zipf)
marco@0 162
marco@0 163 else: # Failed to connect to SWORDv2 Server
marco@0 164 print "Couldn't connect to the server."
marco@0 165 if attempts == 0:
marco@0 166 print "Invalid credentials entered 3 times."
marco@0 167
marco@0 168 except KeyboardInterrupt:
marco@0 169 print "------------------------"
marco@3 170 print "\nSubmission aborted by user."