marco@0: #!usr/bin/env/ python marco@1: marco@3: """ SWORD2 DSpace bulk uploader - v0.2 marco@1: marco@1: A python script to submit large numbers of files to a SWORD2-compatible repository, specifically DSpace 1.8x. marco@1: Built on the SWORD2 python client library: https://bitbucket.org/beno/python-sword2/overview marco@1: marco@1: Dependencies: marco@1: marco@1: - python 2.X marco@1: marco@1: - sword2 library: https://bitbucket.org/beno/python-sword2/src marco@1: marco@1: ----------------------------------- marco@1: Copyright 2012 Marco Fabiani marco@1: Copyright 2012 Queen Mary, University of London marco@1: ----------------------------------- marco@1: """ marco@0: marco@0: import argparse, getpass, zipfile,os,sys marco@0: from sword2 import * marco@0: marco@0: # Parse arguments marco@0: parser = argparse.ArgumentParser(description="Bulk upload to DSpace using SWORDv2.",epilog="If the submission is created successfully, it will remain open to be completed with the necessary metadata and licenses, using the DSpace web interface. The submission can be found in the \"My Account -> Submissions\" section of the user's area.") marco@0: parser.add_argument("data", type=str, nargs=1, marco@1: help="Accepts: METSDSpaceSIP packages, zip files, directories, single files. NOTE: METSDSpaceSIP packages are only accepted by Collections with a workflow!") marco@0: parser.add_argument("--username", dest="user_name", type=str,nargs=1, help="DSpace username.") marco@0: parser.add_argument("--title", dest="title", type=str,nargs=1, help="Title (ignored for METS packages).") marco@0: parser.add_argument("--author", dest="author", type=str,nargs="+", help="Author(s) (ignored for METS packages). Accepts multiple entries in the format \"Surname, Name\"") marco@0: parser.add_argument("--date", dest="date", type=str,nargs=1, help="Date of creation (string) (ignored for METS packages).") marco@1: parser.add_argument("--servicedoc", dest="dspaceurl", type=str,nargs=1, help="Url of the SWORDv2 service document (default: http://c4dm.eecs.qmul.ac.uk/smdmrd-test/swordv2/servicedocument") marco@0: marco@0: args = parser.parse_args() marco@0: data = args.data[0] marco@0: if args.dspaceurl == None: marco@0: dspaceurl = "http://c4dm.eecs.qmul.ac.uk/smdmrd-test/swordv2/servicedocument" marco@0: else: marco@0: dspaceurl = args.dspaceurl[0] marco@0: marco@0: marco@0: try: marco@0: # Connect to SWORD server marco@0: attempts = 3 # Number of attempts left to connect to server marco@0: connected = False marco@0: while attempts>0 and not connected: marco@0: print "Connecting to SWORD server. Remaining attempts: ", attempts marco@0: # Try to login, get service document marco@0: # Get username and password marco@0: if args.user_name == None: marco@0: user_name = raw_input("DSpace Username: ") marco@0: else: marco@0: user_name = args.user_name[0] marco@0: print "DSpace Username: ",user_name marco@0: user_pass = getpass.getpass("DSpace password:") marco@0: # Connect to the server marco@0: c = Connection(dspaceurl, user_name=user_name, user_pass=user_pass,keep_history=False) marco@3: marco@0: # Get service document marco@0: try: marco@0: c.get_service_document() marco@3: except: # Server error marco@3: print "Server unreachable!" marco@3: break marco@3: if c.sd != None: marco@3: connected = True marco@3: else: marco@0: attempts-=1 marco@0: print "Incorrect username and/or password" marco@3: marco@0: marco@0: if connected: marco@0: # List available collections marco@0: print "------------------------" marco@0: print "Welcome to the ",c.workspaces[0][0], "repository" marco@0: print "Available Collections: " marco@0: numColl = len(c.workspaces[0][1]) marco@0: for ctr in range(numColl): marco@0: coll = c.workspaces[0][1][ctr] marco@0: print ctr+1,":",coll.title marco@0: # Select a collection to deposit into marco@0: sel = -1 marco@0: while (sel<=0 or sel>numColl): marco@0: sel = input("Select a Collection to submit your files into: ") marco@0: collection = c.workspaces[0][1][sel-1] marco@0: print "Selected Collection: ",collection.title marco@0: marco@0: # Create a submission: build the zip files marco@0: temp = True # delete the zip file at the end of the upload marco@0: if zipfile.is_zipfile(data): marco@0: zipf = data marco@0: temp = False marco@0: elif os.path.isfile(data): # This is a single file marco@0: dataname = os.path.basename(data) marco@0: zipf = os.path.splitext(dataname)[0]+".zip" marco@0: myzip = zipfile.ZipFile(zipf, "w") marco@3: myzip.write(data,os.path.basename(data)) marco@0: myzip.close() marco@3: elif os.path.isdir(data): # This is a directory, zip all the files and maintain the structure, but start from the base only... marco@0: dataname = os.path.basename(os.path.normpath(data)) marco@0: zipf = dataname+".zip" marco@0: myzip = zipfile.ZipFile(zipf, "w") marco@0: # get the directory structure marco@3: print "Creating a zip archive for submission..." marco@0: for root, dirs, files in os.walk(data): marco@0: for name in files: marco@3: myzip.write(os.path.join(root,name), marco@3: os.path.relpath(os.path.join(root,name),data)) marco@0: myzip.close() marco@0: else: marco@0: print "Couldn't find the data." marco@0: sys.exit() marco@0: marco@0: #Check if this is a METSDSpaceSIP: see if there is a mets.xml file in the zip marco@0: myzip = zipfile.ZipFile(zipf) marco@0: if "mets.xml" in myzip.namelist(): marco@0: packaging = "http://purl.org/net/sword/package/METSDSpaceSIP" marco@0: type = "METS" marco@0: else: marco@0: packaging = "http://purl.org/net/sword/package/SimpleZip" marco@0: type = "SimpleZip" marco@0: marco@0: print "------------------------" marco@0: print "This is a ",type," submission" marco@3: print "Uploading files..." marco@0: myzip.close() marco@0: marco@0: payload = open(zipf, "rb") marco@0: try: marco@0: receipt_dep = c.create(col_iri = collection.href, marco@0: payload = payload, marco@0: filename = zipf, marco@0: mimetype = "application/zip", marco@0: packaging = packaging) marco@0: print type, " submission successful." marco@0: if type == "SimpleZip": marco@0: # If some of the additional arguments for author, title, date etc. have been specified, update the metadata marco@0: if (args.title != None) or (args.author != None) or (args.date != None): marco@0: entry = Entry() marco@0: print "------------------------" marco@0: print "Updating with additional metadata" marco@0: if args.title != None: marco@0: entry.add_fields(dcterms_title = args.title[0]) marco@0: if args.author != None: marco@0: for creator in args.author: marco@0: entry.add_fields(dcterms_creator=creator) marco@0: if args.date != None: marco@0: entry.add_fields(dcterms_created = args.date[0]) marco@0: try: marco@0: receipt_update = c.update(dr = receipt_dep , metadata_entry = entry, in_progress = True) # in_progress is True: we don't want to close the submission marco@0: print "Additional metadata updated successfully." marco@0: except: marco@0: print "Server error" marco@0: print "------------------------" marco@0: print "You will find this submission in the \"Submissions\" list in your DSpace account. To complete/edit it with metadata and licenses, click on the title and then on \"Resume\"." marco@0: except: marco@0: print "Error! Couldn't submit the file!" marco@0: if type == "METS": # Just guessing: not sure this is the problem... marco@0: print "To submit a METS package, the collection MUST have a workflow!" marco@0: payload.close() marco@3: if temp: marco@3: os.remove(zipf) marco@0: marco@0: else: # Failed to connect to SWORDv2 Server marco@0: print "Couldn't connect to the server." marco@0: if attempts == 0: marco@0: print "Invalid credentials entered 3 times." marco@0: marco@0: except KeyboardInterrupt: marco@0: print "------------------------" marco@3: print "\nSubmission aborted by user."