Mercurial > hg > sworduploader
view sworduploader.py @ 4:96d62e78ac9f
Added support for single file upload using edit media resources
author | Marco Fabiani <marco.fabiani@eecs.qmul.ac.uk> |
---|---|
date | Thu, 29 Mar 2012 17:39:46 +0100 |
parents | c1918aa337c4 |
children | 13fc2773b3fe |
line wrap: on
line source
#!/usr/bin/env python
"""
SWORD2 DSpace bulk uploader - v0.3

A python script to submit large numbers of files to a SWORD2-compatible
repository, specifically DSpace 1.8x.

Built on the SWORD2 python client library:
https://bitbucket.org/beno/python-sword2/overview

Dependencies:

- python 2.X
- sword2 library, with modifications:
  (original) https://bitbucket.org/beno/python-sword2/src
  (modified) https://code.soundsoftware.ac.uk/hg/sworduploader

-----------------------------------
Copyright 2012 Marco Fabiani
Copyright 2012 Queen Mary, University of London
-----------------------------------
"""

import argparse
import getpass
import os
import sys
import zipfile

# NOTE: the third-party ``sword2`` package is imported inside the functions
# that talk to the server, so that ``--help``, argument validation and the
# local file handling keep working when the library is not installed.

DEFAULT_SERVICE_DOCUMENT = "http://c4dm.eecs.qmul.ac.uk/smdmrd-test/swordv2/servicedocument"
MAX_LOGIN_ATTEMPTS = 3

PACKAGING_SIMPLE_ZIP = "http://purl.org/net/sword/package/SimpleZip"
PACKAGING_METS = "http://purl.org/net/sword/package/METSDSpaceSIP"
PACKAGING_BINARY = "http://purl.org/net/sword/package/Binary"

SEPARATOR = "------------------------"

# Always read answers as plain text. Python 2's input() evaluates whatever
# the user types, which is both unsafe and crashes on non-numeric answers.
try:
    _read_line = raw_input  # Python 2
except NameError:
    _read_line = input  # Python 3: input() already returns the raw string


def build_parser():
    """Return the argparse parser describing the command line."""
    parser = argparse.ArgumentParser(
        description="Bulk upload to DSpace using SWORDv2.",
        epilog="If the submission is created successfully, it will remain "
               "open to be completed with the necessary metadata and "
               "licenses, using the DSpace web interface. The submission can "
               "be found in the \"My Account -> Submissions\" section of the "
               "user's area.")
    parser.add_argument(
        "data", type=str, nargs=1,
        help="Accepts: METSDSpaceSIP packages, zip files, directories, "
             "single files. NOTE: METSDSpaceSIP packages are only accepted "
             "by Collections with a workflow!")
    parser.add_argument(
        "--username", dest="user_name", type=str, nargs=1,
        help="DSpace username.")
    parser.add_argument(
        "--zip", action="store_true", dest="zip", default=False,
        help="If \"data\" is a directory, send it as a single zip archive "
             "to preserve its structure.")
    parser.add_argument(
        "--title", dest="title", type=str, nargs=1,
        help="Title (ignored for METS packages).")
    parser.add_argument(
        "--author", dest="author", type=str, nargs="+",
        help="Author(s) (ignored for METS packages). Accepts multiple "
             "entries in the format \"Surname, Name\"")
    parser.add_argument(
        "--date", dest="date", type=str, nargs=1,
        help="Date of creation (string) (ignored for METS packages).")
    parser.add_argument(
        "--servicedoc", dest="dspaceurl", type=str, nargs=1,
        help="Url of the SWORDv2 service document (default: "
             "http://c4dm.eecs.qmul.ac.uk/smdmrd-test/swordv2/servicedocument")
    return parser


def connect(dspaceurl, user_name=None):
    """Log in to the SWORD server, prompting for the credentials.

    dspaceurl -- URL of the SWORDv2 service document.
    user_name -- username given on the command line, or None to prompt.

    Returns ``(connection, attempts_left)``. ``connection`` is None when the
    server could not be reached or every login attempt was rejected; in the
    latter case ``attempts_left`` is 0.
    """
    from sword2 import Connection

    attempts = MAX_LOGIN_ATTEMPTS
    while attempts > 0:
        print("Connecting to SWORD server. Remaining attempts:  %s" % attempts)
        if user_name is None:
            name = _read_line("DSpace Username: ")
        else:
            name = user_name
            print("DSpace Username:  %s" % name)
        password = getpass.getpass("DSpace password:")

        connection = Connection(dspaceurl, user_name=name,
                                user_pass=password, keep_history=False)
        try:
            connection.get_service_document()
        except Exception:
            # Not a bare "except:", so that Ctrl-C still aborts the script
            # instead of being reported as a server problem.
            print("Server unreachable!")
            break

        # A missing service document is treated as rejected credentials.
        if connection.sd is not None:
            return connection, attempts
        attempts -= 1
        print("Incorrect username and/or password")
    return None, attempts


def select_collection(collections):
    """List ``collections`` and return the one the user picks.

    The prompt is repeated until a number between 1 and the number of
    collections is entered. Returns None when there is nothing to pick from.
    """
    count = len(collections)
    if count == 0:
        # Without this guard the prompt below could never be satisfied.
        print("No Collections available for deposit.")
        return None
    for number, coll in enumerate(collections, 1):
        print("%d : %s" % (number, coll.title))

    choice = 0
    while not 1 <= choice <= count:
        answer = _read_line("Select a Collection to submit your files into: ")
        try:
            choice = int(answer)
        except ValueError:
            choice = 0  # not a number: ask again
    return collections[choice - 1]


def _zip_directory(data):
    """Zip the content of directory ``data`` into ``<dirname>.zip``.

    The archive is written to the current working directory and its entries
    are stored relative to ``data``. Returns the same tuple as
    describe_submission().
    """
    dataname = os.path.basename(os.path.normpath(data))
    zipf = dataname + ".zip"
    own_path = os.path.abspath(zipf)
    print("Creating a zip archive for submission...")
    # The archive must be closed before it is uploaded: the zip central
    # directory is only written on close.
    with zipfile.ZipFile(zipf, "w") as archive:
        for root, _dirs, files in os.walk(data):
            for name in files:
                full = os.path.join(root, name)
                if os.path.abspath(full) == own_path:
                    continue  # never pack the archive into itself
                archive.write(full, os.path.relpath(full, data))
    return [zipf], PACKAGING_SIMPLE_ZIP, "SimpleZip", True


def describe_submission(data, as_zip=False):
    """Work out what kind of submission ``data`` is.

    data   -- path to a directory, a zip/METS package or a single file.
    as_zip -- when ``data`` is a directory, pack it into one zip archive.

    Returns ``(fileslist, packaging, kind, temp)``:
      fileslist -- paths of the files to upload;
      packaging -- SWORD packaging URI, or None for plain files;
      kind      -- "SimpleZip", "METS", "multiple files" or "single file";
      temp      -- True when fileslist[0] is an archive created here that
                   should be deleted after the upload.
    Returns None when ``data`` does not exist.
    """
    if os.path.isdir(data):
        if as_zip:
            return _zip_directory(data)
        fileslist = [os.path.join(root, name)
                     for root, _dirs, files in os.walk(data)
                     for name in files]
        return fileslist, None, "multiple files", False

    if zipfile.is_zipfile(data):
        with zipfile.ZipFile(data) as archive:
            is_mets = "mets.xml" in archive.namelist()
        if is_mets:
            return [data], PACKAGING_METS, "METS", False
        return [data], PACKAGING_SIMPLE_ZIP, "SimpleZip", False

    if os.path.isfile(data):
        return [data], None, "single file", False

    return None


def build_entry(title=None, authors=None, date=None):
    """Return a sword2 Entry holding the given metadata.

    Returns None when no metadata at all was supplied.
    """
    if title is None and authors is None and date is None:
        return None

    from sword2 import Entry

    entry = Entry()
    if title is not None:
        entry.add_fields(dcterms_title=title)
    if authors is not None:
        for creator in authors:
            entry.add_fields(dcterms_creator=creator)
    if date is not None:
        entry.add_fields(dcterms_created=date)
    return entry


def upload_files(connection, collection, entry, data, fileslist):
    """Create an item from ``entry`` and add every file in ``fileslist``.

    When ``entry`` is None the item is titled after the last component of
    ``data``. An HTTPResponseError from the server is reported as
    "Bad request".
    """
    from sword2 import Entry, HTTPResponseError

    try:
        print(SEPARATOR)
        print("Creating the item...")
        if entry is None:
            # normpath drops a trailing separator, which would otherwise
            # leave basename() (and so the title) empty.
            entry = Entry(
                dcterms_title=os.path.basename(os.path.normpath(data)))
        creation_receipt = connection.create(col_iri=collection.href,
                                             metadata_entry=entry)
        for path in fileslist:
            name = os.path.basename(path)
            print("Uploading file  %s" % name)
            # NOTE(review): every file is announced as application/zip
            # whatever its real type; kept unchanged - confirm whether the
            # repository relies on this.
            with open(path, "rb") as payload:
                connection.add_file_to_resource(
                    edit_media_iri=creation_receipt.edit_media,
                    payload=payload,
                    filename=name,
                    mimetype='application/zip',
                    packaging=PACKAGING_BINARY)
    except HTTPResponseError:
        print("Bad request")


def upload_package(connection, collection, entry, path, packaging, kind):
    """Deposit the archive at ``path`` and, for a SimpleZip, add metadata.

    ``kind`` is "SimpleZip" or "METS". The metadata in ``entry`` is only sent
    when the deposit itself succeeded; the submission is left in progress.
    """
    deposit_receipt = None
    with open(path, "rb") as payload:
        try:
            deposit_receipt = connection.create(
                col_iri=collection.href,
                payload=payload,
                filename=os.path.basename(path),
                mimetype="application/zip",
                packaging=packaging)
            print("%s  submission successful." % kind)
        except Exception:
            print("Error! Couldn't submit the file!")
            if kind == "METS":
                # Just guessing: not sure this is the problem...
                print("To submit a METS package, the collection MUST have a workflow!")

    # If some of the additional arguments for author, title, date etc. have
    # been specified, update the metadata.
    if kind == "SimpleZip" and entry is not None and deposit_receipt is not None:
        try:
            # in_progress is True: we don't want to close the submission
            connection.update(dr=deposit_receipt, metadata_entry=entry,
                              in_progress=True)
            print("Additional metadata updated successfully.")
        except Exception:
            print("Server error")


def main(argv=None):
    """Run the uploader; ``argv`` defaults to the process arguments."""
    args = build_parser().parse_args(argv)
    data = args.data[0]
    if args.dspaceurl is None:
        dspaceurl = DEFAULT_SERVICE_DOCUMENT
    else:
        dspaceurl = args.dspaceurl[0]
    user_name = None if args.user_name is None else args.user_name[0]

    try:
        connection, attempts = connect(dspaceurl, user_name)
        if connection is None:
            # Failed to connect to SWORDv2 Server
            print("Couldn't connect to the server.")
            if attempts == 0:
                print("Invalid credentials entered %d times." % MAX_LOGIN_ATTEMPTS)
            return

        # List available collections and select one to deposit into
        workspace = connection.workspaces[0]
        print(SEPARATOR)
        print("Welcome to the  %s repository" % workspace[0])
        print("Available Collections: ")
        collection = select_collection(workspace[1])
        if collection is None:
            return
        print("Selected Collection:  %s" % collection.title)

        submission = describe_submission(data, args.zip)
        if submission is None:
            print("Couldn't find the data.")
            sys.exit()
        fileslist, packaging, kind, temp = submission

        try:
            print(SEPARATOR)
            print("This is a  %s  submission" % kind)
            entry = build_entry(
                None if args.title is None else args.title[0],
                args.author,
                None if args.date is None else args.date[0])
            if kind in ("single file", "multiple files"):
                upload_files(connection, collection, entry, data, fileslist)
            else:
                upload_package(connection, collection, entry, fileslist[0],
                               packaging, kind)
        finally:
            # Remove the archive created for --zip, even after a failure or
            # an interrupted upload.
            if temp:
                os.remove(fileslist[0])

        print(SEPARATOR)
        print("You will find the submission in the \"Submissions\" list in your DSpace account. To complete/edit it with metadata and licenses, click on the title and then on \"Resume\".")
    except KeyboardInterrupt:
        print(SEPARATOR)
        print("\nSubmission aborted by user.")


if __name__ == "__main__":
    main()