# HG changeset patch # User Marco Fabiani # Date 1333039186 -3600 # Node ID 96d62e78ac9f6af8752fa09940ad980a1b7696c4 # Parent c1918aa337c45f51c4b2750b0ad6d92650e73616 Added support for single file upload using edit media resources diff -r c1918aa337c4 -r 96d62e78ac9f sworduploader.py --- a/sworduploader.py Wed Mar 28 17:48:59 2012 +0100 +++ b/sworduploader.py Thu Mar 29 17:39:46 2012 +0100 @@ -1,6 +1,6 @@ #!usr/bin/env/ python -""" SWORD2 DSpace bulk uploader - v0.2 +""" SWORD2 DSpace bulk uploader - v0.3 A python script to submit large numbers of files to a SWORD2-compatible repository, specifically DSpace 1.8x. Built on the SWORD2 python client library: https://bitbucket.org/beno/python-sword2/overview @@ -9,7 +9,9 @@ - python 2.X -- sword2 library: https://bitbucket.org/beno/python-sword2/src +- sword2 library, with modifications: + (original) https://bitbucket.org/beno/python-sword2/src + (modified) https://code.soundsoftware.ac.uk/hg/sworduploader ----------------------------------- Copyright 2012 Marco Fabiani @@ -17,7 +19,7 @@ ----------------------------------- """ -import argparse, getpass, zipfile,os,sys +import argparse, getpass, zipfile, os, sys from sword2 import * # Parse arguments @@ -25,6 +27,7 @@ parser.add_argument("data", type=str, nargs=1, help="Accepts: METSDSpaceSIP packages, zip files, directories, single files. NOTE: METSDSpaceSIP packages are only accepted by Collections with a workflow!") parser.add_argument("--username", dest="user_name", type=str,nargs=1, help="DSpace username.") +parser.add_argument("--zip", action="store_true",dest="zip",default=False, help="If \"data\" is a directory, send it as a single zip archive to preserve its structure.") parser.add_argument("--title", dest="title", type=str,nargs=1, help="Title (ignored for METS packages).") parser.add_argument("--author", dest="author", type=str,nargs="+", help="Author(s) (ignored for METS packages). 
Accepts multiple entries in the format \"Surname, Name\"") parser.add_argument("--date", dest="date", type=str,nargs=1, help="Date of creation (string) (ignored for METS packages).") @@ -84,81 +87,113 @@ collection = c.workspaces[0][1][sel-1] print "Selected Collection: ",collection.title - # Create a submission: build the zip files - temp = True # delete the zip file at the end of the upload - if zipfile.is_zipfile(data): - zipf = data - temp = False + # Create a submission + fileslist = [] + temp = False # Delete temp files + # If folder + if os.path.isdir(data): + if args.zip: # If zip option, zip all the files and maintain the structure, but start from the base only... + dataname = os.path.basename(os.path.normpath(data)) + zipf = dataname+".zip" + myzip = zipfile.ZipFile(zipf, "w") + # get the directory structure + print "Creating a zip archive for submission..." + for root, dirs, files in os.walk(data): + for name in files: + myzip.write(os.path.join(root,name), + os.path.relpath(os.path.join(root,name),data)) + fileslist.append(zipf) + packaging = "http://purl.org/net/sword/package/SimpleZip" + type = "SimpleZip" + temp = True + else: #create a list of files to upload + for root, dirs, files in os.walk(data): + for name in files: + fileslist.append(os.path.join(root,name)) + type = "multiple files" + elif zipfile.is_zipfile(data): #This is a zip file + fileslist.append(data) + myzip = zipfile.ZipFile(data) + if "mets.xml" in myzip.namelist(): #This is a METS package + packaging = "http://purl.org/net/sword/package/METSDSpaceSIP" + type = "METS" + else: #THis is a simple zip file + packaging = "http://purl.org/net/sword/package/SimpleZip" + type = "SimpleZip" + myzip.close() elif os.path.isfile(data): # This is a single file - dataname = os.path.basename(data) - zipf = os.path.splitext(dataname)[0]+".zip" - myzip = zipfile.ZipFile(zipf, "w") - myzip.write(data,os.path.basename(data)) - myzip.close() - elif os.path.isdir(data): # This is a directory, zip all 
the files and maintain the structure, but start from the base only... - dataname = os.path.basename(os.path.normpath(data)) - zipf = dataname+".zip" - myzip = zipfile.ZipFile(zipf, "w") - # get the directory structure - print "Creating a zip archive for submission..." - for root, dirs, files in os.walk(data): - for name in files: - myzip.write(os.path.join(root,name), - os.path.relpath(os.path.join(root,name),data)) - myzip.close() + fileslist.append(data) + type = "single file" else: print "Couldn't find the data." sys.exit() - #Check if this is a METSDSpaceSIP: see if there is a mets.xml file in the zip - myzip = zipfile.ZipFile(zipf) - if "mets.xml" in myzip.namelist(): - packaging = "http://purl.org/net/sword/package/METSDSpaceSIP" - type = "METS" - else: - packaging = "http://purl.org/net/sword/package/SimpleZip" - type = "SimpleZip" - print "------------------------" print "This is a ",type," submission" - print "Uploading files..." - myzip.close() + + # Create a metadata entry + if (args.title != None) or (args.author != None) or (args.date != None): + entry = Entry() + if args.title != None: + entry.add_fields(dcterms_title = args.title[0]) + if args.author != None: + for creator in args.author: + entry.add_fields(dcterms_creator=creator) + if args.date != None: + entry.add_fields(dcterms_created = args.date[0]) + else: + entry = None + # Select what to do + if (type is "single file") or (type is "multiple files"): + try: + # Create the metadata entry with ATOM + print "------------------------" + print "Creating the item..." 
+ if entry is None: + entry = Entry(dcterms_title=(os.path.basename(data))) + creation_receipt = c.create(col_iri = collection.href, metadata_entry = entry) + + # Add the files + for f in fileslist: + print "Uploading file ",os.path.basename(f) + payload = open(f,"rb") + deposit_receipt = c.add_file_to_resource(edit_media_iri = creation_receipt.edit_media, + payload = payload, + filename = os.path.basename(f), + mimetype = 'application/zip', + packaging = 'http://purl.org/net/sword/package/Binary') + payload.close() + except HTTPResponseError: + print "Bad request" + else: + # PUT the data + payload = open(fileslist[0], "rb") + try: + deposit_receipt = c.create(col_iri = collection.href, + payload = payload, + filename = fileslist[0], + mimetype = "application/zip", + packaging = packaging) + print type, " submission successful." + except: + print "Error! Couldn't submit the file!" + if type == "METS": # Just guessing: not sure this is the problem... + print "To submit a METS package, the collection MUST have a workflow!" + payload.close() + + # If some of the additional arguments for author, title, date etc. have been specified, update the metadata + if type == "SimpleZip" and entry != None: + try: + receipt_update = c.update(dr = receipt_dep , metadata_entry = entry, in_progress = True) # in_progress is True: we don't want to close the submission + print "Additional metadata updated successfully." + except: + print "Server error" + if temp: + os.remove(fileslist[0]) - payload = open(zipf, "rb") - try: - receipt_dep = c.create(col_iri = collection.href, - payload = payload, - filename = zipf, - mimetype = "application/zip", - packaging = packaging) - print type, " submission successful." - if type == "SimpleZip": - # If some of the additional arguments for author, title, date etc. 
have been specified, update the metadata - if (args.title != None) or (args.author != None) or (args.date != None): - entry = Entry() - print "------------------------" - print "Updating with additional metadata" - if args.title != None: - entry.add_fields(dcterms_title = args.title[0]) - if args.author != None: - for creator in args.author: - entry.add_fields(dcterms_creator=creator) - if args.date != None: - entry.add_fields(dcterms_created = args.date[0]) - try: - receipt_update = c.update(dr = receipt_dep , metadata_entry = entry, in_progress = True) # in_progress is True: we don't want to close the submission - print "Additional metadata updated successfully." - except: - print "Server error" - print "------------------------" - print "You will find this submission in the \"Submissions\" list in your DSpace account. To complete/edit it with metadata and licenses, click on the title and then on \"Resume\"." - except: - print "Error! Couldn't submit the file!" - if type == "METS": # Just guessing: not sure this is the problem... - print "To submit a METS package, the collection MUST have a workflow!" - payload.close() - if temp: - os.remove(zipf) + print "------------------------" + print "You will find the submission in the \"Submissions\" list in your DSpace account. To complete/edit it with metadata and licenses, click on the title and then on \"Resume\"." + else: # Failed to connect to SWORDv2 Server print "Couldn't connect to the server."