annotate sworduploader.py @ 1:2ceacc9bb602

Added README and LICENSE.txt files
author Marco Fabiani <marco.fabiani@eecs.qmul.ac.uk>
date Wed, 28 Mar 2012 11:16:56 +0100
parents 71b6185352a5
children c72d6b5d58bc
rev   line source
marco@0 1 #!/usr/bin/env python
marco@1 2
marco@1 3 """ SWORD2 DSpace bulk uploader
marco@1 4
marco@1 5 A python script to submit large numbers of files to a SWORD2-compatible repository, specifically DSpace 1.8x.
marco@1 6 Built on the SWORD2 python client library: https://bitbucket.org/beno/python-sword2/overview
marco@1 7
marco@1 8 Dependencies:
marco@1 9
marco@1 10 - python 2.X
marco@1 11
marco@1 12 - sword2 library: https://bitbucket.org/beno/python-sword2/src
marco@1 13
marco@1 14 -----------------------------------
marco@1 15 Copyright 2012 Marco Fabiani
marco@1 16 Copyright 2012 Queen Mary, University of London
marco@1 17 -----------------------------------
marco@1 18 """
marco@0 19
marco@0 20 import argparse, getpass, zipfile,os,sys
marco@0 21 from sword2 import *
marco@0 22
# ---------------------------------------------------------------------------
# Script body: parse the command line, log in to the SWORDv2 server, let the
# user pick a collection, package the data as a zip and deposit it.
#
# NOTE: output is produced with single-argument print(...) calls so that the
# text is byte-identical under the Python 2 print statement (the doubled
# spaces reproduce the separators the original comma-style prints emitted).
# ---------------------------------------------------------------------------

# Service document used when --servicedoc is not given.
DEFAULT_SERVICE_DOC = "http://c4dm.eecs.qmul.ac.uk/smdmrd-test/swordv2/servicedocument"
# Visual separator printed between the stages of the upload.
SEPARATOR = "------------------------"


def _write_zip(archive, paths):
    """Create the zip file `archive` containing every path in `paths`.

    If anything goes wrong (including Ctrl-C) the partially written archive
    is removed before the exception is re-raised, so no stray file is left.
    """
    try:
        with zipfile.ZipFile(archive, "w") as zf:
            for path in paths:
                zf.write(path)
    except BaseException:
        if os.path.exists(archive):
            os.remove(archive)
        raise


def _build_archive(path):
    """Return `(zip_path, is_temporary)` for `path`, or None if it is missing.

    - An existing zip file is used as-is (not temporary).
    - A single file is zipped into `<basename without extension>.zip`.
    - A directory is zipped recursively into `<dirname>.zip`, keeping the
      directory structure.
    Archives created here are written to the current working directory and
    flagged as temporary so the caller can delete them after the upload.
    NOTE(review): an existing file with the same name as the generated
    archive is overwritten -- confirm whether that needs guarding.
    """
    if zipfile.is_zipfile(path):
        return path, False
    if os.path.isfile(path):
        archive = os.path.splitext(os.path.basename(path))[0] + ".zip"
        _write_zip(archive, [path])
        return archive, True
    if os.path.isdir(path):
        # normpath strips a trailing separator so basename is never empty.
        archive = os.path.basename(os.path.normpath(path)) + ".zip"
        members = (os.path.join(root, name)
                   for root, _dirs, files in os.walk(path)
                   for name in files)
        _write_zip(archive, members)
        return archive, True
    return None


def _prompt_collection_number(count):
    """Ask until the user types an integer in 1..count and return it.

    The answer is read as text and converted with int(); it is never
    evaluated, and anything that is not a valid number simply re-prompts.
    """
    choice = 0
    while choice <= 0 or choice > count:
        answer = raw_input("Select a Collection to submit your files into: ")
        try:
            choice = int(answer)
        except ValueError:
            choice = 0
    return choice


# Parse arguments
parser = argparse.ArgumentParser(
    description="Bulk upload to DSpace using SWORDv2.",
    epilog="If the submission is created successfully, it will remain open to be completed with the necessary metadata and licenses, using the DSpace web interface. The submission can be found in the \"My Account -> Submissions\" section of the user's area.")
parser.add_argument("data", type=str, nargs=1,
                    help="Accepts: METSDSpaceSIP packages, zip files, directories, single files. NOTE: METSDSpaceSIP packages are only accepted by Collections with a workflow!")
parser.add_argument("--username", dest="user_name", type=str, nargs=1, help="DSpace username.")
parser.add_argument("--title", dest="title", type=str, nargs=1, help="Title (ignored for METS packages).")
parser.add_argument("--author", dest="author", type=str, nargs="+", help="Author(s) (ignored for METS packages). Accepts multiple entries in the format \"Surname, Name\"")
parser.add_argument("--date", dest="date", type=str, nargs=1, help="Date of creation (string) (ignored for METS packages).")
parser.add_argument("--servicedoc", dest="dspaceurl", type=str, nargs=1,
                    help="Url of the SWORDv2 service document (default: " + DEFAULT_SERVICE_DOC + ")")

args = parser.parse_args()
data = args.data[0]
if args.dspaceurl is None:
    dspaceurl = DEFAULT_SERVICE_DOC
else:
    dspaceurl = args.dspaceurl[0]

# Path of a zip created by this script; removed in the `finally` below so it
# is cleaned up on success, on failure and on Ctrl-C alike.
temp_archive = None

try:
    # Connect to SWORD server
    attempts = 3  # Number of attempts left to connect to server
    connected = False
    c = None
    while attempts > 0 and not connected:
        print("Connecting to SWORD server. Remaining attempts:  %s" % (attempts,))
        # Get username and password
        if args.user_name is None:
            user_name = raw_input("DSpace Username: ")
        else:
            user_name = args.user_name[0]
            print("DSpace Username:  %s" % (user_name,))
        user_pass = getpass.getpass("DSpace password:")
        # Connect to the server
        c = Connection(dspaceurl, user_name=user_name, user_pass=user_pass, keep_history=False)
        # Get service document
        try:
            c.get_service_document()
        except Exception:  # Could be Forbidden; Ctrl-C is deliberately not caught here
            print("Incorrect username and/or password")
        if c.sd is not None:
            connected = True
        else:
            # Count every failed attempt, whether or not an exception was
            # raised, so the loop always terminates.
            attempts -= 1

    if connected:
        # List available collections (only the first workspace is used)
        workspace = c.workspaces[0]
        collections = workspace[1]
        print(SEPARATOR)
        print("Welcome to the  %s repository" % (workspace[0],))
        print("Available Collections: ")
        for number, coll in enumerate(collections, 1):
            print("%s : %s" % (number, coll.title))
        if not collections:
            # Without this guard the selection prompt could never be satisfied.
            print("No collections available for deposit.")
            sys.exit()
        # Select a collection to deposit into
        sel = _prompt_collection_number(len(collections))
        collection = collections[sel - 1]
        print("Selected Collection:  %s" % (collection.title,))

        # Create a submission: build the zip file
        built = _build_archive(data)
        if built is None:
            print("Couldn't find the data.")
            sys.exit()
        zipf, temp = built
        if temp:
            temp_archive = zipf  # delete the zip file at the end of the upload

        # Check if this is a METSDSpaceSIP: see if there is a mets.xml file in the zip
        with zipfile.ZipFile(zipf) as archive:
            has_mets = "mets.xml" in archive.namelist()
        if has_mets:
            packaging = "http://purl.org/net/sword/package/METSDSpaceSIP"
            submission_type = "METS"
        else:
            packaging = "http://purl.org/net/sword/package/SimpleZip"
            submission_type = "SimpleZip"

        print(SEPARATOR)
        print("This is a  %s  submission" % (submission_type,))

        with open(zipf, "rb") as payload:
            try:
                receipt_dep = c.create(col_iri=collection.href,
                                       payload=payload,
                                       filename=zipf,
                                       mimetype="application/zip",
                                       packaging=packaging)
                print("%s  submission successful." % (submission_type,))
                if submission_type == "SimpleZip":
                    # If some of the additional arguments for author, title, date etc.
                    # have been specified, update the metadata
                    if (args.title is not None) or (args.author is not None) or (args.date is not None):
                        entry = Entry()
                        print(SEPARATOR)
                        print("Updating with additional metadata")
                        if args.title is not None:
                            entry.add_fields(dcterms_title=args.title[0])
                        if args.author is not None:
                            for creator in args.author:
                                entry.add_fields(dcterms_creator=creator)
                        if args.date is not None:
                            entry.add_fields(dcterms_created=args.date[0])
                        try:
                            # in_progress is True: we don't want to close the submission
                            c.update(dr=receipt_dep, metadata_entry=entry, in_progress=True)
                            print("Additional metadata updated successfully.")
                        except Exception:
                            print("Server error")
                print(SEPARATOR)
                print("You will find this submission in the \"Submissions\" list in your DSpace account. To complete/edit it with metadata and licenses, click on the title and then on \"Resume\".")
            except Exception:
                print("Error! Couldn't submit the file!")
                if submission_type == "METS":  # Just guessing: not sure this is the problem...
                    print("To submit a METS package, the collection MUST have a workflow!")

    else:  # Failed to connect to SWORDv2 Server
        print("Couldn't connect to the server.")
        if attempts == 0:
            print("Invalid credentials entered 3 times.")

except KeyboardInterrupt:
    print(SEPARATOR)
    print("\nSubmission aborted by user.")
finally:
    # Only remove archives this script created; a user-supplied zip is kept.
    if temp_archive is not None and os.path.exists(temp_archive):
        os.remove(temp_archive)