annotate sworduploader.py @ 4:96d62e78ac9f

Added support for single file upload using edit media resources
author Marco Fabiani <marco.fabiani@eecs.qmul.ac.uk>
date Thu, 29 Mar 2012 17:39:46 +0100
parents c1918aa337c4
children 13fc2773b3fe
rev   line source
marco@0 1 #!/usr/bin/env python
marco@1 2
marco@4 3 """ SWORD2 DSpace bulk uploader - v0.3
marco@1 4
marco@1 5 A python script to submit large numbers of files to a SWORD2-compatible repository, specifically DSpace 1.8x.
marco@1 6 Built on the SWORD2 python client library: https://bitbucket.org/beno/python-sword2/overview
marco@1 7
marco@1 8 Dependencies:
marco@1 9
marco@1 10 - python 2.X
marco@1 11
marco@4 12 - sword2 library, with modifications:
marco@4 13 (original) https://bitbucket.org/beno/python-sword2/src
marco@4 14 (modified) https://code.soundsoftware.ac.uk/hg/sworduploader
marco@1 15
marco@1 16 -----------------------------------
marco@1 17 Copyright 2012 Marco Fabiani
marco@1 18 Copyright 2012 Queen Mary, University of London
marco@1 19 -----------------------------------
marco@1 20 """
marco@0 21
marco@4 22 import argparse, getpass, zipfile, os, sys
marco@0 23 from sword2 import *
marco@0 24
# Service document used when --servicedoc is not given on the command line.
DEFAULT_SERVICEDOC = "http://c4dm.eecs.qmul.ac.uk/smdmrd-test/swordv2/servicedocument"

# Parse arguments
# Options declared with nargs=1 are stored by argparse as one-element lists
# (or None when the option is absent), hence the [0] indexing wherever the
# values are read.
parser = argparse.ArgumentParser(
    description="Bulk upload to DSpace using SWORDv2.",
    epilog="If the submission is created successfully, it will remain open "
           "to be completed with the necessary metadata and licenses, using "
           "the DSpace web interface. The submission can be found in the "
           "\"My Account -> Submissions\" section of the user's area.")
parser.add_argument(
    "data", type=str, nargs=1,
    help="Accepts: METSDSpaceSIP packages, zip files, directories, single "
         "files. NOTE: METSDSpaceSIP packages are only accepted by "
         "Collections with a workflow!")
parser.add_argument(
    "--username", dest="user_name", type=str, nargs=1,
    help="DSpace username.")
parser.add_argument(
    "--zip", action="store_true", dest="zip", default=False,
    help="If \"data\" is a directory, send it as a single zip archive to "
         "preserve its structure.")
parser.add_argument(
    "--title", dest="title", type=str, nargs=1,
    help="Title (ignored for METS packages).")
parser.add_argument(
    "--author", dest="author", type=str, nargs="+",
    help="Author(s) (ignored for METS packages). Accepts multiple entries "
         "in the format \"Surname, Name\"")
parser.add_argument(
    "--date", dest="date", type=str, nargs=1,
    help="Date of creation (string) (ignored for METS packages).")
parser.add_argument(
    "--servicedoc", dest="dspaceurl", type=str, nargs=1,
    help="Url of the SWORDv2 service document (default: "
         + DEFAULT_SERVICEDOC + ")")

args = parser.parse_args()

# Path of the file, zip archive or directory to submit.
data = args.data[0]

# Fall back to the default service document when none was supplied.
if args.dspaceurl is None:
    dspaceurl = DEFAULT_SERVICEDOC
else:
    dspaceurl = args.dspaceurl[0]
marco@0 42
marco@0 43
marco@0 44 try:
marco@0 45 # Connect to SWORD server
marco@0 46 attempts = 3 # Number of attempts left to connect to server
marco@0 47 connected = False
marco@0 48 while attempts>0 and not connected:
marco@0 49 print "Connecting to SWORD server. Remaining attempts: ", attempts
marco@0 50 # Try to login, get service document
marco@0 51 # Get username and password
marco@0 52 if args.user_name == None:
marco@0 53 user_name = raw_input("DSpace Username: ")
marco@0 54 else:
marco@0 55 user_name = args.user_name[0]
marco@0 56 print "DSpace Username: ",user_name
marco@0 57 user_pass = getpass.getpass("DSpace password:")
marco@0 58 # Connect to the server
marco@0 59 c = Connection(dspaceurl, user_name=user_name, user_pass=user_pass,keep_history=False)
marco@3 60
marco@0 61 # Get service document
marco@0 62 try:
marco@0 63 c.get_service_document()
marco@3 64 except: # Server error
marco@3 65 print "Server unreachable!"
marco@3 66 break
marco@3 67 if c.sd != None:
marco@3 68 connected = True
marco@3 69 else:
marco@0 70 attempts-=1
marco@0 71 print "Incorrect username and/or password"
marco@3 72
marco@0 73
marco@0 74 if connected:
marco@0 75 # List available collections
marco@0 76 print "------------------------"
marco@0 77 print "Welcome to the ",c.workspaces[0][0], "repository"
marco@0 78 print "Available Collections: "
marco@0 79 numColl = len(c.workspaces[0][1])
marco@0 80 for ctr in range(numColl):
marco@0 81 coll = c.workspaces[0][1][ctr]
marco@0 82 print ctr+1,":",coll.title
marco@0 83 # Select a collection to deposit into
marco@0 84 sel = -1
marco@0 85 while (sel<=0 or sel>numColl):
marco@0 86 sel = input("Select a Collection to submit your files into: ")
marco@0 87 collection = c.workspaces[0][1][sel-1]
marco@0 88 print "Selected Collection: ",collection.title
marco@0 89
marco@4 90 # Create a submission
marco@4 91 fileslist = []
marco@4 92 temp = False # Delete temp files
marco@4 93 # If folder
marco@4 94 if os.path.isdir(data):
marco@4 95 if args.zip: # If zip option, zip all the files and maintain the structure, but start from the base only...
marco@4 96 dataname = os.path.basename(os.path.normpath(data))
marco@4 97 zipf = dataname+".zip"
marco@4 98 myzip = zipfile.ZipFile(zipf, "w")
marco@4 99 # get the directory structure
marco@4 100 print "Creating a zip archive for submission..."
marco@4 101 for root, dirs, files in os.walk(data):
marco@4 102 for name in files:
marco@4 103 myzip.write(os.path.join(root,name),
marco@4 104 os.path.relpath(os.path.join(root,name),data))
marco@4 105 fileslist.append(zipf)
marco@4 106 packaging = "http://purl.org/net/sword/package/SimpleZip"
marco@4 107 type = "SimpleZip"
marco@4 108 temp = True
marco@4 109 else: #create a list of files to upload
marco@4 110 for root, dirs, files in os.walk(data):
marco@4 111 for name in files:
marco@4 112 fileslist.append(os.path.join(root,name))
marco@4 113 type = "multiple files"
marco@4 114 elif zipfile.is_zipfile(data): #This is a zip file
marco@4 115 fileslist.append(data)
marco@4 116 myzip = zipfile.ZipFile(data)
marco@4 117 if "mets.xml" in myzip.namelist(): #This is a METS package
marco@4 118 packaging = "http://purl.org/net/sword/package/METSDSpaceSIP"
marco@4 119 type = "METS"
marco@4 120 else: #THis is a simple zip file
marco@4 121 packaging = "http://purl.org/net/sword/package/SimpleZip"
marco@4 122 type = "SimpleZip"
marco@4 123 myzip.close()
marco@0 124 elif os.path.isfile(data): # This is a single file
marco@4 125 fileslist.append(data)
marco@4 126 type = "single file"
marco@0 127 else:
marco@0 128 print "Couldn't find the data."
marco@0 129 sys.exit()
marco@0 130
marco@0 131 print "------------------------"
marco@0 132 print "This is a ",type," submission"
marco@4 133
marco@4 134 # Create a metadata entry
marco@4 135 if (args.title != None) or (args.author != None) or (args.date != None):
marco@4 136 entry = Entry()
marco@4 137 if args.title != None:
marco@4 138 entry.add_fields(dcterms_title = args.title[0])
marco@4 139 if args.author != None:
marco@4 140 for creator in args.author:
marco@4 141 entry.add_fields(dcterms_creator=creator)
marco@4 142 if args.date != None:
marco@4 143 entry.add_fields(dcterms_created = args.date[0])
marco@4 144 else:
marco@4 145 entry = None
marco@4 146 # Select what to do
marco@4 147 if (type is "single file") or (type is "multiple files"):
marco@4 148 try:
marco@4 149 # Create the metadata entry with ATOM
marco@4 150 print "------------------------"
marco@4 151 print "Creating the item..."
marco@4 152 if entry is None:
marco@4 153 entry = Entry(dcterms_title=(os.path.basename(data)))
marco@4 154 creation_receipt = c.create(col_iri = collection.href, metadata_entry = entry)
marco@4 155
marco@4 156 # Add the files
marco@4 157 for f in fileslist:
marco@4 158 print "Uploading file ",os.path.basename(f)
marco@4 159 payload = open(f,"rb")
marco@4 160 deposit_receipt = c.add_file_to_resource(edit_media_iri = creation_receipt.edit_media,
marco@4 161 payload = payload,
marco@4 162 filename = os.path.basename(f),
marco@4 163 mimetype = 'application/zip',
marco@4 164 packaging = 'http://purl.org/net/sword/package/Binary')
marco@4 165 payload.close()
marco@4 166 except HTTPResponseError:
marco@4 167 print "Bad request"
marco@4 168 else:
marco@4 169 # PUT the data
marco@4 170 payload = open(fileslist[0], "rb")
marco@4 171 try:
marco@4 172 deposit_receipt = c.create(col_iri = collection.href,
marco@4 173 payload = payload,
marco@4 174 filename = fileslist[0],
marco@4 175 mimetype = "application/zip",
marco@4 176 packaging = packaging)
marco@4 177 print type, " submission successful."
marco@4 178 except:
marco@4 179 print "Error! Couldn't submit the file!"
marco@4 180 if type == "METS": # Just guessing: not sure this is the problem...
marco@4 181 print "To submit a METS package, the collection MUST have a workflow!"
marco@4 182 payload.close()
marco@4 183
marco@4 184 # If some of the additional arguments for author, title, date etc. have been specified, update the metadata
marco@4 185 if type == "SimpleZip" and entry != None:
marco@4 186 try:
marco@4 187 receipt_update = c.update(dr = receipt_dep , metadata_entry = entry, in_progress = True) # in_progress is True: we don't want to close the submission
marco@4 188 print "Additional metadata updated successfully."
marco@4 189 except:
marco@4 190 print "Server error"
marco@4 191 if temp:
marco@4 192 os.remove(fileslist[0])
marco@0 193
marco@4 194 print "------------------------"
marco@4 195 print "You will find the submission in the \"Submissions\" list in your DSpace account. To complete/edit it with metadata and licenses, click on the title and then on \"Resume\"."
marco@4 196
marco@0 197
marco@0 198 else: # Failed to connect to SWORDv2 Server
marco@0 199 print "Couldn't connect to the server."
marco@0 200 if attempts == 0:
marco@0 201 print "Invalid credentials entered 3 times."
marco@0 202
marco@0 203 except KeyboardInterrupt:
marco@0 204 print "------------------------"
marco@3 205 print "\nSubmission aborted by user."