changeset 20:8b9e7f2f80e2 timeouts

Updated to: (i) allow timeout and password as parameters (ii) use connection/collection/item/file objects
author Steve Welburn <stephen.welburn@eecs.qmul.ac.uk>
date Tue, 22 Jan 2013 13:41:24 +0000
parents 57bc248c367a
children 3fb1ac952fb2
files sworduploader.py
diffstat 1 files changed, 298 insertions(+), 189 deletions(-) [+]
line wrap: on
line diff
--- a/sworduploader.py	Tue Jan 22 13:36:17 2013 +0000
+++ b/sworduploader.py	Tue Jan 22 13:41:24 2013 +0000
@@ -34,7 +34,7 @@
 
   The above copyright notice and this permission notice shall be
   included in all copies or substantial portions of the Software.
-  
+
   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
   OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
@@ -48,28 +48,31 @@
 A copy of this License can also be found in the COPYING file distributed with the source code.
 """
 
-import argparse, getpass, zipfile, os, sys
+import argparse, getpass, zipfile, os, sword2.http_layer
 from sword2 import *
 
 # Parse arguments
-parser = argparse.ArgumentParser(description="Bulk upload to DSpace using SWORDv2.",epilog="If the submission is created successfully, it will remain open to be completed with the necessary metadata and licenses, using the DSpace web interface. The submission can be found in the \"My Account -> Submissions\" section of the user's area.")
+parser = argparse.ArgumentParser(description="Bulk upload to DSpace using SWORD v2.",epilog="If the submission is created successfully, it will remain open to be completed with the necessary metadata and licenses, using the DSpace web interface. The submission can be found in the \"My Account -> Submissions\" section of the user's area.")
 parser.add_argument("data", type=str, nargs=1,
-                   help="Accepts: METSDSpaceSIP and BagIt packages, simple zip files, directories, single files. NOTE: METSDSpaceSIP packages are only accepted by Collections with a workflow!")
-parser.add_argument("--username", dest="user_name", type=str,nargs=1, help="DSpace username.")
+				   help="Accepts: METSDSpaceSIP and BagIt packages, simple zip files, directories, single files. NOTE: METSDSpaceSIP packages are only accepted by Collections with a workflow!")
+parser.add_argument("--username", dest="user_name", type=str, nargs=1, help="DSpace username.")
+parser.add_argument("--password", dest="password", type=str, nargs=1, help="DSpace password.") # When omitted, the password is prompted for with getpass instead
+parser.add_argument("--timeout", dest="timeout", type=float, nargs=1, default=[30.0], help="Timeout for response for connections. Make sure this is long enough to allow files to be uploaded.") # nargs=1 stores a list, so the default must be a list too (it is read as args.timeout[0])
 parser.add_argument("--title", dest="title", type=str,nargs=1, help="Title (ignored for METS packages).")
-parser.add_argument("--author", dest="author", type=str,nargs="+", help="Author(s) (ignored for METS packages). Accepts multiple entries in the format \"Surname, Name\"")
-parser.add_argument("--date", dest="date", type=str,nargs=1, help="Date of creation (string) (ignored for METS packages).")
-parser.add_argument("--zip", action="store_true",dest="zip",default=False, help="If \"data\" is a directory, compress it and post it as a single file. The zip file will be saved along with the individual files.")
-parser.add_argument("--servicedoc", dest="sd", type=str,nargs=1, help="Url of the SWORDv2 service document (default: use server.cfg if available, otherwise http://c4dm.eecs.qmul.ac.uk/rdr/swordv2/servicedocument")
+parser.add_argument("--author", dest="author", type=str, nargs="+", help="Author(s) (ignored for METS packages). Accepts multiple entries in the format \"Surname, Name\"")
+parser.add_argument("--date", dest="date", type=str, nargs=1, help="Date of creation (string) (ignored for METS packages).")
+parser.add_argument("--zip", action="store_true", dest="zip", default=False, help="If \"data\" is a directory, compress it and post it as a single file. The zip file will be saved along with the individual files.")
+parser.add_argument("--servicedoc", dest="sd", type=str, nargs=1, help="Url of the SWORD v2 service document (default: use server.cfg if available, otherwise http://c4dm.eecs.qmul.ac.uk/rdr/swordv2/servicedocument")
 
 args = parser.parse_args()
 data = args.data[0]
+timeout = args.timeout[0] # Seconds; unwraps the one-element list produced by nargs=1 (or the list default)
 if args.zip:
-	storezip = True
+	storeZip = True
 else:
-	storezip = False
+	storeZip = False
 
-if args.sd == None:
+if args.sd is None:
 	try:
 		f = open("server.cfg", "r")
 		sd = f.readline()
@@ -79,201 +82,307 @@
 else:
 	sd = args.sd[0]
 
+class swordConnection(object): # Wraps a sword2 Connection: handles login and collection selection
+	def __init__(self):
+		self.serverConnection = None # sword2 Connection, created by connect()
+		self.connected = False
+		self.name = "" # Name of the first workspace, filled in once connected
 
-try:
-	# Connect to SWORD server: it will always try to authenticate (no anonymous submissions!
-	attempts = 3 #  Number of attempts left to connect to server
-	connected = False
-	while attempts>0 and not connected:
-		print "Connecting to SWORD server. Remaining attempts: ", attempts
-		# Try to login, get service document
-		# Get username and password
-		if args.user_name == None:
-			user_name = raw_input("Username: ")
+	def connect(self, timeout=30.0): # Log in using the module-level args and sd; raises Exception if no connection is made
+		self.serverConnection = None
+		self.connected = False
+		httpImp = sword2.http_layer.HttpLib2Layer(".cache", timeout=timeout)
+		print "Connection timeout is ", timeout, "seconds."
+		# Connect to SWORD server: it will always try to authenticate (no anonymous submissions!)
+		attempts = 3 #  Number of attempts left to connect to server
+		while attempts>0 and not self.connected:
+			print "Connecting to SWORD server. Remaining attempts: ", attempts
+			# Try to login, get service document
+			# Get username and password
+			if args.user_name is None:
+				user_name = raw_input("Username: ")
+			else:
+				user_name = args.user_name[0]
+				print "Username: ",user_name
+
+			if args.password is None:
+				user_pass = getpass.getpass("Password:")
+			else:
+				user_pass = args.password[0]
+			# Connect to the server
+
+			self.serverConnection = Connection(sd, user_name=user_name, user_pass=user_pass,keep_history=False,http_impl=httpImp)
+
+			# Get service document
+			try:
+				self.serverConnection.get_service_document()
+			except Exception: # Server error (Ctrl-C is deliberately left to the script's KeyboardInterrupt handler)
+				print "Server unreachable!"
+				break
+
+			if self.serverConnection.sd is not None:
+				self.connected = True
+			else:
+				attempts-=1
+				print "Incorrect username and/or password"
+
+		if not self.connected:
+			# Failed to connect to SWORD v2 Server
+			print "Couldn't connect to the server."
+			if attempts == 0:
+				raise Exception, "Invalid credentials entered 3 times."
+			else:
+				raise Exception, "Unable to connect to server"
 		else:
-			user_name = args.user_name[0]
-			print "Username: ",user_name
-		user_pass = getpass.getpass("Password:")
-		# Connect to the server
-		c = Connection(sd, user_name=user_name, user_pass=user_pass,keep_history=False)
-		
-		# Get service document
-		try:
-			c.get_service_document()
-		except: # Server error
-			print "Server unreachable!"
-			break
-		if c.sd != None:
-			connected = True
-		else:
-			attempts-=1
-			print "Incorrect username and/or password"
-			
-			
-	if connected:
+			self.name = self.serverConnection.workspaces[0][0]
+
+	def selectCollection(self): # Prompt until a valid collection number is entered; returns a swordCollection
 		# List available collections
-		print "------------------------"
-		print "Welcome to the ",c.workspaces[0][0], "repository"
 		print "Available Collections: "
-		numColl = len(c.workspaces[0][1])
+		numColl = len(self.serverConnection.workspaces[0][1])
 		for ctr in range(numColl):
-			coll = c.workspaces[0][1][ctr]
+			coll = self.serverConnection.workspaces[0][1][ctr]
 			print ctr+1,":",coll.title
 		# Select a collection to deposit into
 		sel = "0"
-		while (not sel.isdigit() or int(sel)<=0 or int(sel)>numColl):
+		while (not sel.isdigit()) or int(sel)<=0 or int(sel)>numColl:
 			sel = raw_input("Select a Collection to submit your files into: ")
 		sel = int(sel)
-		collection = c.workspaces[0][1][sel-1]
-		print "Selected Collection: ",collection.title
-		
-		# Create a submission
-		fileslist = []
-		temp = False # Delete temp files
-		# If folder
-		if os.path.isdir(data):
-			if args.zip: # If zip option, zip all the files and maintain the structure, but start from the base only...
-				dataname = os.path.basename(os.path.normpath(data))
-				if args.title != None:
-					zipf = args.title[0].replace(" ","_")+".zip"
-				else:
-					zipf = dataname.replace(" ","_")+".zip"
-				myzip = zipfile.ZipFile(zipf, "w")
-				# get the directory structure
-				print "Creating a zip archive for submission..."
-				for root, dirs, files in os.walk(data):
-					for name in files:
-						if not name.startswith('.'): # Do not upload hidden files, OSX/linux
-							myzip.write(os.path.join(root,name),
-								os.path.relpath(os.path.join(root,name),data).replace(" ","_").replace("[","(").replace("]",")")) # Remove spaces and square brakets
-				fileslist.append(zipf)
-				myzip.close()
-				packaging = "http://purl.org/net/sword/package/SimpleZip"
-				type = "SimpleZip"
-				temp = True
-			else: #create a list of files to upload
-				for root, dirs, files in os.walk(data):
-					for name in files:
-						if not name.startswith('.'):
-							fileslist.append(os.path.join(root,name))
-				type = "multiple files"
-		elif zipfile.is_zipfile(data): #This is a zip file
-			fileslist.append(data)
-			myzip = zipfile.ZipFile(data)
-			if "mets.xml" in myzip.namelist(): #This is a METS package
-				packaging = "http://purl.org/net/sword/package/METSDSpaceSIP"
-				type = "METS"
-				in_progress = False
-			elif "bagit.txt" in "".join(myzip.namelist()): #This is a BagIt package
-				packaging = "http://purl.org/net/sword/package/BagIt"
-				type = "BAGIT"
-			else:#THis is a simple zip file
-				packaging = "http://purl.org/net/sword/package/SimpleZip"
-				type = "SimpleZip"
-			myzip.close()
-		elif os.path.isfile(data): # This is a single file
-			fileslist.append(data)
-			type = "single file"
+		collection = swordCollection(self, self.serverConnection.workspaces[0][1][sel-1]) # sel is 1-based on screen, 0-based in the list
+		return collection
+
+
+class swordCollection(object): # A deposit target: pairs a server collection with the swordConnection used to reach it
+	def __init__(self, connection, collection):
+		self.connection = connection # swordConnection wrapper; its serverConnection performs the requests
+		self.serverCollection = collection # Collection entry taken from the service document
+
+	def title(self): # Title of the wrapped collection
+		return self.serverCollection.title
+
+	def createItem(self, metadata_entry, in_progress=True): # Create a metadata-only item; returns a swordItem wrapping the receipt
+		creationReceipt = self.connection.serverConnection.create(col_iri = self.serverCollection.href, metadata_entry = metadata_entry, in_progress=in_progress)
+		return swordItem(self.connection, self, creationReceipt)
+
+	def createItemFromFile(self, file, metadata_entry=None, in_progress=True): # Deposit a packaged swordFile as a new item; metadata_entry is accepted but not sent with the payload
+		depositReceipt = None # Stays None when the deposit fails, so callers can check item.receipt
+		payload = open(file.path, "rb")
+		try:
+			depositReceipt = self.connection.serverConnection.create(col_iri = self.serverCollection.href,
+				payload = payload,
+				filename = file.filename,
+				mimetype = file.mimetype,
+				packaging = file.packaging,
+				in_progress = in_progress)
+			print str(file.packaging).rsplit("/", 1)[-1], " submission successful."
+		except Exception: # Deliberately best-effort: report the failure and carry on (Ctrl-C still propagates)
+			print "Error! Couldn't submit the file!"
+			if str(file.packaging).endswith("/METSDSpaceSIP"): # Just guessing: not sure this is the problem...
+				print "To submit a METS package, the collection MUST have a workflow!"
+		finally: # Close the file handle on every path
+			payload.close()
+		return swordItem(self.connection, self, depositReceipt)
+
+class swordItem(object): # An item on the server, identified by its creation/deposit receipt
+	def __init__(self, connection, collection, receipt):
+		self.connection = connection # swordConnection wrapper used for all requests
+		self.serverCollection = collection # The swordCollection the item was created in
+		self.receipt = receipt # Receipt returned by the server (None if the deposit failed)
+
+	def addFile(self, file): # Upload one swordFile to this item and store the receipt on file.deposit_receipt
+		payload = open(file.path, "rb")
+		print "Uploading file ", file.filename,
+		try:
+			file.deposit_receipt = self.connection.serverConnection.add_file_to_resource(self.receipt.edit_media,
+				payload = payload,
+				filename = file.filename,
+				mimetype = file.mimetype,
+				packaging = file.packaging)
+		finally: # Close the file handle even when the upload raises
+			payload.close()
+		print "[uploaded]"
+
+	def updateMetadata(self, metadataEntry, in_progress=True): # Replace the item's metadata; re-raises server errors after reporting them
+		try:
+			self.connection.serverConnection.update(dr = self.receipt, metadata_entry = metadataEntry, in_progress = in_progress)
+			print "Metadata update successful."
+		except Exception:
+			print "Server error"
+			raise
+
+# Class to encapsulate a SWORD2 payload file
+class swordFile(object):
+	def __init__(self, path, filename=None): # path: file on disk; filename: name sent to the server (defaults to the basename of path)
+		self.path = path
+		self.deposit_receipt = None # Set by swordItem.addFile() after a successful upload
+		if filename is None:
+			self.filename = os.path.basename(path)
 		else:
-			print "Couldn't find the data."
-			sys.exit()
-		
-		print "------------------------"
-		print "This is a ",type," submission"
-		
-		# Create a metadata entry
-		if (args.title != None) or (args.author != None) or (args.date != None):
-			entry = Entry()	
-			if  args.title != None:
-				entry.add_fields(dcterms_title = args.title[0])
-			if args.author != None:
-				for creator in args.author:
-					entry.add_fields(dcterms_creator=creator)
-			if args.date != None:
-				entry.add_fields(dcterms_created = args.date[0])
+			self.filename = filename
+		# Default to a basic binary file
+		self.mimetype = "application/octet-stream" # The registered generic binary type is octet-stream (hyphen, not "+")
+		self.packaging = 'http://purl.org/net/sword/package/Binary'
+
+	def __str__(self): # One-line summary of the payload, for debugging
+		return "path:" + str(self.path) + ", filename:" + str(self.filename) + ", mimetype:" + str(self.mimetype) + ", packaging:" + str(self.packaging)
+
+def getSubmissionData(args, data): # Classify data (directory, zip package or single file); returns a dict with keys files, packaging, type, isTemporaryFile
+	# Create a submission
+	filesList = []
+	temp = False # Delete temp files
+	packaging = None # Only set for zip-based submissions
+	# If folder
+	if os.path.isdir(data):
+		if args.zip: # If zip option, zip all the files and maintain the structure, but start from the base only...
+			dataName = os.path.basename(os.path.normpath(data))
+			if args.title is not None:
+				zipFile = args.title[0].replace(" ","_")+".zip"
+			else:
+				zipFile = dataName.replace(" ","_")+".zip"
+			myZip = zipfile.ZipFile(zipFile, "w") # Created in the current working directory
+			# get the directory structure
+			print "Creating a zip archive for submission..."
+			for root, dirs, files in os.walk(data):
+				for name in files:
+					if not name.startswith('.'): # Do not upload hidden files, OSX/linux
+						# Remove spaces and square brackets
+						myZip.write(os.path.join(root,name),
+							os.path.relpath(os.path.join(root,name),data).replace(" ","_").replace("[","(").replace("]",")"))
+			filesList.append(zipFile)
+			myZip.close()
+			packaging = "http://purl.org/net/sword/package/SimpleZip"
+			submissionType = "SimpleZip"
+			temp = True # The archive was generated here, so the caller removes it afterwards
 		else:
-			entry = None
-		# Select what to do
-		if (type is "single file") or (type is "multiple files"): # Use the single file upload procedure
-			try:
-				# Create the metadata entry with ATOM
-				print "------------------------"
-				print "Creating the item..."
-				if entry is None:
-					entry = Entry(dcterms_title=(os.path.basename(data)))
-				creation_receipt = c.create(col_iri = collection.href, metadata_entry = entry, in_progress=True)
-				
-				# Add the files
+			# Create a list of files to upload
+			for root, dirs, files in os.walk(data):
+				for name in files:
+					if not name.startswith('.'):
+						filesList.append(os.path.join(root,name))
+			submissionType = "multiple files"
+	elif zipfile.is_zipfile(data):
+		# This is a zip file
+		filesList.append(data)
+		myZip = zipfile.ZipFile(data)
+		if "mets.xml" in myZip.namelist():
+			# This is a METS package
+			packaging = "http://purl.org/net/sword/package/METSDSpaceSIP"
+			submissionType = "METS"
+			# in_progress is chosen by the caller from the returned type, not here
+		elif any(member.split("/")[-1] == "bagit.txt" for member in myZip.namelist()):
+			# This is a BagIt package
+			packaging = "http://purl.org/net/sword/package/BagIt"
+			submissionType = "BAGIT"
+		else:
+			# This is a simple zip file
+			packaging = "http://purl.org/net/sword/package/SimpleZip"
+			submissionType = "SimpleZip"
+		myZip.close()
+	elif os.path.isfile(data): # This is a single file
+		filesList.append(data)
+		submissionType = "single file"
+	else:
+		raise Exception, "Couldn't find the data."
+
+	submissionData = {"files": filesList, "packaging": packaging, "type":submissionType, "isTemporaryFile":temp}
+	return submissionData
+
+def setupMetadataEntry(args): # Build a metadata Entry from --title/--author/--date; returns None when none of them was given
+	# Create a metadata entry
+	if (args.title is not None) or (args.author is not None) or (args.date is not None):
+		entry = Entry()
+		if  args.title is not None:
+			entry.add_fields(dcterms_title = args.title[0])
+		if args.author is not None:
+			for creator in args.author: # One dcterms_creator field is added per author
+				entry.add_fields(dcterms_creator=creator)
+		if args.date is not None:
+			entry.add_fields(dcterms_created = args.date[0])
+	else:
+		entry = None # Callers substitute a title-only Entry when they need one
+	return entry
+
+try:
+	serverConnection = swordConnection()
+	serverConnection.connect(timeout) # Raises Exception if the login fails
+	print "------------------------"
+	print "Welcome to the", serverConnection.name, "repository"
+
+	collectionForItem = serverConnection.selectCollection()
+	print "Selected Collection:", collectionForItem.title()
+
+	submissionData = getSubmissionData(args, data)
+
+	print "------------------------"
+	print "This is a", submissionData["type"], "submission"
+
+	metadataEntry = setupMetadataEntry(args)
+
+	# Select what to do
+	if (submissionData["type"] == "single file") or (submissionData["type"] == "multiple files"): # Use the single file upload procedure
+		try:
+			# Create the metadata entry with ATOM
+			print "------------------------"
+			print "Creating the", submissionData["type"], "item... "
+			if metadataEntry is None:
+				metadataEntry = Entry(dcterms_title=(os.path.basename(data)))
+			collectionItem = collectionForItem.createItem(metadata_entry = metadataEntry, in_progress=True)
+			print "Item created"
+
+			# Create a list of files to upload
+			if submissionData["type"] == "single file":
+				payLoadList = [swordFile(submissionData["files"][0])]
+			else:
 				# Get the longest common path in order to send the correct filename to keep the structure
-				common = os.path.commonprefix(fileslist)
-				for f in fileslist:
-					filename = os.path.relpath(f,common)
-					print "Uploading file ", filename
-					payload = open(f,"rb")
-					deposit_receipt = c.add_file_to_resource(edit_media_iri = creation_receipt.edit_media,
-						payload = payload,
-						filename = filename,
-						mimetype = 'application/zip',
-						packaging = 'http://purl.org/net/sword/package/Binary')
-					payload.close()
-			except HTTPResponseError:
-				print "Bad request"
+				common = data # The walked directory is the common root; os.path.commonprefix() is character-based and can stop mid-name
+				payLoadList=[]
+				for f in submissionData["files"]:
+					filename = os.path.relpath(f, common)
+					payLoadList.append(swordFile(f, filename))
+
+			# Upload the files
+			for payload in payLoadList:
+				collectionItem.addFile(payload)
+		except HTTPResponseError:
+			print "Bad request"
+	else:
+		# Send the zip file and let the ingester do its job
+		if (submissionData["type"] == "SimpleZip") or (submissionData["type"]=="BAGIT"):
+			in_progress = True
+			# FIXME: we don't want to write silly things in dc.description!
 		else:
-			# Send the zip file and let the ingester do its job
-			payload = open(fileslist[0], "rb")
-			if (type == "SimpleZip") or (type=="BAGIT"):
-				in_progress = True
-				# FIXME: we don't want to write silly things in dc.description!
-			else:
-				in_progress = False
-			try:
-				deposit_receipt = c.create(col_iri = collection.href,
-					payload = payload,
-					filename = fileslist[0],
-					mimetype = "application/zip",
-					packaging = packaging,
-					in_progress = in_progress)
-				print type, " submission successful."
-			except:
-				print "Error! Couldn't submit the file!"						
-				if type == "METS": # Just guessing: not sure this is the problem...
-					print "To submit a METS package, the collection MUST have a workflow!"
-			payload.close()
-			
-			# If some of the additional arguments for author, title, date etc. have been specified, update the metadata (only SimpleZip)
-			if type == "SimpleZip":
-				if entry is None:
-					entry = Entry(dcterms_title=(os.path.basename(fileslist[0])))
+			in_progress = False
+
+		payload = swordFile(submissionData["files"][0])
+		payload.mimetype = "application/zip"
+		payload.packaging = submissionData["packaging"]
+		item = collectionForItem.createItemFromFile(payload, metadataEntry, in_progress) # Second positional argument is metadata_entry, third is in_progress
+
+		# If some of the additional arguments for author, title, date etc. have been specified, update the metadata (only SimpleZip)
+		if submissionData["type"] == "SimpleZip" and item.receipt is not None: # Nothing to update when the deposit produced no receipt
+			if metadataEntry is None:
+				metadataEntry = Entry(dcterms_title=(os.path.basename(submissionData["files"][0])))
+
+			# in_progress is True: we don't want to close the submission
+			item.updateMetadata(metadataEntry, in_progress=True)
+
+			# If we want to store the zip file along with the individual files (Only SimpleZip)
+			if storeZip:
 				try:
-					update_receipt = c.update(dr = deposit_receipt , metadata_entry = entry, in_progress = True) # in_progress is True: we don't want to close the submission
-					print "Metadata update successfull."
+					zipPayload = swordFile(submissionData["files"][0], os.path.basename(submissionData["files"][0]).replace(" ", "_"))
+					zipPayload.mimetype = "application/zip"
+					zipPayload.packaging = 'http://purl.org/net/sword/package/Binary'
+					item.addFile(zipPayload)
+					print "Zip file successfully added to the bitstreams."
 				except:
-					print "Server error"
-				# If we want to store the zip file along with the individual files (Only SimpleZip)
-				if storezip:
-					try:
-						payload = open(fileslist[0],"rb")
-						zipdeposit_receipt = c.add_file_to_resource(edit_media_iri = deposit_receipt.edit_media,
-							payload = payload,
-							filename = os.path.basename(fileslist[0]).replace(" ","_"),
-							mimetype = 'application/zip',
-							packaging = 'http://purl.org/net/sword/package/Binary')
-						payload.close()
-						print "Zip file successfully added to the bitstreams."
-					except:
-						print "Server error: could not add the zip file to the resources"
-			if temp:
-				os.remove(fileslist[0])
+					print "Server error: could not add the zip file to the resources"
 
-		print "------------------------"
-		print "You will find the submission in the \"Submissions\" list in your DSpace account. To complete/edit it with metadata and licenses, click on the title and then on \"Resume\"."			
+		if submissionData["isTemporaryFile"]: # Remove the zip archive that getSubmissionData() generated for --zip
+			os.remove(submissionData["files"][0])
 
-		
-	else: # Failed to connect to SWORDv2 Server
-		print "Couldn't connect to the server."
-		if attempts == 0:
-			print "Invalid credentials entered 3 times."
-	
+	print "------------------------"
+	print "You will find the submission in the \"Submissions\" list in your DSpace account. To complete/edit it with metadata and licenses, click on the title and then on \"Resume\"."
+
 except KeyboardInterrupt:
 	print "------------------------"
 	print "\nSubmission aborted by user."
\ No newline at end of file