changeset 4:96d62e78ac9f

Added support for single file upload using edit media resources
author Marco Fabiani <marco.fabiani@eecs.qmul.ac.uk>
date Thu, 29 Mar 2012 17:39:46 +0100
parents c1918aa337c4
children 13fc2773b3fe
files sworduploader.py
diffstat 1 files changed, 105 insertions(+), 70 deletions(-) [+]
line wrap: on
line diff
--- a/sworduploader.py	Wed Mar 28 17:48:59 2012 +0100
+++ b/sworduploader.py	Thu Mar 29 17:39:46 2012 +0100
@@ -1,6 +1,6 @@
 #!usr/bin/env/ python
 
-""" SWORD2 DSpace bulk uploader - v0.2
+""" SWORD2 DSpace bulk uploader - v0.3
 
 A python script to submit large numbers of files to a SWORD2-compatible repository, specifically DSpace 1.8x.
 Built on the SWORD2 python client library: https://bitbucket.org/beno/python-sword2/overview
@@ -9,7 +9,9 @@
 
 - python 2.X
 
-- sword2 library: https://bitbucket.org/beno/python-sword2/src
+- sword2 library, with modifications:
+ 	(original) https://bitbucket.org/beno/python-sword2/src
+ 	(modified) https://code.soundsoftware.ac.uk/hg/sworduploader 
 
 -----------------------------------
 Copyright 2012 Marco Fabiani
@@ -17,7 +19,7 @@
 -----------------------------------
 """
 
-import argparse, getpass, zipfile,os,sys
+import argparse, getpass, zipfile, os, sys
 from sword2 import *
 
 # Parse arguments
@@ -25,6 +27,7 @@
 parser.add_argument("data", type=str, nargs=1,
                    help="Accepts: METSDSpaceSIP packages, zip files, directories, single files. NOTE: METSDSpaceSIP packages are only accepted by Collections with a workflow!")
 parser.add_argument("--username", dest="user_name", type=str,nargs=1, help="DSpace username.")
+parser.add_argument("--zip", action="store_true",dest="zip",default=False, help="If \"data\" is a directory, send it as a single zip archive to preserve its structure.")
 parser.add_argument("--title", dest="title", type=str,nargs=1, help="Title (ignored for METS packages).")
 parser.add_argument("--author", dest="author", type=str,nargs="+", help="Author(s) (ignored for METS packages). Accepts multiple entries in the format \"Surname, Name\"")
 parser.add_argument("--date", dest="date", type=str,nargs=1, help="Date of creation (string) (ignored for METS packages).")
@@ -84,81 +87,113 @@
 		collection = c.workspaces[0][1][sel-1]
 		print "Selected Collection: ",collection.title
 		
-		# Create a submission: build the zip files
-		temp = True # delete the zip file at the end of the upload	
-		if zipfile.is_zipfile(data):
-			zipf = data
-			temp = False
+		# Create a submission
+		fileslist = []
+		temp = False # Delete temp files
+		# If folder
+		if os.path.isdir(data):
+			if args.zip: # If zip option, zip all the files and maintain the structure, but start from the base only...
+				dataname = os.path.basename(os.path.normpath(data))
+				zipf = dataname+".zip"
+				myzip = zipfile.ZipFile(zipf, "w")
+				# get the directory structure
+				print "Creating a zip archive for submission..."
+				for root, dirs, files in os.walk(data):
+					for name in files:
+						myzip.write(os.path.join(root,name),
+							os.path.relpath(os.path.join(root,name),data))
+				fileslist.append(zipf)
+				packaging = "http://purl.org/net/sword/package/SimpleZip"
+				type = "SimpleZip"
+				temp = True
+			else: #create a list of files to upload
+				for root, dirs, files in os.walk(data):
+					for name in files:
+						fileslist.append(os.path.join(root,name))
+				type = "multiple files"
+		elif zipfile.is_zipfile(data): #This is a zip file
+			fileslist.append(data)
+			myzip = zipfile.ZipFile(data)
+			if "mets.xml" in myzip.namelist(): #This is a METS package
+				packaging = "http://purl.org/net/sword/package/METSDSpaceSIP"
+				type = "METS"
+			else: #THis is a simple zip file
+				packaging = "http://purl.org/net/sword/package/SimpleZip"
+				type = "SimpleZip"
+			myzip.close()
 		elif os.path.isfile(data): # This is a single file
-			dataname = os.path.basename(data)
-			zipf = os.path.splitext(dataname)[0]+".zip"
-			myzip = zipfile.ZipFile(zipf, "w")
-			myzip.write(data,os.path.basename(data))
-			myzip.close()
-		elif os.path.isdir(data): # This is a directory, zip all the files and maintain the structure, but start from the base only...
-			dataname = os.path.basename(os.path.normpath(data))
-			zipf = dataname+".zip"
-			myzip = zipfile.ZipFile(zipf, "w")
-			# get the directory structure
-			print "Creating a zip archive for submission..."
-			for root, dirs, files in os.walk(data):
-				for name in files:
-					myzip.write(os.path.join(root,name),
-						os.path.relpath(os.path.join(root,name),data))
-			myzip.close()
+			fileslist.append(data)
+			type = "single file"
 		else:
 			print "Couldn't find the data."
 			sys.exit()
 		
-		#Check if this is a METSDSpaceSIP: see if there is a mets.xml file in the zip
-		myzip = zipfile.ZipFile(zipf)
-		if "mets.xml" in myzip.namelist():
-			packaging = "http://purl.org/net/sword/package/METSDSpaceSIP"
-			type = "METS"
-		else:
-			packaging = "http://purl.org/net/sword/package/SimpleZip"
-			type = "SimpleZip"
-		
 		print "------------------------"
 		print "This is a ",type," submission"
-		print "Uploading files..."
-		myzip.close()
+		
+		# Create a metadata entry
+		if (args.title != None) or (args.author != None) or (args.date != None):
+			entry = Entry()	
+			if  args.title != None:
+				entry.add_fields(dcterms_title = args.title[0])
+			if args.author != None:
+				for creator in args.author:
+					entry.add_fields(dcterms_creator=creator)
+			if args.date != None:
+				entry.add_fields(dcterms_created = args.date[0])
+		else:
+			entry = None
+		# Select what to do
+		if (type is "single file") or (type is "multiple files"):
+			try:
+				# Create the metadata entry with ATOM
+				print "------------------------"
+				print "Creating the item..."
+				if entry is None:
+					entry = Entry(dcterms_title=(os.path.basename(data)))
+				creation_receipt = c.create(col_iri = collection.href, metadata_entry = entry)
+				
+				# Add the files
+				for f in fileslist:
+					print "Uploading file ",os.path.basename(f)
+					payload = open(f,"rb")
+					deposit_receipt = c.add_file_to_resource(edit_media_iri = creation_receipt.edit_media,
+						payload = payload,
+						filename = os.path.basename(f),
+						mimetype = 'application/zip',
+						packaging = 'http://purl.org/net/sword/package/Binary')
+					payload.close()
+			except HTTPResponseError:
+				print "Bad request"
+		else:
+			# PUT the data
+			payload = open(fileslist[0], "rb")
+			try:
+				deposit_receipt = c.create(col_iri = collection.href,
+					payload = payload,
+					filename = fileslist[0],
+					mimetype = "application/zip",
+					packaging = packaging)
+				print type, " submission successful."
+			except:
+				print "Error! Couldn't submit the file!"						
+				if type == "METS": # Just guessing: not sure this is the problem...
+					print "To submit a METS package, the collection MUST have a workflow!"
+			payload.close()
+			
+			# If some of the additional arguments for author, title, date etc. have been specified, update the metadata
+			if type == "SimpleZip" and entry != None:
+				try:
+					receipt_update = c.update(dr = receipt_dep , metadata_entry = entry, in_progress = True) # in_progress is True: we don't want to close the submission
+					print "Additional metadata updated successfully."
+				except:
+					print "Server error"
+			if temp:
+				os.remove(fileslist[0])
 
-		payload = open(zipf, "rb")
-		try:
-			receipt_dep = c.create(col_iri = collection.href,
-			payload = payload,
-			filename = zipf,
-			mimetype = "application/zip",
-			packaging = packaging)
-			print type, " submission successful."
-			if type == "SimpleZip":
-				# If some of the additional arguments for author, title, date etc. have been specified, update the metadata
-				if (args.title != None) or (args.author != None) or (args.date != None):
-					entry = Entry()	
-					print "------------------------"
-					print "Updating with additional metadata"
-					if  args.title != None:
-						entry.add_fields(dcterms_title = args.title[0])
-					if args.author != None:
-						for creator in args.author:
-							entry.add_fields(dcterms_creator=creator)
-					if args.date != None:
-						entry.add_fields(dcterms_created = args.date[0])
-					try:
-						receipt_update = c.update(dr = receipt_dep , metadata_entry = entry, in_progress = True) # in_progress is True: we don't want to close the submission
-						print "Additional metadata updated successfully."
-					except:
-						print "Server error"
-			print "------------------------"
-			print "You will find this submission in the \"Submissions\" list in your DSpace account. To complete/edit it with metadata and licenses, click on the title and then on \"Resume\"."			
-		except:
-			print "Error! Couldn't submit the file!"
-			if type == "METS": # Just guessing: not sure this is the problem...
-				print "To submit a METS package, the collection MUST have a workflow!"
-		payload.close()
-		if temp:
-			os.remove(zipf)
+		print "------------------------"
+		print "You will find the submission in the \"Submissions\" list in your DSpace account. To complete/edit it with metadata and licenses, click on the title and then on \"Resume\"."			
+
 		
 	else: # Failed to connect to SWORDv2 Server
 		print "Couldn't connect to the server."