comparison sworduploader.py @ 4:96d62e78ac9f

Added support for single file upload using edit media resources
author Marco Fabiani <marco.fabiani@eecs.qmul.ac.uk>
date Thu, 29 Mar 2012 17:39:46 +0100
parents c1918aa337c4
children 13fc2773b3fe
comparison
equal deleted inserted replaced
3:c1918aa337c4 4:96d62e78ac9f
1 #!usr/bin/env/ python 1 #!usr/bin/env/ python
2 2
3 """ SWORD2 DSpace bulk uploader - v0.2 3 """ SWORD2 DSpace bulk uploader - v0.3
4 4
5 A python script to submit large numbers of files to a SWORD2-compatible repository, specifically DSpace 1.8x. 5 A python script to submit large numbers of files to a SWORD2-compatible repository, specifically DSpace 1.8x.
6 Built on the SWORD2 python client library: https://bitbucket.org/beno/python-sword2/overview 6 Built on the SWORD2 python client library: https://bitbucket.org/beno/python-sword2/overview
7 7
8 Dependencies: 8 Dependencies:
9 9
10 - python 2.X 10 - python 2.X
11 11
12 - sword2 library: https://bitbucket.org/beno/python-sword2/src 12 - sword2 library, with modifications:
13 (original) https://bitbucket.org/beno/python-sword2/src
14 (modified) https://code.soundsoftware.ac.uk/hg/sworduploader
13 15
14 ----------------------------------- 16 -----------------------------------
15 Copyright 2012 Marco Fabiani 17 Copyright 2012 Marco Fabiani
16 Copyright 2012 Queen Mary, University of London 18 Copyright 2012 Queen Mary, University of London
17 ----------------------------------- 19 -----------------------------------
18 """ 20 """
19 21
20 import argparse, getpass, zipfile,os,sys 22 import argparse, getpass, zipfile, os, sys
21 from sword2 import * 23 from sword2 import *
22 24
23 # Parse arguments 25 # Parse arguments
24 parser = argparse.ArgumentParser(description="Bulk upload to DSpace using SWORDv2.",epilog="If the submission is created successfully, it will remain open to be completed with the necessary metadata and licenses, using the DSpace web interface. The submission can be found in the \"My Account -> Submissions\" section of the user's area.") 26 parser = argparse.ArgumentParser(description="Bulk upload to DSpace using SWORDv2.",epilog="If the submission is created successfully, it will remain open to be completed with the necessary metadata and licenses, using the DSpace web interface. The submission can be found in the \"My Account -> Submissions\" section of the user's area.")
25 parser.add_argument("data", type=str, nargs=1, 27 parser.add_argument("data", type=str, nargs=1,
26 help="Accepts: METSDSpaceSIP packages, zip files, directories, single files. NOTE: METSDSpaceSIP packages are only accepted by Collections with a workflow!") 28 help="Accepts: METSDSpaceSIP packages, zip files, directories, single files. NOTE: METSDSpaceSIP packages are only accepted by Collections with a workflow!")
27 parser.add_argument("--username", dest="user_name", type=str,nargs=1, help="DSpace username.") 29 parser.add_argument("--username", dest="user_name", type=str,nargs=1, help="DSpace username.")
30 parser.add_argument("--zip", action="store_true",dest="zip",default=False, help="If \"data\" is a directory, send it as a single zip archive to preserve its structure.")
28 parser.add_argument("--title", dest="title", type=str,nargs=1, help="Title (ignored for METS packages).") 31 parser.add_argument("--title", dest="title", type=str,nargs=1, help="Title (ignored for METS packages).")
29 parser.add_argument("--author", dest="author", type=str,nargs="+", help="Author(s) (ignored for METS packages). Accepts multiple entries in the format \"Surname, Name\"") 32 parser.add_argument("--author", dest="author", type=str,nargs="+", help="Author(s) (ignored for METS packages). Accepts multiple entries in the format \"Surname, Name\"")
30 parser.add_argument("--date", dest="date", type=str,nargs=1, help="Date of creation (string) (ignored for METS packages).") 33 parser.add_argument("--date", dest="date", type=str,nargs=1, help="Date of creation (string) (ignored for METS packages).")
31 parser.add_argument("--servicedoc", dest="dspaceurl", type=str,nargs=1, help="Url of the SWORDv2 service document (default: http://c4dm.eecs.qmul.ac.uk/smdmrd-test/swordv2/servicedocument") 34 parser.add_argument("--servicedoc", dest="dspaceurl", type=str,nargs=1, help="Url of the SWORDv2 service document (default: http://c4dm.eecs.qmul.ac.uk/smdmrd-test/swordv2/servicedocument")
32 35
82 while (sel<=0 or sel>numColl): 85 while (sel<=0 or sel>numColl):
83 sel = input("Select a Collection to submit your files into: ") 86 sel = input("Select a Collection to submit your files into: ")
84 collection = c.workspaces[0][1][sel-1] 87 collection = c.workspaces[0][1][sel-1]
85 print "Selected Collection: ",collection.title 88 print "Selected Collection: ",collection.title
86 89
87 # Create a submission: build the zip files 90 # Create a submission
88 temp = True # delete the zip file at the end of the upload 91 fileslist = []
89 if zipfile.is_zipfile(data): 92 temp = False # Delete temp files
90 zipf = data 93 # If folder
91 temp = False 94 if os.path.isdir(data):
95 if args.zip: # If zip option, zip all the files and maintain the structure, but start from the base only...
96 dataname = os.path.basename(os.path.normpath(data))
97 zipf = dataname+".zip"
98 myzip = zipfile.ZipFile(zipf, "w")
99 # get the directory structure
100 print "Creating a zip archive for submission..."
101 for root, dirs, files in os.walk(data):
102 for name in files:
103 myzip.write(os.path.join(root,name),
104 os.path.relpath(os.path.join(root,name),data))
105 fileslist.append(zipf)
106 packaging = "http://purl.org/net/sword/package/SimpleZip"
107 type = "SimpleZip"
108 temp = True
109 else: #create a list of files to upload
110 for root, dirs, files in os.walk(data):
111 for name in files:
112 fileslist.append(os.path.join(root,name))
113 type = "multiple files"
114 elif zipfile.is_zipfile(data): #This is a zip file
115 fileslist.append(data)
116 myzip = zipfile.ZipFile(data)
117 if "mets.xml" in myzip.namelist(): #This is a METS package
118 packaging = "http://purl.org/net/sword/package/METSDSpaceSIP"
119 type = "METS"
120 else: #This is a simple zip file
121 packaging = "http://purl.org/net/sword/package/SimpleZip"
122 type = "SimpleZip"
123 myzip.close()
92 elif os.path.isfile(data): # This is a single file 124 elif os.path.isfile(data): # This is a single file
93 dataname = os.path.basename(data) 125 fileslist.append(data)
94 zipf = os.path.splitext(dataname)[0]+".zip" 126 type = "single file"
95 myzip = zipfile.ZipFile(zipf, "w")
96 myzip.write(data,os.path.basename(data))
97 myzip.close()
98 elif os.path.isdir(data): # This is a directory, zip all the files and maintain the structure, but start from the base only...
99 dataname = os.path.basename(os.path.normpath(data))
100 zipf = dataname+".zip"
101 myzip = zipfile.ZipFile(zipf, "w")
102 # get the directory structure
103 print "Creating a zip archive for submission..."
104 for root, dirs, files in os.walk(data):
105 for name in files:
106 myzip.write(os.path.join(root,name),
107 os.path.relpath(os.path.join(root,name),data))
108 myzip.close()
109 else: 127 else:
110 print "Couldn't find the data." 128 print "Couldn't find the data."
111 sys.exit() 129 sys.exit()
112 130
113 #Check if this is a METSDSpaceSIP: see if there is a mets.xml file in the zip
114 myzip = zipfile.ZipFile(zipf)
115 if "mets.xml" in myzip.namelist():
116 packaging = "http://purl.org/net/sword/package/METSDSpaceSIP"
117 type = "METS"
118 else:
119 packaging = "http://purl.org/net/sword/package/SimpleZip"
120 type = "SimpleZip"
121
122 print "------------------------" 131 print "------------------------"
123 print "This is a ",type," submission" 132 print "This is a ",type," submission"
124 print "Uploading files..." 133
125 myzip.close() 134 # Create a metadata entry
126 135 if (args.title != None) or (args.author != None) or (args.date != None):
127 payload = open(zipf, "rb") 136 entry = Entry()
128 try: 137 if args.title != None:
129 receipt_dep = c.create(col_iri = collection.href, 138 entry.add_fields(dcterms_title = args.title[0])
130 payload = payload, 139 if args.author != None:
131 filename = zipf, 140 for creator in args.author:
132 mimetype = "application/zip", 141 entry.add_fields(dcterms_creator=creator)
133 packaging = packaging) 142 if args.date != None:
134 print type, " submission successful." 143 entry.add_fields(dcterms_created = args.date[0])
135 if type == "SimpleZip": 144 else:
136 # If some of the additional arguments for author, title, date etc. have been specified, update the metadata 145 entry = None
137 if (args.title != None) or (args.author != None) or (args.date != None): 146 # Select what to do
138 entry = Entry() 147 if (type is "single file") or (type is "multiple files"):
139 print "------------------------" 148 try:
140 print "Updating with additional metadata" 149 # Create the metadata entry with ATOM
141 if args.title != None: 150 print "------------------------"
142 entry.add_fields(dcterms_title = args.title[0]) 151 print "Creating the item..."
143 if args.author != None: 152 if entry is None:
144 for creator in args.author: 153 entry = Entry(dcterms_title=(os.path.basename(data)))
145 entry.add_fields(dcterms_creator=creator) 154 creation_receipt = c.create(col_iri = collection.href, metadata_entry = entry)
146 if args.date != None: 155
147 entry.add_fields(dcterms_created = args.date[0]) 156 # Add the files
148 try: 157 for f in fileslist:
149 receipt_update = c.update(dr = receipt_dep , metadata_entry = entry, in_progress = True) # in_progress is True: we don't want to close the submission 158 print "Uploading file ",os.path.basename(f)
150 print "Additional metadata updated successfully." 159 payload = open(f,"rb")
151 except: 160 deposit_receipt = c.add_file_to_resource(edit_media_iri = creation_receipt.edit_media,
152 print "Server error" 161 payload = payload,
153 print "------------------------" 162 filename = os.path.basename(f),
154 print "You will find this submission in the \"Submissions\" list in your DSpace account. To complete/edit it with metadata and licenses, click on the title and then on \"Resume\"." 163 mimetype = 'application/zip',
155 except: 164 packaging = 'http://purl.org/net/sword/package/Binary')
156 print "Error! Couldn't submit the file!" 165 payload.close()
157 if type == "METS": # Just guessing: not sure this is the problem... 166 except HTTPResponseError:
158 print "To submit a METS package, the collection MUST have a workflow!" 167 print "Bad request"
159 payload.close() 168 else:
160 if temp: 169 # PUT the data
161 os.remove(zipf) 170 payload = open(fileslist[0], "rb")
171 try:
172 deposit_receipt = c.create(col_iri = collection.href,
173 payload = payload,
174 filename = fileslist[0],
175 mimetype = "application/zip",
176 packaging = packaging)
177 print type, " submission successful."
178 except:
179 print "Error! Couldn't submit the file!"
180 if type == "METS": # Just guessing: not sure this is the problem...
181 print "To submit a METS package, the collection MUST have a workflow!"
182 payload.close()
183
184 # If some of the additional arguments for author, title, date etc. have been specified, update the metadata
185 if type == "SimpleZip" and entry != None:
186 try:
187 receipt_update = c.update(dr = receipt_dep , metadata_entry = entry, in_progress = True) # in_progress is True: we don't want to close the submission
188 print "Additional metadata updated successfully."
189 except:
190 print "Server error"
191 if temp:
192 os.remove(fileslist[0])
193
194 print "------------------------"
195 print "You will find the submission in the \"Submissions\" list in your DSpace account. To complete/edit it with metadata and licenses, click on the title and then on \"Resume\"."
196
162 197
163 else: # Failed to connect to SWORDv2 Server 198 else: # Failed to connect to SWORDv2 Server
164 print "Couldn't connect to the server." 199 print "Couldn't connect to the server."
165 if attempts == 0: 200 if attempts == 0:
166 print "Invalid credentials entered 3 times." 201 print "Invalid credentials entered 3 times."