marco@0
|
1 #!usr/bin/env/ python
|
marco@1
|
2
|
marco@6
|
3 """ SWORD2 DSpace bulk uploader - v0.3
|
marco@1
|
4
|
marco@1
|
5 A python script to submit large numbers of files to a SWORD2-compatible repository, specifically DSpace 1.8x.
|
marco@1
|
6 Built on the SWORD2 python client library: https://bitbucket.org/beno/python-sword2/overview
|
marco@1
|
7
|
marco@1
|
8 Dependencies:
|
marco@1
|
9
|
marco@1
|
10 - python 2.X
|
marco@1
|
11
|
marco@4
|
12 - sword2 library, with modifications:
|
marco@4
|
13 (original) https://bitbucket.org/beno/python-sword2/src
|
marco@4
|
14 (modified) https://code.soundsoftware.ac.uk/hg/sworduploader
|
marco@1
|
15
|
marco@1
|
16 -----------------------------------
|
marco@1
|
17 Copyright 2012 Marco Fabiani
|
marco@1
|
18 Copyright 2012 Queen Mary, University of London
|
marco@1
|
19 -----------------------------------
|
marco@1
|
20 """
|
marco@0
|
21
|
marco@4
|
22 import argparse, getpass, zipfile, os, sys
|
marco@0
|
23 from sword2 import *
|
marco@0
|
24
|
marco@0
|
25 # Parse arguments
|
marco@0
|
26 parser = argparse.ArgumentParser(description="Bulk upload to DSpace using SWORDv2.",epilog="If the submission is created successfully, it will remain open to be completed with the necessary metadata and licenses, using the DSpace web interface. The submission can be found in the \"My Account -> Submissions\" section of the user's area.")
|
marco@0
|
27 parser.add_argument("data", type=str, nargs=1,
|
marco@1
|
28 help="Accepts: METSDSpaceSIP packages, zip files, directories, single files. NOTE: METSDSpaceSIP packages are only accepted by Collections with a workflow!")
|
marco@0
|
29 parser.add_argument("--username", dest="user_name", type=str,nargs=1, help="DSpace username.")
|
marco@8
|
30 parser.add_argument("--zip", action="store_true",dest="zip",default=False, help="If \"data\" is a directory, send it as a single zip archive to preserve its structure. The zip file will be saved along with the individual files.")
|
marco@0
|
31 parser.add_argument("--title", dest="title", type=str,nargs=1, help="Title (ignored for METS packages).")
|
marco@0
|
32 parser.add_argument("--author", dest="author", type=str,nargs="+", help="Author(s) (ignored for METS packages). Accepts multiple entries in the format \"Surname, Name\"")
|
marco@0
|
33 parser.add_argument("--date", dest="date", type=str,nargs=1, help="Date of creation (string) (ignored for METS packages).")
|
marco@1
|
34 parser.add_argument("--servicedoc", dest="dspaceurl", type=str,nargs=1, help="Url of the SWORDv2 service document (default: http://c4dm.eecs.qmul.ac.uk/smdmrd-test/swordv2/servicedocument")
|
marco@0
|
35
|
marco@0
|
36 args = parser.parse_args()
|
marco@0
|
37 data = args.data[0]
|
marco@8
|
38 storezip = True
|
marco@8
|
39
|
marco@0
|
40 if args.dspaceurl == None:
|
marco@0
|
41 dspaceurl = "http://c4dm.eecs.qmul.ac.uk/smdmrd-test/swordv2/servicedocument"
|
marco@0
|
42 else:
|
marco@0
|
43 dspaceurl = args.dspaceurl[0]
|
marco@0
|
44
|
marco@0
|
45
|
marco@0
|
46 try:
|
marco@0
|
47 # Connect to SWORD server
|
marco@0
|
48 attempts = 3 # Number of attempts left to connect to server
|
marco@0
|
49 connected = False
|
marco@0
|
50 while attempts>0 and not connected:
|
marco@0
|
51 print "Connecting to SWORD server. Remaining attempts: ", attempts
|
marco@0
|
52 # Try to login, get service document
|
marco@0
|
53 # Get username and password
|
marco@0
|
54 if args.user_name == None:
|
marco@0
|
55 user_name = raw_input("DSpace Username: ")
|
marco@0
|
56 else:
|
marco@0
|
57 user_name = args.user_name[0]
|
marco@0
|
58 print "DSpace Username: ",user_name
|
marco@0
|
59 user_pass = getpass.getpass("DSpace password:")
|
marco@0
|
60 # Connect to the server
|
marco@0
|
61 c = Connection(dspaceurl, user_name=user_name, user_pass=user_pass,keep_history=False)
|
marco@3
|
62
|
marco@0
|
63 # Get service document
|
marco@0
|
64 try:
|
marco@0
|
65 c.get_service_document()
|
marco@3
|
66 except: # Server error
|
marco@3
|
67 print "Server unreachable!"
|
marco@3
|
68 break
|
marco@3
|
69 if c.sd != None:
|
marco@3
|
70 connected = True
|
marco@3
|
71 else:
|
marco@0
|
72 attempts-=1
|
marco@0
|
73 print "Incorrect username and/or password"
|
marco@3
|
74
|
marco@0
|
75
|
marco@0
|
76 if connected:
|
marco@0
|
77 # List available collections
|
marco@0
|
78 print "------------------------"
|
marco@0
|
79 print "Welcome to the ",c.workspaces[0][0], "repository"
|
marco@0
|
80 print "Available Collections: "
|
marco@0
|
81 numColl = len(c.workspaces[0][1])
|
marco@0
|
82 for ctr in range(numColl):
|
marco@0
|
83 coll = c.workspaces[0][1][ctr]
|
marco@0
|
84 print ctr+1,":",coll.title
|
marco@0
|
85 # Select a collection to deposit into
|
marco@0
|
86 sel = -1
|
marco@0
|
87 while (sel<=0 or sel>numColl):
|
marco@0
|
88 sel = input("Select a Collection to submit your files into: ")
|
marco@0
|
89 collection = c.workspaces[0][1][sel-1]
|
marco@0
|
90 print "Selected Collection: ",collection.title
|
marco@0
|
91
|
marco@4
|
92 # Create a submission
|
marco@4
|
93 fileslist = []
|
marco@4
|
94 temp = False # Delete temp files
|
marco@4
|
95 # If folder
|
marco@4
|
96 if os.path.isdir(data):
|
marco@4
|
97 if args.zip: # If zip option, zip all the files and maintain the structure, but start from the base only...
|
marco@4
|
98 dataname = os.path.basename(os.path.normpath(data))
|
marco@4
|
99 zipf = dataname+".zip"
|
marco@4
|
100 myzip = zipfile.ZipFile(zipf, "w")
|
marco@4
|
101 # get the directory structure
|
marco@4
|
102 print "Creating a zip archive for submission..."
|
marco@4
|
103 for root, dirs, files in os.walk(data):
|
marco@4
|
104 for name in files:
|
marco@8
|
105 if not name.startswith('.'): # Do not upload hidden files, OSX/linux
|
marco@8
|
106 myzip.write(os.path.join(root,name),
|
marco@8
|
107 os.path.relpath(os.path.join(root,name),data))
|
marco@4
|
108 fileslist.append(zipf)
|
marco@5
|
109 myzip.close()
|
marco@4
|
110 packaging = "http://purl.org/net/sword/package/SimpleZip"
|
marco@4
|
111 type = "SimpleZip"
|
marco@4
|
112 temp = True
|
marco@4
|
113 else: #create a list of files to upload
|
marco@4
|
114 for root, dirs, files in os.walk(data):
|
marco@4
|
115 for name in files:
|
marco@4
|
116 fileslist.append(os.path.join(root,name))
|
marco@4
|
117 type = "multiple files"
|
marco@4
|
118 elif zipfile.is_zipfile(data): #This is a zip file
|
marco@4
|
119 fileslist.append(data)
|
marco@4
|
120 myzip = zipfile.ZipFile(data)
|
marco@4
|
121 if "mets.xml" in myzip.namelist(): #This is a METS package
|
marco@4
|
122 packaging = "http://purl.org/net/sword/package/METSDSpaceSIP"
|
marco@4
|
123 type = "METS"
|
marco@8
|
124 in_progress = False
|
marco@4
|
125 else: #THis is a simple zip file
|
marco@4
|
126 packaging = "http://purl.org/net/sword/package/SimpleZip"
|
marco@4
|
127 type = "SimpleZip"
|
marco@4
|
128 myzip.close()
|
marco@0
|
129 elif os.path.isfile(data): # This is a single file
|
marco@4
|
130 fileslist.append(data)
|
marco@4
|
131 type = "single file"
|
marco@0
|
132 else:
|
marco@0
|
133 print "Couldn't find the data."
|
marco@0
|
134 sys.exit()
|
marco@0
|
135
|
marco@0
|
136 print "------------------------"
|
marco@0
|
137 print "This is a ",type," submission"
|
marco@4
|
138
|
marco@4
|
139 # Create a metadata entry
|
marco@4
|
140 if (args.title != None) or (args.author != None) or (args.date != None):
|
marco@4
|
141 entry = Entry()
|
marco@4
|
142 if args.title != None:
|
marco@4
|
143 entry.add_fields(dcterms_title = args.title[0])
|
marco@4
|
144 if args.author != None:
|
marco@4
|
145 for creator in args.author:
|
marco@4
|
146 entry.add_fields(dcterms_creator=creator)
|
marco@4
|
147 if args.date != None:
|
marco@4
|
148 entry.add_fields(dcterms_created = args.date[0])
|
marco@4
|
149 else:
|
marco@4
|
150 entry = None
|
marco@4
|
151 # Select what to do
|
marco@8
|
152 if (type is "single file") or (type is "multiple files"): # Use the single file upload procedure
|
marco@4
|
153 try:
|
marco@4
|
154 # Create the metadata entry with ATOM
|
marco@4
|
155 print "------------------------"
|
marco@4
|
156 print "Creating the item..."
|
marco@4
|
157 if entry is None:
|
marco@4
|
158 entry = Entry(dcterms_title=(os.path.basename(data)))
|
marco@8
|
159 creation_receipt = c.create(col_iri = collection.href, metadata_entry = entry, in_progress=True)
|
marco@4
|
160
|
marco@4
|
161 # Add the files
|
marco@4
|
162 for f in fileslist:
|
marco@4
|
163 print "Uploading file ",os.path.basename(f)
|
marco@4
|
164 payload = open(f,"rb")
|
marco@4
|
165 deposit_receipt = c.add_file_to_resource(edit_media_iri = creation_receipt.edit_media,
|
marco@4
|
166 payload = payload,
|
marco@4
|
167 filename = os.path.basename(f),
|
marco@4
|
168 mimetype = 'application/zip',
|
marco@4
|
169 packaging = 'http://purl.org/net/sword/package/Binary')
|
marco@4
|
170 payload.close()
|
marco@4
|
171 except HTTPResponseError:
|
marco@4
|
172 print "Bad request"
|
marco@4
|
173 else:
|
marco@8
|
174 # Send the zip file and let the ingester do its job
|
marco@4
|
175 payload = open(fileslist[0], "rb")
|
marco@8
|
176 if type == "SimpleZip":
|
marco@8
|
177 in_progress = True
|
marco@8
|
178 else:
|
marco@8
|
179 in_progress = False
|
marco@4
|
180 try:
|
marco@4
|
181 deposit_receipt = c.create(col_iri = collection.href,
|
marco@4
|
182 payload = payload,
|
marco@4
|
183 filename = fileslist[0],
|
marco@4
|
184 mimetype = "application/zip",
|
marco@8
|
185 packaging = packaging,
|
marco@8
|
186 in_progress = in_progress)
|
marco@4
|
187 print type, " submission successful."
|
marco@4
|
188 except:
|
marco@4
|
189 print "Error! Couldn't submit the file!"
|
marco@4
|
190 if type == "METS": # Just guessing: not sure this is the problem...
|
marco@4
|
191 print "To submit a METS package, the collection MUST have a workflow!"
|
marco@4
|
192 payload.close()
|
marco@4
|
193
|
marco@8
|
194 # If some of the additional arguments for author, title, date etc. have been specified, update the metadata (only SimpleZip)
|
marco@7
|
195 if type == "SimpleZip":
|
marco@7
|
196 if entry is None:
|
marco@7
|
197 entry = Entry(dcterms_title=(os.path.basename(fileslist[0])))
|
marco@4
|
198 try:
|
marco@8
|
199 update_receipt = c.update(dr = deposit_receipt , metadata_entry = entry, in_progress = True) # in_progress is True: we don't want to close the submission
|
marco@7
|
200 print "Metadata update successfull."
|
marco@4
|
201 except:
|
marco@4
|
202 print "Server error"
|
marco@8
|
203 # If we want to store the zip file along with the individual files (Only SimpleZip)
|
marco@8
|
204 if storezip:
|
marco@8
|
205 try:
|
marco@8
|
206 payload = open(fileslist[0],"rb")
|
marco@8
|
207 zipdeposit_receipt = c.add_file_to_resource(edit_media_iri = deposit_receipt.edit_media,
|
marco@8
|
208 payload = payload,
|
marco@8
|
209 filename = os.path.basename(fileslist[0]).replace(" ","_"),
|
marco@8
|
210 mimetype = 'application/zip',
|
marco@8
|
211 packaging = 'http://purl.org/net/sword/package/Binary')
|
marco@8
|
212 payload.close()
|
marco@8
|
213 print "Zip file successfully added to the bitstreams."
|
marco@8
|
214 except:
|
marco@8
|
215 print "Server error: could not add the zip file to the resources"
|
marco@4
|
216 if temp:
|
marco@4
|
217 os.remove(fileslist[0])
|
marco@0
|
218
|
marco@4
|
219 print "------------------------"
|
marco@4
|
220 print "You will find the submission in the \"Submissions\" list in your DSpace account. To complete/edit it with metadata and licenses, click on the title and then on \"Resume\"."
|
marco@4
|
221
|
marco@0
|
222
|
marco@0
|
223 else: # Failed to connect to SWORDv2 Server
|
marco@0
|
224 print "Couldn't connect to the server."
|
marco@0
|
225 if attempts == 0:
|
marco@0
|
226 print "Invalid credentials entered 3 times."
|
marco@0
|
227
|
marco@0
|
228 except KeyboardInterrupt:
|
marco@0
|
229 print "------------------------"
|
marco@3
|
230 print "\nSubmission aborted by user." |