#!/usr/bin/env python

""" SWORD2 DSpace bulk uploader - v0.3

A python script to submit large numbers of files to a SWORD2-compatible repository, specifically DSpace 1.8x.
Built on the SWORD2 python client library: https://bitbucket.org/beno/python-sword2/overview

Dependencies:

- python 2.X

- sword2 library, with modifications:
    (original) https://bitbucket.org/beno/python-sword2/src
    (modified) https://code.soundsoftware.ac.uk/hg/sworduploader

-----------------------------------
Copyright 2012 Marco Fabiani
Copyright 2012 Queen Mary, University of London
-----------------------------------
"""

import argparse, getpass, zipfile, os, sys
from sword2 import *

# NOTE: every print below is written as print(<single string>). Under the
# Python 2 print statement this prints exactly that string, so the output is
# unchanged while the file also parses under Python 3.

DEFAULT_SERVICE_DOCUMENT = "http://c4dm.eecs.qmul.ac.uk/smdmrd-test/swordv2/servicedocument"
SIMPLE_ZIP_PACKAGING = "http://purl.org/net/sword/package/SimpleZip"
METS_PACKAGING = "http://purl.org/net/sword/package/METSDSpaceSIP"
BINARY_PACKAGING = "http://purl.org/net/sword/package/Binary"
MAX_LOGIN_ATTEMPTS = 3
SEPARATOR = "------------------------"


def parse_arguments():
    """Build the command-line parser and return the parsed arguments."""
    parser = argparse.ArgumentParser(description="Bulk upload to DSpace using SWORDv2.",epilog="If the submission is created successfully, it will remain open to be completed with the necessary metadata and licenses, using the DSpace web interface. The submission can be found in the \"My Account -> Submissions\" section of the user's area.")
    parser.add_argument("data", type=str, nargs=1,
                        help="Accepts: METSDSpaceSIP packages, zip files, directories, single files. NOTE: METSDSpaceSIP packages are only accepted by Collections with a workflow!")
    parser.add_argument("--username", dest="user_name", type=str, nargs=1, help="DSpace username.")
    parser.add_argument("--zip", action="store_true", dest="zip", default=False, help="If \"data\" is a directory, send it as a single zip archive to preserve its structure.")
    parser.add_argument("--title", dest="title", type=str, nargs=1, help="Title (ignored for METS packages).")
    parser.add_argument("--author", dest="author", type=str, nargs="+", help="Author(s) (ignored for METS packages). Accepts multiple entries in the format \"Surname, Name\"")
    parser.add_argument("--date", dest="date", type=str, nargs=1, help="Date of creation (string) (ignored for METS packages).")
    # The original help text was missing its closing parenthesis.
    parser.add_argument("--servicedoc", dest="dspaceurl", type=str, nargs=1, help="Url of the SWORDv2 service document (default: http://c4dm.eecs.qmul.ac.uk/smdmrd-test/swordv2/servicedocument)")
    return parser.parse_args()


def connect(dspaceurl, preset_user_name):
    """Log in to the SWORD server, retrying on rejected credentials.

    dspaceurl        -- URL of the SWORDv2 service document.
    preset_user_name -- username given on the command line, or None to prompt.

    Returns (connection, attempts_left). connection is None when no service
    document could be obtained; attempts_left is 0 only when every attempt
    ended without a service document.
    """
    attempts = MAX_LOGIN_ATTEMPTS
    while attempts > 0:
        print("Connecting to SWORD server. Remaining attempts:  %d" % (attempts,))
        if preset_user_name is None:
            user_name = raw_input("DSpace Username: ")
        else:
            user_name = preset_user_name
            print("DSpace Username:  %s" % (user_name,))
        user_pass = getpass.getpass("DSpace password:")
        connection = Connection(dspaceurl, user_name=user_name, user_pass=user_pass, keep_history=False)

        try:
            connection.get_service_document()
        except Exception:
            # Not a bare except: Ctrl-C must reach the KeyboardInterrupt
            # handler in main() instead of being reported as a server error.
            print("Server unreachable!")
            break
        if connection.sd is not None:
            return connection, attempts
        attempts -= 1
        print("Incorrect username and/or password")
    return None, attempts


def select_collection(connection):
    """List the collections of the first workspace and return the one chosen."""
    workspace = connection.workspaces[0]
    collections = workspace[1]
    print(SEPARATOR)
    print("Welcome to the  %s repository" % (workspace[0],))
    print("Available Collections: ")
    for number, coll in enumerate(collections, 1):
        print("%s : %s" % (number, coll.title))
    if not collections:
        # Without this guard the prompt below could never be satisfied.
        print("No collections available.")
        sys.exit()

    selection = -1
    while selection <= 0 or selection > len(collections):
        answer = raw_input("Select a Collection to submit your files into: ")
        # int(raw_input()) instead of input(): Python 2's input() evaluates
        # the typed text as code and raises on anything that is not a number.
        try:
            selection = int(answer)
        except ValueError:
            selection = -1
    collection = collections[selection - 1]
    print("Selected Collection:  %s" % (collection.title,))
    return collection


def prepare_submission(data, make_zip):
    """Work out what kind of submission `data` is and which files to send.

    Returns (fileslist, kind, packaging, is_temp), or None when `data` is
    neither a directory nor a file. kind is one of "SimpleZip", "METS",
    "multiple files", "single file"; packaging is None for plain files;
    is_temp is True when fileslist[0] is an archive created here.
    """
    if os.path.isdir(data):
        if make_zip:
            # Zip the whole tree, storing paths relative to the directory.
            archive_name = os.path.basename(os.path.normpath(data)) + ".zip"
            archive = zipfile.ZipFile(archive_name, "w")
            try:
                print("Creating a zip archive for submission...")
                for root, dirs, names in os.walk(data):
                    for name in names:
                        full_path = os.path.join(root, name)
                        archive.write(full_path, os.path.relpath(full_path, data))
            finally:
                # The archive must be closed before it is uploaded, otherwise
                # it is not completely written to disk.
                archive.close()
            return [archive_name], "SimpleZip", SIMPLE_ZIP_PACKAGING, True
        fileslist = []
        for root, dirs, names in os.walk(data):
            for name in names:
                fileslist.append(os.path.join(root, name))
        return fileslist, "multiple files", None, False

    if zipfile.is_zipfile(data):
        archive = zipfile.ZipFile(data)
        try:
            is_mets = "mets.xml" in archive.namelist()
        finally:
            archive.close()
        if is_mets:
            return [data], "METS", METS_PACKAGING, False
        return [data], "SimpleZip", SIMPLE_ZIP_PACKAGING, False

    if os.path.isfile(data):
        return [data], "single file", None, False

    return None


def build_metadata_entry(title, authors, date):
    """Return an Entry holding the metadata given on the command line, or None.

    title and date are one-element lists (argparse nargs=1) or None; authors
    is a list of strings or None.
    """
    if title is None and authors is None and date is None:
        return None
    entry = Entry()
    if title is not None:
        entry.add_fields(dcterms_title=title[0])
    if authors is not None:
        for creator in authors:
            entry.add_fields(dcterms_creator=creator)
    if date is not None:
        entry.add_fields(dcterms_created=date[0])
    return entry


def upload_files(connection, collection, fileslist, entry, data):
    """Create an item from the metadata entry, then add each file to it."""
    try:
        print(SEPARATOR)
        print("Creating the item...")
        if entry is None:
            # Default title: name of the file/directory. normpath keeps the
            # title non-empty when the path ends with a separator.
            entry = Entry(dcterms_title=os.path.basename(os.path.normpath(data)))
        creation_receipt = connection.create(col_iri=collection.href, metadata_entry=entry)

        for path in fileslist:
            print("Uploading file  %s" % (os.path.basename(path),))
            # "with" closes the file even when the upload raises.
            with open(path, "rb") as payload:
                connection.add_file_to_resource(edit_media_iri=creation_receipt.edit_media,
                                                payload=payload,
                                                filename=os.path.basename(path),
                                                mimetype='application/zip',
                                                packaging=BINARY_PACKAGING)
    except HTTPResponseError:
        print("Bad request")


def upload_package(connection, collection, package_path, kind, packaging, entry):
    """Deposit a zip/METS package, then attach the extra metadata if any."""
    deposit_receipt = None
    with open(package_path, "rb") as payload:
        try:
            deposit_receipt = connection.create(col_iri=collection.href,
                                                payload=payload,
                                                filename=package_path,
                                                mimetype="application/zip",
                                                packaging=packaging)
            print("%s  submission successful." % (kind,))
        except Exception:
            print("Error! Couldn't submit the file!")
            if kind == "METS":  # Just guessing: not sure this is the problem...
                print("To submit a METS package, the collection MUST have a workflow!")

    # If some of the additional arguments for author, title, date etc. have
    # been specified, update the metadata. This is only possible when the
    # deposit itself succeeded and returned a receipt.
    if kind == "SimpleZip" and entry is not None and deposit_receipt is not None:
        try:
            # in_progress is True: we don't want to close the submission
            connection.update(dr=deposit_receipt, metadata_entry=entry, in_progress=True)
            print("Additional metadata updated successfully.")
        except Exception:
            print("Server error")


def main():
    """Run the interactive upload: log in, pick a collection, submit the data."""
    args = parse_arguments()
    data = args.data[0]
    if args.dspaceurl is None:
        dspaceurl = DEFAULT_SERVICE_DOCUMENT
    else:
        dspaceurl = args.dspaceurl[0]
    if args.user_name is None:
        preset_user_name = None
    else:
        preset_user_name = args.user_name[0]

    try:
        connection, attempts = connect(dspaceurl, preset_user_name)
        if connection is None:
            # Failed to connect to SWORDv2 Server
            print("Couldn't connect to the server.")
            if attempts == 0:
                print("Invalid credentials entered %d times." % (MAX_LOGIN_ATTEMPTS,))
            return

        collection = select_collection(connection)

        submission = prepare_submission(data, args.zip)
        if submission is None:
            print("Couldn't find the data.")
            sys.exit()
        fileslist, kind, packaging, is_temp = submission

        print(SEPARATOR)
        print("This is a  %s  submission" % (kind,))

        entry = build_metadata_entry(args.title, args.author, args.date)

        if kind in ("single file", "multiple files"):
            upload_files(connection, collection, fileslist, entry, data)
        else:
            try:
                upload_package(connection, collection, fileslist[0], kind, packaging, entry)
            finally:
                # Delete the archive created by --zip, even if the upload failed.
                if is_temp:
                    os.remove(fileslist[0])

        print(SEPARATOR)
        print("You will find the submission in the \"Submissions\" list in your DSpace account. To complete/edit it with metadata and licenses, click on the title and then on \"Resume\".")

    except KeyboardInterrupt:
        print(SEPARATOR)
        print("\nSubmission aborted by user.")


if __name__ == "__main__":
    main()