marco@0
|
1 #!usr/bin/env/ python
|
marco@1
|
2
|
marco@6
|
3 """ SWORD2 DSpace bulk uploader - v0.3
|
marco@1
|
4
|
marco@1
|
5 A python script to submit large numbers of files to a SWORD2-compatible repository, specifically DSpace 1.8x.
|
marco@1
|
6 Built on the SWORD2 python client library: https://bitbucket.org/beno/python-sword2/overview
|
marco@1
|
7
|
marco@1
|
8 Dependencies:
|
marco@1
|
9
|
marco@1
|
10 - python 2.X
|
marco@1
|
11
|
marco@4
|
12 - sword2 library, with modifications:
|
marco@4
|
13 (original) https://bitbucket.org/beno/python-sword2/src
|
marco@4
|
14 (modified) https://code.soundsoftware.ac.uk/hg/sworduploader
|
marco@1
|
15
|
marco@1
|
16 -----------------------------------
|
marco@1
|
17 Copyright 2012 Marco Fabiani
|
marco@1
|
18 Copyright 2012 Queen Mary, University of London
|
marco@1
|
19 -----------------------------------
|
marco@1
|
20 """
|
marco@0
|
21
|
marco@4
|
22 import argparse, getpass, zipfile, os, sys
|
marco@0
|
23 from sword2 import *
|
marco@0
|
24
|
marco@0
|
25 # Parse arguments
|
marco@0
|
26 parser = argparse.ArgumentParser(description="Bulk upload to DSpace using SWORDv2.",epilog="If the submission is created successfully, it will remain open to be completed with the necessary metadata and licenses, using the DSpace web interface. The submission can be found in the \"My Account -> Submissions\" section of the user's area.")
|
marco@0
|
27 parser.add_argument("data", type=str, nargs=1,
|
marco@1
|
28 help="Accepts: METSDSpaceSIP packages, zip files, directories, single files. NOTE: METSDSpaceSIP packages are only accepted by Collections with a workflow!")
|
marco@0
|
29 parser.add_argument("--username", dest="user_name", type=str,nargs=1, help="DSpace username.")
|
marco@4
|
30 parser.add_argument("--zip", action="store_true",dest="zip",default=False, help="If \"data\" is a directory, send it as a single zip archive to preserve its structure.")
|
marco@0
|
31 parser.add_argument("--title", dest="title", type=str,nargs=1, help="Title (ignored for METS packages).")
|
marco@0
|
32 parser.add_argument("--author", dest="author", type=str,nargs="+", help="Author(s) (ignored for METS packages). Accepts multiple entries in the format \"Surname, Name\"")
|
marco@0
|
33 parser.add_argument("--date", dest="date", type=str,nargs=1, help="Date of creation (string) (ignored for METS packages).")
|
marco@1
|
34 parser.add_argument("--servicedoc", dest="dspaceurl", type=str,nargs=1, help="Url of the SWORDv2 service document (default: http://c4dm.eecs.qmul.ac.uk/smdmrd-test/swordv2/servicedocument")
|
marco@0
|
35
|
marco@0
|
36 args = parser.parse_args()
|
marco@0
|
37 data = args.data[0]
|
marco@0
|
38 if args.dspaceurl == None:
|
marco@0
|
39 dspaceurl = "http://c4dm.eecs.qmul.ac.uk/smdmrd-test/swordv2/servicedocument"
|
marco@0
|
40 else:
|
marco@0
|
41 dspaceurl = args.dspaceurl[0]
|
marco@0
|
42
|
marco@0
|
43
|
marco@0
|
44 try:
|
marco@0
|
45 # Connect to SWORD server
|
marco@0
|
46 attempts = 3 # Number of attempts left to connect to server
|
marco@0
|
47 connected = False
|
marco@0
|
48 while attempts>0 and not connected:
|
marco@0
|
49 print "Connecting to SWORD server. Remaining attempts: ", attempts
|
marco@0
|
50 # Try to login, get service document
|
marco@0
|
51 # Get username and password
|
marco@0
|
52 if args.user_name == None:
|
marco@0
|
53 user_name = raw_input("DSpace Username: ")
|
marco@0
|
54 else:
|
marco@0
|
55 user_name = args.user_name[0]
|
marco@0
|
56 print "DSpace Username: ",user_name
|
marco@0
|
57 user_pass = getpass.getpass("DSpace password:")
|
marco@0
|
58 # Connect to the server
|
marco@0
|
59 c = Connection(dspaceurl, user_name=user_name, user_pass=user_pass,keep_history=False)
|
marco@3
|
60
|
marco@0
|
61 # Get service document
|
marco@0
|
62 try:
|
marco@0
|
63 c.get_service_document()
|
marco@3
|
64 except: # Server error
|
marco@3
|
65 print "Server unreachable!"
|
marco@3
|
66 break
|
marco@3
|
67 if c.sd != None:
|
marco@3
|
68 connected = True
|
marco@3
|
69 else:
|
marco@0
|
70 attempts-=1
|
marco@0
|
71 print "Incorrect username and/or password"
|
marco@3
|
72
|
marco@0
|
73
|
marco@0
|
74 if connected:
|
marco@0
|
75 # List available collections
|
marco@0
|
76 print "------------------------"
|
marco@0
|
77 print "Welcome to the ",c.workspaces[0][0], "repository"
|
marco@0
|
78 print "Available Collections: "
|
marco@0
|
79 numColl = len(c.workspaces[0][1])
|
marco@0
|
80 for ctr in range(numColl):
|
marco@0
|
81 coll = c.workspaces[0][1][ctr]
|
marco@0
|
82 print ctr+1,":",coll.title
|
marco@0
|
83 # Select a collection to deposit into
|
marco@0
|
84 sel = -1
|
marco@0
|
85 while (sel<=0 or sel>numColl):
|
marco@0
|
86 sel = input("Select a Collection to submit your files into: ")
|
marco@0
|
87 collection = c.workspaces[0][1][sel-1]
|
marco@0
|
88 print "Selected Collection: ",collection.title
|
marco@0
|
89
|
marco@4
|
90 # Create a submission
|
marco@4
|
91 fileslist = []
|
marco@4
|
92 temp = False # Delete temp files
|
marco@4
|
93 # If folder
|
marco@4
|
94 if os.path.isdir(data):
|
marco@4
|
95 if args.zip: # If zip option, zip all the files and maintain the structure, but start from the base only...
|
marco@4
|
96 dataname = os.path.basename(os.path.normpath(data))
|
marco@4
|
97 zipf = dataname+".zip"
|
marco@4
|
98 myzip = zipfile.ZipFile(zipf, "w")
|
marco@4
|
99 # get the directory structure
|
marco@4
|
100 print "Creating a zip archive for submission..."
|
marco@4
|
101 for root, dirs, files in os.walk(data):
|
marco@4
|
102 for name in files:
|
marco@4
|
103 myzip.write(os.path.join(root,name),
|
marco@4
|
104 os.path.relpath(os.path.join(root,name),data))
|
marco@4
|
105 fileslist.append(zipf)
|
marco@5
|
106 myzip.close()
|
marco@4
|
107 packaging = "http://purl.org/net/sword/package/SimpleZip"
|
marco@4
|
108 type = "SimpleZip"
|
marco@4
|
109 temp = True
|
marco@4
|
110 else: #create a list of files to upload
|
marco@4
|
111 for root, dirs, files in os.walk(data):
|
marco@4
|
112 for name in files:
|
marco@4
|
113 fileslist.append(os.path.join(root,name))
|
marco@4
|
114 type = "multiple files"
|
marco@4
|
115 elif zipfile.is_zipfile(data): #This is a zip file
|
marco@4
|
116 fileslist.append(data)
|
marco@4
|
117 myzip = zipfile.ZipFile(data)
|
marco@4
|
118 if "mets.xml" in myzip.namelist(): #This is a METS package
|
marco@4
|
119 packaging = "http://purl.org/net/sword/package/METSDSpaceSIP"
|
marco@4
|
120 type = "METS"
|
marco@4
|
121 else: #THis is a simple zip file
|
marco@4
|
122 packaging = "http://purl.org/net/sword/package/SimpleZip"
|
marco@4
|
123 type = "SimpleZip"
|
marco@4
|
124 myzip.close()
|
marco@0
|
125 elif os.path.isfile(data): # This is a single file
|
marco@4
|
126 fileslist.append(data)
|
marco@4
|
127 type = "single file"
|
marco@0
|
128 else:
|
marco@0
|
129 print "Couldn't find the data."
|
marco@0
|
130 sys.exit()
|
marco@0
|
131
|
marco@0
|
132 print "------------------------"
|
marco@0
|
133 print "This is a ",type," submission"
|
marco@4
|
134
|
marco@4
|
135 # Create a metadata entry
|
marco@4
|
136 if (args.title != None) or (args.author != None) or (args.date != None):
|
marco@4
|
137 entry = Entry()
|
marco@4
|
138 if args.title != None:
|
marco@4
|
139 entry.add_fields(dcterms_title = args.title[0])
|
marco@4
|
140 if args.author != None:
|
marco@4
|
141 for creator in args.author:
|
marco@4
|
142 entry.add_fields(dcterms_creator=creator)
|
marco@4
|
143 if args.date != None:
|
marco@4
|
144 entry.add_fields(dcterms_created = args.date[0])
|
marco@4
|
145 else:
|
marco@4
|
146 entry = None
|
marco@4
|
147 # Select what to do
|
marco@4
|
148 if (type is "single file") or (type is "multiple files"):
|
marco@4
|
149 try:
|
marco@4
|
150 # Create the metadata entry with ATOM
|
marco@4
|
151 print "------------------------"
|
marco@4
|
152 print "Creating the item..."
|
marco@4
|
153 if entry is None:
|
marco@4
|
154 entry = Entry(dcterms_title=(os.path.basename(data)))
|
marco@4
|
155 creation_receipt = c.create(col_iri = collection.href, metadata_entry = entry)
|
marco@4
|
156
|
marco@4
|
157 # Add the files
|
marco@4
|
158 for f in fileslist:
|
marco@4
|
159 print "Uploading file ",os.path.basename(f)
|
marco@4
|
160 payload = open(f,"rb")
|
marco@4
|
161 deposit_receipt = c.add_file_to_resource(edit_media_iri = creation_receipt.edit_media,
|
marco@4
|
162 payload = payload,
|
marco@4
|
163 filename = os.path.basename(f),
|
marco@4
|
164 mimetype = 'application/zip',
|
marco@4
|
165 packaging = 'http://purl.org/net/sword/package/Binary')
|
marco@4
|
166 payload.close()
|
marco@4
|
167 except HTTPResponseError:
|
marco@4
|
168 print "Bad request"
|
marco@4
|
169 else:
|
marco@4
|
170 # PUT the data
|
marco@4
|
171 payload = open(fileslist[0], "rb")
|
marco@4
|
172 try:
|
marco@4
|
173 deposit_receipt = c.create(col_iri = collection.href,
|
marco@4
|
174 payload = payload,
|
marco@4
|
175 filename = fileslist[0],
|
marco@4
|
176 mimetype = "application/zip",
|
marco@4
|
177 packaging = packaging)
|
marco@4
|
178 print type, " submission successful."
|
marco@4
|
179 except:
|
marco@4
|
180 print "Error! Couldn't submit the file!"
|
marco@4
|
181 if type == "METS": # Just guessing: not sure this is the problem...
|
marco@4
|
182 print "To submit a METS package, the collection MUST have a workflow!"
|
marco@4
|
183 payload.close()
|
marco@4
|
184
|
marco@4
|
185 # If some of the additional arguments for author, title, date etc. have been specified, update the metadata
|
marco@4
|
186 if type == "SimpleZip" and entry != None:
|
marco@4
|
187 try:
|
marco@4
|
188 receipt_update = c.update(dr = receipt_dep , metadata_entry = entry, in_progress = True) # in_progress is True: we don't want to close the submission
|
marco@4
|
189 print "Additional metadata updated successfully."
|
marco@4
|
190 except:
|
marco@4
|
191 print "Server error"
|
marco@4
|
192 if temp:
|
marco@4
|
193 os.remove(fileslist[0])
|
marco@0
|
194
|
marco@4
|
195 print "------------------------"
|
marco@4
|
196 print "You will find the submission in the \"Submissions\" list in your DSpace account. To complete/edit it with metadata and licenses, click on the title and then on \"Resume\"."
|
marco@4
|
197
|
marco@0
|
198
|
marco@0
|
199 else: # Failed to connect to SWORDv2 Server
|
marco@0
|
200 print "Couldn't connect to the server."
|
marco@0
|
201 if attempts == 0:
|
marco@0
|
202 print "Invalid credentials entered 3 times."
|
marco@0
|
203
|
marco@0
|
204 except KeyboardInterrupt:
|
marco@0
|
205 print "------------------------"
|
marco@3
|
206 print "\nSubmission aborted by user." |