#!/usr/bin/env python

""" SWORD2 DSpace bulk uploader - v0.2

A python script to submit large numbers of files to a SWORD2-compatible repository, specifically DSpace 1.8x.
Built on the SWORD2 python client library: https://bitbucket.org/beno/python-sword2/overview

Dependencies:

- python 2.X

- sword2 library: https://bitbucket.org/beno/python-sword2/src

-----------------------------------
Copyright 2012 Marco Fabiani
Copyright 2012 Queen Mary, University of London
-----------------------------------
"""

import argparse
import getpass
import os
import shutil
import sys
import tempfile
import zipfile

from sword2 import *

# Python 2's input() evaluates whatever the user types as an expression, so
# every prompt goes through a reader that returns the raw text instead.
try:
    read_line = raw_input
except NameError:  # Python 3: input() already returns the raw string
    read_line = input

DEFAULT_SERVICE_DOCUMENT = "http://c4dm.eecs.qmul.ac.uk/smdmrd-test/swordv2/servicedocument"
MAX_LOGIN_ATTEMPTS = 3
SEPARATOR = "------------------------"
METS_PACKAGING = "http://purl.org/net/sword/package/METSDSpaceSIP"
SIMPLE_ZIP_PACKAGING = "http://purl.org/net/sword/package/SimpleZip"


def _parse_args(argv=None):
    """Parse the command line (``sys.argv`` when *argv* is None).

    Every option keeps ``nargs=1`` (``nargs="+"`` for ``--author``), so the
    parsed values are lists, or None when the option was not given.
    """
    parser = argparse.ArgumentParser(
        description="Bulk upload to DSpace using SWORDv2.",
        epilog="If the submission is created successfully, it will remain open to be completed with the necessary metadata and licenses, using the DSpace web interface. The submission can be found in the \"My Account -> Submissions\" section of the user's area.")
    parser.add_argument("data", type=str, nargs=1,
                        help="Accepts: METSDSpaceSIP packages, zip files, directories, single files. NOTE: METSDSpaceSIP packages are only accepted by Collections with a workflow!")
    parser.add_argument("--username", dest="user_name", type=str, nargs=1,
                        help="DSpace username.")
    parser.add_argument("--title", dest="title", type=str, nargs=1,
                        help="Title (ignored for METS packages).")
    parser.add_argument("--author", dest="author", type=str, nargs="+",
                        help="Author(s) (ignored for METS packages). Accepts multiple entries in the format \"Surname, Name\"")
    parser.add_argument("--date", dest="date", type=str, nargs=1,
                        help="Date of creation (string) (ignored for METS packages).")
    parser.add_argument("--servicedoc", dest="dspaceurl", type=str, nargs=1,
                        help="Url of the SWORDv2 service document (default: " + DEFAULT_SERVICE_DOCUMENT + ")")
    return parser.parse_args(argv)


def _report_details(exc):
    """Write the swallowed exception to stderr so failures can be diagnosed."""
    # %r avoids encoding errors when the exception carries non-ASCII text.
    sys.stderr.write("Details: %r\n" % (exc,))


def _connect(service_url, preset_user=None):
    """Log in and fetch the service document.

    Prompts for the password (and for the username when *preset_user* is
    None) up to MAX_LOGIN_ATTEMPTS times.

    Returns a ``(connection, attempts_left)`` pair; ``connection`` is None
    when the server raised an error or every attempt was used up.
    """
    attempts = MAX_LOGIN_ATTEMPTS
    while attempts > 0:
        print("Connecting to SWORD server. Remaining attempts:  %d" % (attempts,))
        if preset_user is None:
            user_name = read_line("DSpace Username: ")
        else:
            user_name = preset_user
            print("DSpace Username:  %s" % (user_name,))
        user_pass = getpass.getpass("DSpace password:")
        connection = Connection(service_url, user_name=user_name,
                                user_pass=user_pass, keep_history=False)
        try:
            connection.get_service_document()
        except Exception as exc:
            # Not a bare except: Ctrl-C must reach the handler in main().
            print("Server unreachable!")
            _report_details(exc)
            return None, attempts
        if connection.sd is not None:
            return connection, attempts
        # A missing service document is reported as a credentials problem.
        attempts -= 1
        print("Incorrect username and/or password")
    return None, attempts


def _choose_collection(connection):
    """List the collections of the first workspace and let the user pick one.

    Returns the selected collection, or None when there is nothing to pick.
    """
    try:
        workspace = connection.workspaces[0]
    except IndexError:
        print("The service document does not list any workspace.")
        return None
    collections = workspace[1]
    count = len(collections)

    print(SEPARATOR)
    print("Welcome to the  %s repository" % (workspace[0],))
    print("Available Collections: ")
    for number, coll in enumerate(collections, 1):
        print("%d : %s" % (number, coll.title))
    if count == 0:
        # Without this guard the selection loop below could never terminate.
        print("No collections are available for deposit.")
        return None

    choice = 0
    while not 1 <= choice <= count:
        answer = read_line("Select a Collection to submit your files into: ")
        try:
            choice = int(answer)
        except ValueError:
            choice = 0  # not a number: ask again
    collection = collections[choice - 1]
    print("Selected Collection:  %s" % (collection.title,))
    return collection


def _zip_into(data, scratch_dir):
    """Zip the file or directory *data* into *scratch_dir*.

    Returns ``(archive_path, upload_name)``. ``upload_name`` is the bare
    ``<name>.zip`` file name passed to the server as the deposit's filename.
    """
    if os.path.isfile(data):
        base_name = os.path.basename(data)
        upload_name = os.path.splitext(base_name)[0] + ".zip"
        archive_path = os.path.join(scratch_dir, upload_name)
        with zipfile.ZipFile(archive_path, "w") as archive:
            archive.write(data, base_name)
        return archive_path, upload_name

    # Directory: keep the internal structure, with paths relative to *data*.
    upload_name = os.path.basename(os.path.normpath(data)) + ".zip"
    archive_path = os.path.join(scratch_dir, upload_name)
    print("Creating a zip archive for submission...")
    with zipfile.ZipFile(archive_path, "w") as archive:
        for root, _dirs, files in os.walk(data):
            for name in files:
                full_path = os.path.join(root, name)
                archive.write(full_path, os.path.relpath(full_path, data))
    return archive_path, upload_name


def _update_metadata(connection, receipt, title, authors, date):
    """Attach title/author/date to an open deposit; no-op if none was given."""
    if title is None and authors is None and date is None:
        return
    entry = Entry()
    print(SEPARATOR)
    print("Updating with additional metadata")
    if title is not None:
        entry.add_fields(dcterms_title=title[0])
    if authors is not None:
        for creator in authors:
            entry.add_fields(dcterms_creator=creator)
    if date is not None:
        entry.add_fields(dcterms_created=date[0])
    try:
        # in_progress is True: we don't want to close the submission
        connection.update(dr=receipt, metadata_entry=entry, in_progress=True)
        print("Additional metadata updated successfully.")
    except Exception as exc:
        print("Server error")
        _report_details(exc)


def _deposit(connection, collection, archive_path, upload_name, args):
    """Upload the zip at *archive_path* into *collection*.

    The archive is treated as a METSDSpaceSIP package when it contains a
    top-level ``mets.xml`` entry, and as a SimpleZip otherwise. Metadata
    given on the command line is only applied to SimpleZip deposits.
    """
    with zipfile.ZipFile(archive_path) as archive:
        is_mets = "mets.xml" in archive.namelist()
    if is_mets:
        packaging, package_type = METS_PACKAGING, "METS"
    else:
        packaging, package_type = SIMPLE_ZIP_PACKAGING, "SimpleZip"

    print(SEPARATOR)
    print("This is a  %s  submission" % (package_type,))
    print("Uploading files...")

    # The with-block closes the payload even if the upload is interrupted.
    with open(archive_path, "rb") as payload:
        try:
            receipt = connection.create(col_iri=collection.href,
                                        payload=payload,
                                        filename=upload_name,
                                        mimetype="application/zip",
                                        packaging=packaging)
            print("%s  submission successful." % (package_type,))
            if not is_mets:
                _update_metadata(connection, receipt,
                                 args.title, args.author, args.date)
            print(SEPARATOR)
            print("You will find this submission in the \"Submissions\" list in your DSpace account. To complete/edit it with metadata and licenses, click on the title and then on \"Resume\".")
        except Exception as exc:
            print("Error! Couldn't submit the file!")
            if is_mets:  # Just guessing: not sure this is the problem...
                print("To submit a METS package, the collection MUST have a workflow!")
            _report_details(exc)


def _run(args):
    """Connect, pick a collection, package the data and deposit it."""
    data = args.data[0]
    if args.dspaceurl is None:
        service_url = DEFAULT_SERVICE_DOCUMENT
    else:
        service_url = args.dspaceurl[0]
    preset_user = None if args.user_name is None else args.user_name[0]

    connection, attempts_left = _connect(service_url, preset_user)
    if connection is None:  # Failed to connect to SWORDv2 Server
        print("Couldn't connect to the server.")
        if attempts_left == 0:
            print("Invalid credentials entered %d times." % (MAX_LOGIN_ATTEMPTS,))
        return

    collection = _choose_collection(connection)
    if collection is None:
        return

    if zipfile.is_zipfile(data):
        # Already a zip: upload the user's own file and never delete it.
        _deposit(connection, collection, data, data, args)
        return

    if not (os.path.isfile(data) or os.path.isdir(data)):
        print("Couldn't find the data.")
        return

    # Build the archive in a private directory so that an existing
    # "<name>.zip" in the working directory is neither overwritten nor
    # deleted, and so that zipping "." cannot pick up the archive itself.
    scratch_dir = tempfile.mkdtemp(prefix="sword2-upload-")
    try:
        archive_path, upload_name = _zip_into(data, scratch_dir)
        _deposit(connection, collection, archive_path, upload_name, args)
    finally:
        shutil.rmtree(scratch_dir, ignore_errors=True)


def main():
    """Script entry point: parse the arguments and run the upload."""
    args = _parse_args()
    try:
        _run(args)
    except KeyboardInterrupt:
        print(SEPARATOR)
        print("\nSubmission aborted by user.")


if __name__ == "__main__":
    main()