marco@0
|
1 #!/usr/bin/env python
|
marco@1
|
2
|
marco@1
|
3 """ SWORD2 DSpace bulk uploader
|
marco@1
|
4
|
marco@1
|
5 A python script to submit large numbers of files to a SWORD2-compatible repository, specifically DSpace 1.8x.
|
marco@1
|
6 Built on the SWORD2 python client library: https://bitbucket.org/beno/python-sword2/overview
|
marco@1
|
7
|
marco@1
|
8 Dependencies:
|
marco@1
|
9
|
marco@1
|
10 - python 2.X
|
marco@1
|
11
|
marco@1
|
12 - sword2 library: https://bitbucket.org/beno/python-sword2/src
|
marco@1
|
13
|
marco@1
|
14 -----------------------------------
|
marco@1
|
15 Copyright 2012 Marco Fabiani
|
marco@1
|
16 Copyright 2012 Queen Mary, University of London
|
marco@1
|
17 -----------------------------------
|
marco@1
|
18 """
|
marco@0
|
19
|
marco@0
|
20 import argparse, getpass, zipfile,os,sys
|
marco@0
|
21 from sword2 import *
|
marco@0
|
22
|
marco@0
|
# ---------------------------------------------------------------------------
# Command-line driver: log in to a SWORDv2 server, let the user pick a
# collection, package the given data as a zip and deposit it.
#
# The code is kept compatible with Python 2.7 (required by the sword2 client
# library); print() is only ever called with a single argument so that it
# behaves the same as the Python 2 print statement.
# ---------------------------------------------------------------------------

# Service document used when --servicedoc is not given on the command line.
DEFAULT_SERVICEDOC = "http://c4dm.eecs.qmul.ac.uk/smdmrd-test/swordv2/servicedocument"
# Number of login attempts allowed before giving up.
MAX_ATTEMPTS = 3
# SWORD packaging identifiers sent with the deposit.
METS_PACKAGING = "http://purl.org/net/sword/package/METSDSpaceSIP"
SIMPLEZIP_PACKAGING = "http://purl.org/net/sword/package/SimpleZip"
SEPARATOR = "------------------------"

# Read a line of text WITHOUT evaluating it.  On Python 2 the builtin input()
# evaluates what the user types as Python code, so raw_input must be used.
try:
    read_line = raw_input
except NameError:  # Python 3: input() already returns the raw string
    read_line = input


def say(*parts):
    """Print *parts* separated by single spaces, followed by a newline.

    Mirrors the spacing of the Python 2 statement ``print a, b, c`` while
    remaining valid syntax on Python 3.  %-formatting is used (rather than
    str()) so that unicode parts such as collection titles are not forced
    through the ASCII codec on Python 2.
    """
    print(" ".join(["%s"] * len(parts)) % parts)


def build_parser():
    """Return the argparse parser describing the command-line interface."""
    parser = argparse.ArgumentParser(
        description="Bulk upload to DSpace using SWORDv2.",
        epilog="If the submission is created successfully, it will remain open to be completed with the necessary metadata and licenses, using the DSpace web interface. The submission can be found in the \"My Account -> Submissions\" section of the user's area.")
    parser.add_argument("data", type=str, nargs=1,
                        help="Accepts: METSDSpaceSIP packages, zip files, directories, single files. NOTE: METSDSpaceSIP packages are only accepted by Collections with a workflow!")
    parser.add_argument("--username", dest="user_name", type=str, nargs=1,
                        help="DSpace username.")
    parser.add_argument("--title", dest="title", type=str, nargs=1,
                        help="Title (ignored for METS packages).")
    parser.add_argument("--author", dest="author", type=str, nargs="+",
                        help="Author(s) (ignored for METS packages). Accepts multiple entries in the format \"Surname, Name\"")
    parser.add_argument("--date", dest="date", type=str, nargs=1,
                        help="Date of creation (string) (ignored for METS packages).")
    parser.add_argument("--servicedoc", dest="dspaceurl", type=str, nargs=1,
                        help="Url of the SWORDv2 service document (default: " + DEFAULT_SERVICEDOC + ")")
    return parser


def connect(dspaceurl, user_name=None):
    """Log in to the SWORD server and fetch its service document.

    The password is asked for on every attempt; the username is asked for
    only when *user_name* is None.  Returns the sword2 Connection once its
    service document (``sd``) has been retrieved, or None after MAX_ATTEMPTS
    failed attempts.
    """
    attempts = MAX_ATTEMPTS
    while attempts > 0:
        say("Connecting to SWORD server. Remaining attempts: ", attempts)
        if user_name is None:
            login = read_line("DSpace Username: ")
        else:
            login = user_name
            say("DSpace Username: ", login)
        password = getpass.getpass("DSpace password:")
        c = Connection(dspaceurl, user_name=login, user_pass=password,
                       keep_history=False)
        try:
            c.get_service_document()
        except Exception as err:
            # Not a bare except: Ctrl-C must reach the handler in main().
            # The cause is presumably a rejected login, but it could be any
            # other failure, so the details go to stderr.
            print("Incorrect username and/or password")
            sys.stderr.write("Details: %r\n" % (err,))
        if c.sd is not None:
            return c
        # Count every failed attempt, including the case where no exception
        # was raised but no service document was obtained; otherwise the
        # loop would never terminate.
        attempts -= 1
    return None


def select_collection(c):
    """List the collections of the first workspace and return the chosen one.

    Keeps asking until the user enters a number between 1 and the number of
    collections.  Returns None when there is nothing to deposit into.
    """
    workspaces = c.workspaces
    if not workspaces or not workspaces[0][1]:
        print("No collections are available for deposit.")
        return None
    repository, collections = workspaces[0][0], workspaces[0][1]
    print(SEPARATOR)
    say("Welcome to the ", repository, "repository")
    print("Available Collections: ")
    for number, coll in enumerate(collections, 1):
        say(number, ":", coll.title)
    choice = 0
    while not 1 <= choice <= len(collections):
        answer = read_line("Select a Collection to submit your files into: ")
        try:
            choice = int(answer)
        except ValueError:
            choice = 0  # not a number: ask again
    collection = collections[choice - 1]
    say("Selected Collection: ", collection.title)
    return collection


def write_zip(zip_path, members):
    """Create *zip_path* containing every path yielded by *members*.

    Paths are stored as given.  A half-written archive is removed if
    anything goes wrong, including an interruption with Ctrl-C.
    """
    target = os.path.abspath(zip_path)
    try:
        with zipfile.ZipFile(zip_path, "w") as archive:
            for path in members:
                # When zipping a directory that contains the working
                # directory, do not add the archive to itself.
                if os.path.abspath(path) == target:
                    continue
                archive.write(path)
    except BaseException:
        if os.path.exists(zip_path):
            os.remove(zip_path)
        raise


def build_zip(data):
    """Return ``(zip_path, temporary)`` for the data to upload.

    * an existing zip file is used as is (``temporary`` is False);
    * a single file or a directory is zipped into ``<name>.zip`` in the
      current working directory (``temporary`` is True, i.e. the caller
      should delete it after the upload).

    Raises IOError when *data* does not exist, or when the archive to create
    would overwrite an existing file.
    """
    if zipfile.is_zipfile(data):
        return data, False
    if os.path.isfile(data):
        stem = os.path.splitext(os.path.basename(data))[0]
        members = [data]
    elif os.path.isdir(data):
        # normpath strips a trailing separator so that basename is not empty
        stem = os.path.basename(os.path.normpath(data))
        members = (os.path.join(root, name)
                   for root, _dirs, files in os.walk(data)
                   for name in files)
    else:
        raise IOError("Couldn't find the data.")
    zip_path = stem + ".zip"
    if os.path.exists(zip_path):
        # The archive is deleted after the upload, so silently reusing an
        # existing name would destroy somebody's file (possibly the input
        # itself, e.g. a corrupt "x.zip").
        raise IOError("Refusing to overwrite existing file: " + zip_path)
    write_zip(zip_path, members)
    return zip_path, True


def detect_packaging(zip_path):
    """Return ``(packaging_uri, label)`` for the archive at *zip_path*.

    An archive with a top-level ``mets.xml`` entry is treated as a
    METSDSpaceSIP package (label "METS"), anything else as a SimpleZip.
    """
    with zipfile.ZipFile(zip_path) as archive:
        names = archive.namelist()
    if "mets.xml" in names:
        return METS_PACKAGING, "METS"
    return SIMPLEZIP_PACKAGING, "SimpleZip"


def update_metadata(c, receipt, args):
    """Attach the title/author/date given on the command line to a deposit."""
    entry = Entry()
    print(SEPARATOR)
    print("Updating with additional metadata")
    if args.title is not None:
        entry.add_fields(dcterms_title=args.title[0])
    if args.author is not None:
        for creator in args.author:
            entry.add_fields(dcterms_creator=creator)
    if args.date is not None:
        entry.add_fields(dcterms_created=args.date[0])
    try:
        # in_progress is True: we don't want to close the submission
        c.update(dr=receipt, metadata_entry=entry, in_progress=True)
    except Exception as err:
        print("Server error")
        sys.stderr.write("Details: %r\n" % (err,))
    else:
        print("Additional metadata updated successfully.")


def deposit(c, collection, zip_path, args):
    """Upload *zip_path* into *collection*; return True if it was accepted."""
    packaging, package_type = detect_packaging(zip_path)
    print(SEPARATOR)
    say("This is a ", package_type, " submission")

    with open(zip_path, "rb") as payload:
        try:
            receipt = c.create(col_iri=collection.href,
                               payload=payload,
                               filename=zip_path,
                               mimetype="application/zip",
                               packaging=packaging)
        except Exception as err:
            print("Error! Couldn't submit the file!")
            if package_type == "METS":  # Just guessing: not sure this is the problem...
                print("To submit a METS package, the collection MUST have a workflow!")
            sys.stderr.write("Details: %r\n" % (err,))
            return False

    say(package_type, " submission successful.")
    wants_metadata = (args.title is not None or args.author is not None
                      or args.date is not None)
    # Extra metadata is only applied to SimpleZip deposits (the command-line
    # help states it is ignored for METS packages).
    if package_type == "SimpleZip" and wants_metadata:
        update_metadata(c, receipt, args)
    # NOTE(review): the original indentation was lost, so it is unclear
    # whether this hint was meant for SimpleZip deposits only; it is shown
    # after every successful deposit - confirm.
    print(SEPARATOR)
    print("You will find this submission in the \"Submissions\" list in your DSpace account. To complete/edit it with metadata and licenses, click on the title and then on \"Resume\".")
    return True


def main():
    """Run the uploader; return 0 on success and 1 on failure or abort."""
    args = build_parser().parse_args()
    data = args.data[0]
    if args.dspaceurl is None:
        dspaceurl = DEFAULT_SERVICEDOC
    else:
        dspaceurl = args.dspaceurl[0]
    if args.user_name is None:
        user_name = None
    else:
        user_name = args.user_name[0]

    # Defined up front so that the cleanup below is safe on every path.
    zip_path = None
    temporary = False
    try:
        c = connect(dspaceurl, user_name)
        if c is None:
            print("Couldn't connect to the server.")
            print("Invalid credentials entered 3 times.")
            return 1
        collection = select_collection(c)
        if collection is None:
            return 1
        try:
            zip_path, temporary = build_zip(data)
        except IOError as err:
            print(err)
            return 1
        if deposit(c, collection, zip_path, args):
            return 0
        return 1
    except KeyboardInterrupt:
        print(SEPARATOR)
        print("\nSubmission aborted by user.")
        return 1
    finally:
        # Delete the archive only if this script created it.
        if temporary and zip_path is not None and os.path.exists(zip_path):
            os.remove(zip_path)


if __name__ == "__main__":
    sys.exit(main())