marco@0
|
1 #!/usr/bin/env python
|
marco@0
|
2 # dspaceuploader.py
|
marco@0
|
3 # Copyright: Marco Fabiani, Centre for Digital Music, Queen Mary University of London
|
marco@0
|
4 # License: XXXXXX
|
marco@0
|
5
|
marco@0
|
6 import argparse, getpass, zipfile,os,sys
|
marco@0
|
7 from sword2 import *
|
marco@0
|
8
|
marco@0
|
# Command-line interface.
# Every option is declared with nargs=1 (or "+"), so argparse stores each
# supplied value as a list; the rest of the script reads element [0].
parser = argparse.ArgumentParser(
    description="Bulk upload to DSpace using SWORDv2.",
    epilog=(
        "If the submission is created successfully, it will remain open to "
        "be completed with the necessary metadata and licenses, using the "
        "DSpace web interface. The submission can be found in the "
        "\"My Account -> Submissions\" section of the user's area."
    )
)

# The only positional argument: what to upload.
parser.add_argument(
    "data",
    type=str,
    nargs=1,
    help="Accepts: METSDSpaceSIP packages, zip files, directories, single files"
)

# Optional arguments as (flag, dest, nargs, help) rows, registered in the
# order in which they should appear in the --help output.
_OPTIONAL_ARGUMENTS = (
    ("--username", "user_name", 1,
     "DSpace username."),
    ("--title", "title", 1,
     "Title (ignored for METS packages)."),
    ("--author", "author", "+",
     "Author(s) (ignored for METS packages). Accepts multiple entries in the format \"Surname, Name\""),
    ("--date", "date", 1,
     "Date of creation (string) (ignored for METS packages)."),
    ("--servicedoc", "dspaceurl", 1,
     "Url of the SWORDv2 service document (default: c4dm)."),
)
for _flag, _dest, _nargs, _help in _OPTIONAL_ARGUMENTS:
    parser.add_argument(_flag, dest=_dest, type=str, nargs=_nargs, help=_help)

args = parser.parse_args()
data = args.data[0]

# Use the built-in c4dm service document URL unless --servicedoc was given.
dspaceurl = (
    "http://c4dm.eecs.qmul.ac.uk/smdmrd-test/swordv2/servicedocument"
    if args.dspaceurl is None
    else args.dspaceurl[0]
)
|
marco@0
|
25
|
marco@0
|
26
|
marco@0
|
27 try:
|
marco@0
|
28 # Connect to SWORD server
|
marco@0
|
29 attempts = 3 # Number of attempts left to connect to server
|
marco@0
|
30 connected = False
|
marco@0
|
31 while attempts>0 and not connected:
|
marco@0
|
32 print "Connecting to SWORD server. Remaining attempts: ", attempts
|
marco@0
|
33 # Try to login, get service document
|
marco@0
|
34 # Get username and password
|
marco@0
|
35 if args.user_name == None:
|
marco@0
|
36 user_name = raw_input("DSpace Username: ")
|
marco@0
|
37 else:
|
marco@0
|
38 user_name = args.user_name[0]
|
marco@0
|
39 print "DSpace Username: ",user_name
|
marco@0
|
40 user_pass = getpass.getpass("DSpace password:")
|
marco@0
|
41 # Connect to the server
|
marco@0
|
42 c = Connection(dspaceurl, user_name=user_name, user_pass=user_pass,keep_history=False)
|
marco@0
|
43 # Get service document
|
marco@0
|
44 try:
|
marco@0
|
45 c.get_service_document()
|
marco@0
|
46 except: # Could be Forbidden if the exception was raised
|
marco@0
|
47 attempts-=1
|
marco@0
|
48 print "Incorrect username and/or password"
|
marco@0
|
49 if c.sd != None:
|
marco@0
|
50 connected = True
|
marco@0
|
51
|
marco@0
|
52 if connected:
|
marco@0
|
53 # List available collections
|
marco@0
|
54 print "------------------------"
|
marco@0
|
55 print "Welcome to the ",c.workspaces[0][0], "repository"
|
marco@0
|
56 print "Available Collections: "
|
marco@0
|
57 numColl = len(c.workspaces[0][1])
|
marco@0
|
58 for ctr in range(numColl):
|
marco@0
|
59 coll = c.workspaces[0][1][ctr]
|
marco@0
|
60 print ctr+1,":",coll.title
|
marco@0
|
61 # Select a collection to deposit into
|
marco@0
|
62 sel = -1
|
marco@0
|
63 while (sel<=0 or sel>numColl):
|
marco@0
|
64 sel = input("Select a Collection to submit your files into: ")
|
marco@0
|
65 collection = c.workspaces[0][1][sel-1]
|
marco@0
|
66 print "Selected Collection: ",collection.title
|
marco@0
|
67
|
marco@0
|
68 # Create a submission: build the zip files
|
marco@0
|
69 temp = True # delete the zip file at the end of the upload
|
marco@0
|
70 if zipfile.is_zipfile(data):
|
marco@0
|
71 zipf = data
|
marco@0
|
72 temp = False
|
marco@0
|
73 elif os.path.isfile(data): # This is a single file
|
marco@0
|
74 dataname = os.path.basename(data)
|
marco@0
|
75 zipf = os.path.splitext(dataname)[0]+".zip"
|
marco@0
|
76 myzip = zipfile.ZipFile(zipf, "w")
|
marco@0
|
77 myzip.write(data)
|
marco@0
|
78 myzip.close()
|
marco@0
|
79 elif os.path.isdir(data): # This is a directory, zip all the files and maintain the structure!
|
marco@0
|
80 dataname = os.path.basename(os.path.normpath(data))
|
marco@0
|
81 zipf = dataname+".zip"
|
marco@0
|
82 myzip = zipfile.ZipFile(zipf, "w")
|
marco@0
|
83 # get the directory structure
|
marco@0
|
84 for root, dirs, files in os.walk(data):
|
marco@0
|
85 for name in files:
|
marco@0
|
86 myzip.write(os.path.join(root,name))
|
marco@0
|
87 myzip.close()
|
marco@0
|
88 else:
|
marco@0
|
89 print "Couldn't find the data."
|
marco@0
|
90 sys.exit()
|
marco@0
|
91
|
marco@0
|
92 #Check if this is a METSDSpaceSIP: see if there is a mets.xml file in the zip
|
marco@0
|
93 myzip = zipfile.ZipFile(zipf)
|
marco@0
|
94 if "mets.xml" in myzip.namelist():
|
marco@0
|
95 packaging = "http://purl.org/net/sword/package/METSDSpaceSIP"
|
marco@0
|
96 type = "METS"
|
marco@0
|
97 else:
|
marco@0
|
98 packaging = "http://purl.org/net/sword/package/SimpleZip"
|
marco@0
|
99 type = "SimpleZip"
|
marco@0
|
100
|
marco@0
|
101 print "------------------------"
|
marco@0
|
102 print "This is a ",type," submission"
|
marco@0
|
103 myzip.close()
|
marco@0
|
104
|
marco@0
|
105 payload = open(zipf, "rb")
|
marco@0
|
106 try:
|
marco@0
|
107 receipt_dep = c.create(col_iri = collection.href,
|
marco@0
|
108 payload = payload,
|
marco@0
|
109 filename = zipf,
|
marco@0
|
110 mimetype = "application/zip",
|
marco@0
|
111 packaging = packaging)
|
marco@0
|
112 print type, " submission successful."
|
marco@0
|
113 if type == "SimpleZip":
|
marco@0
|
114 # If some of the additional arguments for author, title, date etc. have been specified, update the metadata
|
marco@0
|
115 if (args.title != None) or (args.author != None) or (args.date != None):
|
marco@0
|
116 entry = Entry()
|
marco@0
|
117 print "------------------------"
|
marco@0
|
118 print "Updating with additional metadata"
|
marco@0
|
119 if args.title != None:
|
marco@0
|
120 entry.add_fields(dcterms_title = args.title[0])
|
marco@0
|
121 if args.author != None:
|
marco@0
|
122 for creator in args.author:
|
marco@0
|
123 entry.add_fields(dcterms_creator=creator)
|
marco@0
|
124 if args.date != None:
|
marco@0
|
125 entry.add_fields(dcterms_created = args.date[0])
|
marco@0
|
126 try:
|
marco@0
|
127 receipt_update = c.update(dr = receipt_dep , metadata_entry = entry, in_progress = True) # in_progress is True: we don't want to close the submission
|
marco@0
|
128 print "Additional metadata updated successfully."
|
marco@0
|
129 except:
|
marco@0
|
130 print "Server error"
|
marco@0
|
131 print "------------------------"
|
marco@0
|
132 print "You will find this submission in the \"Submissions\" list in your DSpace account. To complete/edit it with metadata and licenses, click on the title and then on \"Resume\"."
|
marco@0
|
133 except:
|
marco@0
|
134 print "Error! Couldn't submit the file!"
|
marco@0
|
135 if type == "METS": # Just guessing: not sure this is the problem...
|
marco@0
|
136 print "To submit a METS package, the collection MUST have a workflow!"
|
marco@0
|
137 payload.close()
|
marco@0
|
138
|
marco@0
|
139 else: # Failed to connect to SWORDv2 Server
|
marco@0
|
140 print "Couldn't connect to the server."
|
marco@0
|
141 if attempts == 0:
|
marco@0
|
142 print "Invalid credentials entered 3 times."
|
marco@0
|
143 if temp:
|
marco@0
|
144 os.remove(zipf)
|
marco@0
|
145
|
marco@0
|
146 except KeyboardInterrupt:
|
marco@0
|
147 print "------------------------"
|
marco@0
|
148 print "\nSubmission aborted by user."
|
marco@0
|
149
|