Mercurial > hg > sworduploader
comparison sworduploader.py @ 4:96d62e78ac9f
Added support for single file upload using edit media resources
| author | Marco Fabiani <marco.fabiani@eecs.qmul.ac.uk> |
|---|---|
| date | Thu, 29 Mar 2012 17:39:46 +0100 |
| parents | c1918aa337c4 |
| children | 13fc2773b3fe |
comparison
equal
deleted
inserted
replaced
| 3:c1918aa337c4 | 4:96d62e78ac9f |
|---|---|
| 1 #!usr/bin/env/ python | 1 #!usr/bin/env/ python |
| 2 | 2 |
| 3 """ SWORD2 DSpace bulk uploader - v0.2 | 3 """ SWORD2 DSpace bulk uploader - v0.3 |
| 4 | 4 |
| 5 A python script to submit large numbers of files to a SWORD2-compatible repository, specifically DSpace 1.8x. | 5 A python script to submit large numbers of files to a SWORD2-compatible repository, specifically DSpace 1.8x. |
| 6 Built on the SWORD2 python client library: https://bitbucket.org/beno/python-sword2/overview | 6 Built on the SWORD2 python client library: https://bitbucket.org/beno/python-sword2/overview |
| 7 | 7 |
| 8 Dependencies: | 8 Dependencies: |
| 9 | 9 |
| 10 - python 2.X | 10 - python 2.X |
| 11 | 11 |
| 12 - sword2 library: https://bitbucket.org/beno/python-sword2/src | 12 - sword2 library, with modifications: |
| 13 (original) https://bitbucket.org/beno/python-sword2/src | |
| 14 (modified) https://code.soundsoftware.ac.uk/hg/sworduploader | |
| 13 | 15 |
| 14 ----------------------------------- | 16 ----------------------------------- |
| 15 Copyright 2012 Marco Fabiani | 17 Copyright 2012 Marco Fabiani |
| 16 Copyright 2012 Queen Mary, University of London | 18 Copyright 2012 Queen Mary, University of London |
| 17 ----------------------------------- | 19 ----------------------------------- |
| 18 """ | 20 """ |
| 19 | 21 |
| 20 import argparse, getpass, zipfile,os,sys | 22 import argparse, getpass, zipfile, os, sys |
| 21 from sword2 import * | 23 from sword2 import * |
| 22 | 24 |
| 23 # Parse arguments | 25 # Parse arguments |
| 24 parser = argparse.ArgumentParser(description="Bulk upload to DSpace using SWORDv2.",epilog="If the submission is created successfully, it will remain open to be completed with the necessary metadata and licenses, using the DSpace web interface. The submission can be found in the \"My Account -> Submissions\" section of the user's area.") | 26 parser = argparse.ArgumentParser(description="Bulk upload to DSpace using SWORDv2.",epilog="If the submission is created successfully, it will remain open to be completed with the necessary metadata and licenses, using the DSpace web interface. The submission can be found in the \"My Account -> Submissions\" section of the user's area.") |
| 25 parser.add_argument("data", type=str, nargs=1, | 27 parser.add_argument("data", type=str, nargs=1, |
| 26 help="Accepts: METSDSpaceSIP packages, zip files, directories, single files. NOTE: METSDSpaceSIP packages are only accepted by Collections with a workflow!") | 28 help="Accepts: METSDSpaceSIP packages, zip files, directories, single files. NOTE: METSDSpaceSIP packages are only accepted by Collections with a workflow!") |
| 27 parser.add_argument("--username", dest="user_name", type=str,nargs=1, help="DSpace username.") | 29 parser.add_argument("--username", dest="user_name", type=str,nargs=1, help="DSpace username.") |
| 30 parser.add_argument("--zip", action="store_true",dest="zip",default=False, help="If \"data\" is a directory, send it as a single zip archive to preserve its structure.") | |
| 28 parser.add_argument("--title", dest="title", type=str,nargs=1, help="Title (ignored for METS packages).") | 31 parser.add_argument("--title", dest="title", type=str,nargs=1, help="Title (ignored for METS packages).") |
| 29 parser.add_argument("--author", dest="author", type=str,nargs="+", help="Author(s) (ignored for METS packages). Accepts multiple entries in the format \"Surname, Name\"") | 32 parser.add_argument("--author", dest="author", type=str,nargs="+", help="Author(s) (ignored for METS packages). Accepts multiple entries in the format \"Surname, Name\"") |
| 30 parser.add_argument("--date", dest="date", type=str,nargs=1, help="Date of creation (string) (ignored for METS packages).") | 33 parser.add_argument("--date", dest="date", type=str,nargs=1, help="Date of creation (string) (ignored for METS packages).") |
| 31 parser.add_argument("--servicedoc", dest="dspaceurl", type=str,nargs=1, help="Url of the SWORDv2 service document (default: http://c4dm.eecs.qmul.ac.uk/smdmrd-test/swordv2/servicedocument") | 34 parser.add_argument("--servicedoc", dest="dspaceurl", type=str,nargs=1, help="Url of the SWORDv2 service document (default: http://c4dm.eecs.qmul.ac.uk/smdmrd-test/swordv2/servicedocument") |
| 32 | 35 |
| 82 while (sel<=0 or sel>numColl): | 85 while (sel<=0 or sel>numColl): |
| 83 sel = input("Select a Collection to submit your files into: ") | 86 sel = input("Select a Collection to submit your files into: ") |
| 84 collection = c.workspaces[0][1][sel-1] | 87 collection = c.workspaces[0][1][sel-1] |
| 85 print "Selected Collection: ",collection.title | 88 print "Selected Collection: ",collection.title |
| 86 | 89 |
| 87 # Create a submission: build the zip files | 90 # Create a submission |
| 88 temp = True # delete the zip file at the end of the upload | 91 fileslist = [] |
| 89 if zipfile.is_zipfile(data): | 92 temp = False # Delete temp files |
| 90 zipf = data | 93 # If folder |
| 91 temp = False | 94 if os.path.isdir(data): |
| 95 if args.zip: # If zip option, zip all the files and maintain the structure, but start from the base only... | |
| 96 dataname = os.path.basename(os.path.normpath(data)) | |
| 97 zipf = dataname+".zip" | |
| 98 myzip = zipfile.ZipFile(zipf, "w") | |
| 99 # get the directory structure | |
| 100 print "Creating a zip archive for submission..." | |
| 101 for root, dirs, files in os.walk(data): | |
| 102 for name in files: | |
| 103 myzip.write(os.path.join(root,name), | |
| 104 os.path.relpath(os.path.join(root,name),data)) | |
| 105 fileslist.append(zipf) | |
| 106 packaging = "http://purl.org/net/sword/package/SimpleZip" | |
| 107 type = "SimpleZip" | |
| 108 temp = True | |
| 109 else: #create a list of files to upload | |
| 110 for root, dirs, files in os.walk(data): | |
| 111 for name in files: | |
| 112 fileslist.append(os.path.join(root,name)) | |
| 113 type = "multiple files" | |
| 114 elif zipfile.is_zipfile(data): #This is a zip file | |
| 115 fileslist.append(data) | |
| 116 myzip = zipfile.ZipFile(data) | |
| 117 if "mets.xml" in myzip.namelist(): #This is a METS package | |
| 118 packaging = "http://purl.org/net/sword/package/METSDSpaceSIP" | |
| 119 type = "METS" | |
| 120 else: #THis is a simple zip file | |
| 121 packaging = "http://purl.org/net/sword/package/SimpleZip" | |
| 122 type = "SimpleZip" | |
| 123 myzip.close() | |
| 92 elif os.path.isfile(data): # This is a single file | 124 elif os.path.isfile(data): # This is a single file |
| 93 dataname = os.path.basename(data) | 125 fileslist.append(data) |
| 94 zipf = os.path.splitext(dataname)[0]+".zip" | 126 type = "single file" |
| 95 myzip = zipfile.ZipFile(zipf, "w") | |
| 96 myzip.write(data,os.path.basename(data)) | |
| 97 myzip.close() | |
| 98 elif os.path.isdir(data): # This is a directory, zip all the files and maintain the structure, but start from the base only... | |
| 99 dataname = os.path.basename(os.path.normpath(data)) | |
| 100 zipf = dataname+".zip" | |
| 101 myzip = zipfile.ZipFile(zipf, "w") | |
| 102 # get the directory structure | |
| 103 print "Creating a zip archive for submission..." | |
| 104 for root, dirs, files in os.walk(data): | |
| 105 for name in files: | |
| 106 myzip.write(os.path.join(root,name), | |
| 107 os.path.relpath(os.path.join(root,name),data)) | |
| 108 myzip.close() | |
| 109 else: | 127 else: |
| 110 print "Couldn't find the data." | 128 print "Couldn't find the data." |
| 111 sys.exit() | 129 sys.exit() |
| 112 | 130 |
| 113 #Check if this is a METSDSpaceSIP: see if there is a mets.xml file in the zip | |
| 114 myzip = zipfile.ZipFile(zipf) | |
| 115 if "mets.xml" in myzip.namelist(): | |
| 116 packaging = "http://purl.org/net/sword/package/METSDSpaceSIP" | |
| 117 type = "METS" | |
| 118 else: | |
| 119 packaging = "http://purl.org/net/sword/package/SimpleZip" | |
| 120 type = "SimpleZip" | |
| 121 | |
| 122 print "------------------------" | 131 print "------------------------" |
| 123 print "This is a ",type," submission" | 132 print "This is a ",type," submission" |
| 124 print "Uploading files..." | 133 |
| 125 myzip.close() | 134 # Create a metadata entry |
| 126 | 135 if (args.title != None) or (args.author != None) or (args.date != None): |
| 127 payload = open(zipf, "rb") | 136 entry = Entry() |
| 128 try: | 137 if args.title != None: |
| 129 receipt_dep = c.create(col_iri = collection.href, | 138 entry.add_fields(dcterms_title = args.title[0]) |
| 130 payload = payload, | 139 if args.author != None: |
| 131 filename = zipf, | 140 for creator in args.author: |
| 132 mimetype = "application/zip", | 141 entry.add_fields(dcterms_creator=creator) |
| 133 packaging = packaging) | 142 if args.date != None: |
| 134 print type, " submission successful." | 143 entry.add_fields(dcterms_created = args.date[0]) |
| 135 if type == "SimpleZip": | 144 else: |
| 136 # If some of the additional arguments for author, title, date etc. have been specified, update the metadata | 145 entry = None |
| 137 if (args.title != None) or (args.author != None) or (args.date != None): | 146 # Select what to do |
| 138 entry = Entry() | 147 if (type is "single file") or (type is "multiple files"): |
| 139 print "------------------------" | 148 try: |
| 140 print "Updating with additional metadata" | 149 # Create the metadata entry with ATOM |
| 141 if args.title != None: | 150 print "------------------------" |
| 142 entry.add_fields(dcterms_title = args.title[0]) | 151 print "Creating the item..." |
| 143 if args.author != None: | 152 if entry is None: |
| 144 for creator in args.author: | 153 entry = Entry(dcterms_title=(os.path.basename(data))) |
| 145 entry.add_fields(dcterms_creator=creator) | 154 creation_receipt = c.create(col_iri = collection.href, metadata_entry = entry) |
| 146 if args.date != None: | 155 |
| 147 entry.add_fields(dcterms_created = args.date[0]) | 156 # Add the files |
| 148 try: | 157 for f in fileslist: |
| 149 receipt_update = c.update(dr = receipt_dep , metadata_entry = entry, in_progress = True) # in_progress is True: we don't want to close the submission | 158 print "Uploading file ",os.path.basename(f) |
| 150 print "Additional metadata updated successfully." | 159 payload = open(f,"rb") |
| 151 except: | 160 deposit_receipt = c.add_file_to_resource(edit_media_iri = creation_receipt.edit_media, |
| 152 print "Server error" | 161 payload = payload, |
| 153 print "------------------------" | 162 filename = os.path.basename(f), |
| 154 print "You will find this submission in the \"Submissions\" list in your DSpace account. To complete/edit it with metadata and licenses, click on the title and then on \"Resume\"." | 163 mimetype = 'application/zip', |
| 155 except: | 164 packaging = 'http://purl.org/net/sword/package/Binary') |
| 156 print "Error! Couldn't submit the file!" | 165 payload.close() |
| 157 if type == "METS": # Just guessing: not sure this is the problem... | 166 except HTTPResponseError: |
| 158 print "To submit a METS package, the collection MUST have a workflow!" | 167 print "Bad request" |
| 159 payload.close() | 168 else: |
| 160 if temp: | 169 # PUT the data |
| 161 os.remove(zipf) | 170 payload = open(fileslist[0], "rb") |
| 171 try: | |
| 172 deposit_receipt = c.create(col_iri = collection.href, | |
| 173 payload = payload, | |
| 174 filename = fileslist[0], | |
| 175 mimetype = "application/zip", | |
| 176 packaging = packaging) | |
| 177 print type, " submission successful." | |
| 178 except: | |
| 179 print "Error! Couldn't submit the file!" | |
| 180 if type == "METS": # Just guessing: not sure this is the problem... | |
| 181 print "To submit a METS package, the collection MUST have a workflow!" | |
| 182 payload.close() | |
| 183 | |
| 184 # If some of the additional arguments for author, title, date etc. have been specified, update the metadata | |
| 185 if type == "SimpleZip" and entry != None: | |
| 186 try: | |
| 187 receipt_update = c.update(dr = receipt_dep , metadata_entry = entry, in_progress = True) # in_progress is True: we don't want to close the submission | |
| 188 print "Additional metadata updated successfully." | |
| 189 except: | |
| 190 print "Server error" | |
| 191 if temp: | |
| 192 os.remove(fileslist[0]) | |
| 193 | |
| 194 print "------------------------" | |
| 195 print "You will find the submission in the \"Submissions\" list in your DSpace account. To complete/edit it with metadata and licenses, click on the title and then on \"Resume\"." | |
| 196 | |
| 162 | 197 |
| 163 else: # Failed to connect to SWORDv2 Server | 198 else: # Failed to connect to SWORDv2 Server |
| 164 print "Couldn't connect to the server." | 199 print "Couldn't connect to the server." |
| 165 if attempts == 0: | 200 if attempts == 0: |
| 166 print "Invalid credentials entered 3 times." | 201 print "Invalid credentials entered 3 times." |
