diff --git a/internetarchive/item.py b/internetarchive/item.py index 5d428c8f..46c26888 100644 --- a/internetarchive/item.py +++ b/internetarchive/item.py @@ -49,7 +49,7 @@ from requests.exceptions import HTTPError from internetarchive.utils import IdentifierListAsItems, get_md5, chunk_generator, \ - IterableToFileAdapter, iter_directory, recursive_file_count, norm_filepath + IterableToFileAdapter, iter_directory, norm_filepath from internetarchive.files import File from internetarchive.iarequest import MetadataRequest, S3Request from internetarchive.auth import S3Auth @@ -1156,45 +1156,32 @@ def upload(self, files, """ queue_derive = True if queue_derive is None else queue_derive remote_dir_name = None - total_files = None + if isinstance(files, dict): if files.get('name'): files = [files] - total_files = 1 else: files = list(files.items()) if not isinstance(files, (list, tuple)): files = [files] - if all(isinstance(f, dict) and f.get('name') for f in files): - total_files = len(files) responses = [] file_index = 0 - if queue_derive and total_files is None: - if checksum: - total_files = recursive_file_count(files, item=self, checksum=True) - else: - total_files = recursive_file_count(files, item=self, checksum=False) file_metadata = None for f in files: + if isinstance(f, dict): if f.get('name'): file_metadata = f.copy() del file_metadata['name'] f = f['name'] + if (isinstance(f, string_types) and is_dir(f)) \ or (isinstance(f, tuple) and is_dir(f[-1])): if isinstance(f, tuple): remote_dir_name = f[0].strip('/') f = f[-1] for filepath, key in iter_directory(f): - file_index += 1 - # Set derive header if queue_derive is True, - # and this is the last request being made. - if queue_derive is True and file_index >= total_files: - _queue_derive = True - else: - _queue_derive = False if not f.endswith('/'): if remote_dir_name: key = '{0}{1}/{2}'.format(remote_dir_name, f, key) @@ -1210,7 +1197,7 @@ def upload(self, files, headers=headers, access_key=access_key, secret_key=secret_key, - queue_derive=_queue_derive, + queue_derive=False, verbose=verbose, verify=verify, checksum=checksum, @@ -1221,15 +1208,6 @@ def upload(self, files, request_kwargs=request_kwargs) responses.append(resp) else: - file_index += 1 - # Set derive header if queue_derive is True, - # and this is the last request being made. - # if queue_derive is True and file_index >= len(files): - if queue_derive is True and file_index >= total_files: - _queue_derive = True - else: - _queue_derive = False - if not isinstance(f, (list, tuple)): key, body = (None, f) else: @@ -1243,7 +1221,7 @@ def upload(self, files, headers=headers, access_key=access_key, secret_key=secret_key, - queue_derive=_queue_derive, + queue_derive=False, verbose=verbose, verify=verify, checksum=checksum, @@ -1253,6 +1231,12 @@ def upload(self, files, debug=debug, request_kwargs=request_kwargs) responses.append(resp) + + if queue_derive: + # Came this far without any exceptions raised, so all uploads + # probably completed successfully. Derive now. + self.derive() + return responses diff --git a/internetarchive/utils.py b/internetarchive/utils.py index 606b086e..5dadc1bd 100644 --- a/internetarchive/utils.py +++ b/internetarchive/utils.py @@ -207,54 +207,6 @@ def iter_directory(directory): yield (filepath, key) -def recursive_file_count(files, item=None, checksum=False): - """Given a filepath or list of filepaths, return the total number of files.""" - if not isinstance(files, (list, set)): - files = [files] - total_files = 0 - if checksum is True: - md5s = [f.get('md5') for f in item.files] - else: - md5s = list() - if isinstance(files, dict): - # make sure to use local filenames. - _files = files.values() - else: - if isinstance(files[0], tuple): - _files = dict(files).values() - else: - _files = files - for f in _files: - try: - is_dir = os.path.isdir(f) - except TypeError: - try: - f = f[0] - is_dir = os.path.isdir(f) - except (AttributeError, TypeError): - is_dir = False - if is_dir: - for x, _ in iter_directory(f): - if checksum is True: - with open(x, 'rb') as fh: - lmd5 = get_md5(fh) - if lmd5 in md5s: - continue - total_files += 1 - else: - if checksum is True: - try: - with open(f, 'rb') as fh: - lmd5 = get_md5(fh) - except TypeError: - # Support file-like objects. - lmd5 = get_md5(f) - if lmd5 in md5s: - continue - total_files += 1 - return total_files - - def is_dir(obj): """Special is_dir function to handle file-like object cases that cannot be stat'd"""