Skip to content

Commit

Permalink
Merge pull request #929 from efiop/master
Browse files (browse the repository at this point in the history)
s3: don't use Bucket()
  • Loading branch information
efiop authored Jul 20, 2018
2 parents (2ee9525 + 2881cd2), commit 4bbbdb3
Showing 1 changed file with 15 additions and 9 deletions.
24 changes: 15 additions & 9 deletions dvc/remote/s3.py
Original file line number | Diff line number | Diff line change
[Expand Up] @@ -97,7 +97,7 @@ def _copy(self, from_info, to_info, s3=None):

source = {'Bucket': from_info['bucket'],
'Key': from_info['key']}
self.s3.Bucket(to_info['bucket']).copy(source, to_info['key'])
self.s3.copy(source, to_info['bucket'], to_info['key'])

def save(self, path_info):
if path_info['scheme'] != 's3':
[Expand Down] [Expand Up] @@ -139,12 +139,7 @@ def md5s_to_path_infos(self, md5s):
'bucket': self.bucket,
'key': posixpath.join(self.prefix, md5[0:2], md5[2:])} for md5 in md5s]

def exists(self, path_infos):
# NOTE: We mostly use exists() method when filtering a bulk of cache
# files to decide if we need to download/upload them and in s3
# list_objects_v2() is much-much faster than trying to check keys
# one-by-one.
ret = []
def _all_keys(self):
s3 = self.s3

keys = []
[Expand All] @@ -165,6 +160,17 @@ def exists(self, path_infos):

kwargs['ContinuationToken'] = token

return keys

def exists(self, path_infos):
# NOTE: We mostly use exists() method when filtering a bulk of cache
# files to decide if we need to download/upload them and in s3
# list_objects_v2() is much-much faster than trying to check keys
# one-by-one.
ret = []

keys = self._all_keys()

for path_info in path_infos:
exists = False
if path_info['key'] in keys:
[Expand Down] [Expand Up] @@ -253,8 +259,8 @@ def _path_to_etag(self, path):
return posixpath.dirname(relpath) + posixpath.basename(relpath)

def _all(self):
objects = self.s3.Bucket(self.bucket).objects.filter(Prefix=self.prefix)
return [self._path_to_etag(obj.key) for obj in objects]
keys = self._all_keys()
return [self._path_to_etag(key) for key in keys]

def gc(self, checksum_infos):
used_etags = [info[self.PARAM_ETAG] for info in checksum_infos['s3']]
Expand Down

0 comments on commit 4bbbdb3

Please sign in to comment.