Skip to content

Commit

Permalink
Decompress gzip Content-Encoding, fix #41
Browse files (browse the repository at this point in the history)
  • Loading branch information
damnever committed Apr 24, 2018
1 parent 5c67718 commit 0fa0a2d
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 26 deletions.
2 changes: 1 addition & 1 deletion pigar/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@

from __future__ import print_function, division, absolute_import

version = __version__ = '0.7.1'
version = __version__ = '0.7.2'
version_info = [int(num) for num in version.split('.')]
28 changes: 14 additions & 14 deletions pigar/pypi.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,30 @@

from __future__ import print_function, division, absolute_import

import gzip
import json
import collections
try: # py2
from urllib2 import urlopen, Request
from HTMLParser import HTMLParser
from urlparse import urljoin
except ImportError: # py3
from urllib.request import urlopen, Request
from html.parser import HTMLParser
from urllib.parse import urljoin


from .db import database
from .unpack import top_level, unpack_html
from .unpack import top_level, try_unpack_resp
from .log import logger
from .utils import Color, compare_version, cmp_to_key
from .extractor import Extractor


PYPI_URL = 'https://pypi.python.org/'
PKG_URL = 'https://pypi.python.org/pypi/{0}'
PKGS_URL = 'https://pypi.python.org/simple/'
PKG_INFO_URL = 'https://pypi.python.org/pypi/{0}/json'
PYPI_URL = 'https://pypi.org'
PKG_URL = urljoin(PYPI_URL, '/pypi/{0}')
PKGS_URL = urljoin(PYPI_URL, '/simple/')
PKG_INFO_URL = urljoin(PYPI_URL, '/pypi/{0}/json')
ACCEPTABLE_EXT = ('.whl', '.egg', '.tar.gz', '.tar.bz2', '.zip')


Expand Down Expand Up @@ -68,7 +71,7 @@ def update_db():
return

logger.info('Extracting all packages ...')
pkg_names = _extract_html(unpack_html(data))
pkg_names = _extract_html(data)
with database() as db:
ignore_pkgs = db.query_package(None)
pkg_names = list(set(pkg_names) - set(ignore_pkgs))
Expand Down Expand Up @@ -126,14 +129,14 @@ def _pkg_json_info(pkg_name):
data = download(PKG_INFO_URL.format(pkg_name))
if not data: # 404
return None
data = json.loads(data.decode('utf-8'))
data = json.loads(data)
return data


# Fake headers, just in case.
_HEADERS = {
'Accept': '*/*',
'Accept-Encoding': 'gzip, deflate, sdch',
'Accept-Encoding': 'gzip',
'Accept-Language': 'en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4',
'User-Agent': ('Mozilla/5.0 (X11; Linux x86_64; rv:13.0) '
'AppleWebKit/537.36 (KHTML, like Gecko) '
Expand All @@ -143,14 +146,11 @@ def _pkg_json_info(pkg_name):

def download(url, headers=_HEADERS):
"""Download data from url."""
f = None
resp = urlopen(Request(url, headers=headers))
try:
f = urlopen(Request(url, headers=headers))
data = f.read()
return try_unpack_resp(resp)
finally:
if f:
f.close()
return data
resp.close()


def _extract_html(html):
Expand Down
20 changes: 9 additions & 11 deletions pigar/unpack.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import string
import io

from .utils import binary_type


class Archive(object):
"""Archive provides a consistent interface for unpacking
Expand Down Expand Up @@ -119,15 +121,11 @@ def top_level(url, data):
return [name.replace('/', '.') for name in txt.splitlines()] if txt else []


def unpack_html(data):
def try_unpack_resp(resp):
"""Unpack web page, Content-Encoding: gzip."""
try:
sb = io.BytesIO(data)
gz = gzip.GzipFile(fileobj=sb)
data = gz.read()
except Exception:
pass
finally:
gz.close()
sb.close()
return data.decode('utf-8')
data = resp.read()
if 'gzip' == resp.info().get('Content-Encoding'):
data = gzip.decompress(data)
if isinstance(data, binary_type):
data = data.decode('utf-8')
return data
5 changes: 5 additions & 0 deletions pigar/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@

PY32 = sys.version_info[:2] == (3, 2)

if sys.version_info[0] == 3:
binary_type = bytes
else:
binary_type = str


class Dict(dict):
"""Convert dict key object to attribute."""
Expand Down

0 comments on commit 0fa0a2d

Please sign in to comment.