-
Notifications
You must be signed in to change notification settings - Fork 27
/
Copy pathsetup.py
109 lines (98 loc) · 3.34 KB
/
setup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# Lint as: python3
import os
import subprocess
import sys

from setuptools import find_packages, setup
# General-purpose runtime dependencies.
_CORE_PKGS = [
    # numpy>=1.17 is needed for np.random.Generator (Dataset shuffling)
    "numpy>=1.17",
    # Backend and serialization.
    # At least 3.0.0 to support a mix of struct and list types in parquet,
    # and batch iterators of parquet data
    "pyarrow>=3.0.0,!=4.0.0",
    # Smart caching of dataset processing
    "dill",
    # Performance gains with apache arrow
    "pandas",
    # Downloading datalabs over HTTPS
    "requests>=2.19.0",
    # Progress bars in download and scripts
    "tqdm>=4.62.1",
    # Backport of dataclasses for Python versions that lack it
    "dataclasses;python_version<'3.7'",
    # Fast hashing
    "xxhash",
    # Better multiprocessing
    "multiprocess",
    "pathos",
    # Metadata of optional dependencies such as torch or tensorflow,
    # for Python versions that lack importlib.metadata
    "importlib_metadata;python_version<'3.8'",
    # Saving datalabs locally or on any filesystem;
    # at least 2021.05.0 to have the AbstractArchiveFileSystem
    "fsspec[http]>=2021.05.0",
    # Data streaming via http
    "aiohttp",
    "huggingface_hub>=0.1.0,<1.0.0",
    # Utilities from PyPA to e.g., compare versions
    "packaging",
]

# New dependencies needed by datalabs.
_DATALABS_PKGS = [
    "pymongo[srv]",
    "spacy",
    "checklist",
    "lexicalrichness",
    "sacrebleu",
    "compare_mt",
    "scikit-learn",  # restricted by hatesonar pkg ==0.23.2
    "seqeval",
    "jieba",
    "apache-beam",
]

# Full list handed to setup(install_requires=...); order matches the two
# groups above, general-purpose packages first.
REQUIRED_PKGS = _CORE_PKGS + _DATALABS_PKGS
# Optional dependency groups. Only the quality group lists a package;
# the audio, benchmark and test groups are empty lists.
AUDIO_REQUIRE, BENCHMARKS_REQUIRE, TESTS_REQUIRE = [], [], []
QUALITY_REQUIRE = ["pre-commit"]

# Mapping of extra name -> requirement list. "dev" is a fresh list combining
# the test and quality requirements; the other entries reference the lists
# above directly.
EXTRAS_REQUIRE = dict(
    dev=TESTS_REQUIRE + QUALITY_REQUIRE,
    tests=TESTS_REQUIRE,
    quality=QUALITY_REQUIRE,
)
# Read the long description up front inside a context manager so the file
# handle is closed deterministically, and resolve README.md relative to this
# script so the build does not depend on the current working directory.
_SETUP_DIR = os.path.dirname(os.path.abspath(__file__))
with open(os.path.join(_SETUP_DIR, "README.md"), "r", encoding="utf-8") as _readme:
    _LONG_DESCRIPTION = _readme.read()

# Package metadata and build configuration for the "datalabs" distribution.
setup(
    name="datalabs",
    version="0.4.15",
    description="Datalabs",
    long_description=_LONG_DESCRIPTION,
    long_description_content_type="text/markdown",
    author="expressai",
    # NOTE(review): this value looks like an obfuscated placeholder rather
    # than a real address — confirm the intended contact email.
    author_email="[email protected]",
    url="https://github.com/expressai/datalab",
    download_url="https://github.com/expressai/datalab/tags",
    license="Apache 2.0",
    packages=find_packages(),
    # Non-Python resources shipped inside the listed packages.
    package_data={
        "datalabs": ["py.typed", "scripts/templates/*"],
        "datalabs.utils.resources": ["*.json", "*.yaml"],
        "datalabs.operations.featurize.pre_models": ["*.pkl", "*.json"],
        "datalabs.operations.featurize.resources.gender_data": ["*.json"],
        "datalabs.operations.edit.resources": ["*.json", "*.txt", "*.names", "*.tsv"],
    },
    # Installs the `datalabs-cli` command, bound to
    # datalabs.commands.datasets_cli:main.
    entry_points={
        "console_scripts": ["datalabs-cli=datalabs.commands.datasets_cli:main"]
    },
    install_requires=REQUIRED_PKGS,
    extras_require=EXTRAS_REQUIRE,
    classifiers=[
        "Development Status :: 5 - Production/Stable",
        "Intended Audience :: Developers",
        "Intended Audience :: Education",
        "Intended Audience :: Science/Research",
        "License :: OSI Approved :: Apache Software License",
        "Operating System :: OS Independent",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.6",
        "Programming Language :: Python :: 3.7",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
    ],
    keywords="dataset",
    zip_safe=False,
    include_package_data=True,
)
# Best-effort download of the spaCy English model once setup() has returned.
# The interpreter running this script (sys.executable) is used instead of
# whatever "python" resolves to on PATH, so the model is installed into the
# same environment as the package; passing an argument list avoids a shell.
# NOTE(review): this runs for every setup.py command and presumably requires
# spaCy to be importable already at this point — confirm this is intended.
_spacy_download = subprocess.run(
    [sys.executable or "python", "-m", "spacy", "download", "en_core_web_sm"],
    check=False,
)
if _spacy_download.returncode != 0:
    # The exit status used to be discarded; keep the step non-fatal but
    # tell the user how to finish the installation by hand.
    print(
        "warning: could not download the spaCy model 'en_core_web_sm' "
        f"(exit code {_spacy_download.returncode}); "
        "run `python -m spacy download en_core_web_sm` manually.",
        file=sys.stderr,
    )