diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index fc3cdae4..4dad4ba1 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -17,19 +17,25 @@ jobs:
timeout-minutes: 120
strategy:
matrix:
- python-version: [3.6,3.7]
- torch-version: [1.1.0,1.2.0,1.3.0,1.4.0,1.5.0,1.6.0,1.7.0,1.8.1]
+ python-version: [3.6,3.7,3.8]
+ torch-version: [1.1.0,1.2.0,1.3.0,1.4.0,1.5.0,1.6.0,1.7.1,1.8.1,1.9.0,1.10.2,1.11.0]
-# exclude:
-# - python-version: 3.5
-# tf-version: 1.1.0
+ exclude:
+ - python-version: 3.6
+ torch-version: 1.11.0
+ - python-version: 3.8
+ torch-version: 1.1.0
+ - python-version: 3.8
+ torch-version: 1.2.0
+ - python-version: 3.8
+ torch-version: 1.3.0
steps:
- - uses: actions/checkout@v1
+ - uses: actions/checkout@v3
- name: Setup python environment
- uses: actions/setup-python@v1
+ uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
@@ -47,7 +53,7 @@ jobs:
pip install -q sklearn
pytest --cov=deepctr_torch --cov-report=xml
- name: Upload coverage to Codecov
- uses: codecov/codecov-action@v1.0.2
+ uses: codecov/codecov-action@v3.1.0
with:
token: ${{secrets.CODECOV_TOKEN}}
file: ./coverage.xml
diff --git a/README.md b/README.md
index 7c646933..6d02554e 100644
--- a/README.md
+++ b/README.md
@@ -47,34 +47,19 @@ Let's [**Get Started!**](https://deepctr-torch.readthedocs.io/en/latest/Quick-St
## DisscussionGroup & Related Projects
-
-
-
-
- 公众号:浅梦学习笔记
-
-
-
- |
-
- 微信:deepctrbot
-
-
-
- |
-
-
- |
-
-
-
+- [Github Discussions](https://github.com/shenweichen/DeepCTR/discussions)
+- Wechat Discussions
+|公众号:浅梦学习笔记|微信:deepctrbot|学习小组 [加入](https://t.zsxq.com/026UJEuzv) [主题集合](https://mp.weixin.qq.com/mp/appmsgalbum?__biz=MjM5MzY4NzE3MA==&action=getalbum&album_id=1361647041096843265&scene=126#wechat_redirect)|
+|:--:|:--:|:--:|
+| [![公众号](./docs/pics/code.png)](https://github.com/shenweichen/AlgoNotes)| [![微信](./docs/pics/deepctrbot.png)](https://github.com/shenweichen/AlgoNotes)|[![学习小组](./docs/pics/planet_github.png)](https://t.zsxq.com/026UJEuzv)|
+- Related Projects
+
+ - [AlgoNotes](https://github.com/shenweichen/AlgoNotes)
+ - [DeepCTR](https://github.com/shenweichen/DeepCTR)
+ - [DeepMatch](https://github.com/shenweichen/DeepMatch)
+ - [GraphEmbedding](https://github.com/shenweichen/GraphEmbedding)
## Main Contributors([welcome to join us!](./CONTRIBUTING.md))
@@ -84,59 +69,58 @@ Let's [**Get Started!**](https://deepctr-torch.readthedocs.io/en/latest/Quick-St
Shen Weichen
- Core Dev Zhejiang Unversity
+ Alibaba Group
|
Zan Shuxun
- Core Dev Beijing University of Posts and Telecommunications
+ Alibaba Group
|
Wang Ze
- Core Dev Beihang University
+ Meituan
|
Zhang Wutong
- Core Dev Beijing University of Posts and Telecommunications
+ Tencent
|
Zhang Yuefeng
- Core Dev
- Peking University
+ Peking University
|
Huo Junyi
- Core Dev
+
University of Southampton
|
Zeng Kai
- Dev
+
SenseTime
|
Chen K
- Dev
+
NetEase
|
Cheng Weiyu
- Dev
+
Shanghai Jiao Tong University
|
Tang
- Test
+
Tongji University
|
diff --git a/deepctr_torch/__init__.py b/deepctr_torch/__init__.py
index 88508515..4be7a5bc 100644
--- a/deepctr_torch/__init__.py
+++ b/deepctr_torch/__init__.py
@@ -2,5 +2,5 @@
from . import models
from .utils import check_version
-__version__ = '0.2.7'
+__version__ = '0.2.8'
check_version(__version__)
\ No newline at end of file
diff --git a/deepctr_torch/models/basemodel.py b/deepctr_torch/models/basemodel.py
index 4235ad38..17e57b90 100644
--- a/deepctr_torch/models/basemodel.py
+++ b/deepctr_torch/models/basemodel.py
@@ -3,6 +3,7 @@
Author:
Weichen Shen,weichenswc@163.com
+ zanshuxun, zanshuxun@aliyun.com
"""
from __future__ import print_function
@@ -75,7 +76,7 @@ def forward(self, X, sparse_feat_refine_weight=None):
sparse_embedding_list += varlen_embedding_list
- linear_logit = torch.zeros([X.shape[0], 1]).to(sparse_embedding_list[0].device)
+ linear_logit = torch.zeros([X.shape[0], 1]).to(self.device)
if len(sparse_embedding_list) > 0:
sparse_embedding_cat = torch.cat(sparse_embedding_list, dim=-1)
if sparse_feat_refine_weight is not None:
@@ -476,6 +477,10 @@ def _log_loss(self, y_true, y_pred, eps=1e-7, normalize=True, sample_weight=None
sample_weight,
labels)
+ @staticmethod
+ def _accuracy_score(y_true, y_pred):
+ return accuracy_score(y_true, np.where(y_pred > 0.5, 1, 0))
+
def _get_metrics(self, metrics, set_eps=False):
metrics_ = {}
if metrics:
@@ -490,8 +495,7 @@ def _get_metrics(self, metrics, set_eps=False):
if metric == "mse":
metrics_[metric] = mean_squared_error
if metric == "accuracy" or metric == "acc":
- metrics_[metric] = lambda y_true, y_pred: accuracy_score(
- y_true, np.where(y_pred > 0.5, 1, 0))
+ metrics_[metric] = self._accuracy_score
self.metrics_names.append(metric)
return metrics_
diff --git a/docs/pics/code2.jpg b/docs/pics/code2.jpg
new file mode 100644
index 00000000..e191f297
Binary files /dev/null and b/docs/pics/code2.jpg differ
diff --git a/docs/pics/planet_github.png b/docs/pics/planet_github.png
new file mode 100644
index 00000000..67efe968
Binary files /dev/null and b/docs/pics/planet_github.png differ
diff --git a/docs/requirements.readthedocs.txt b/docs/requirements.readthedocs.txt
index 80bc5e6d..793412bd 100644
--- a/docs/requirements.readthedocs.txt
+++ b/docs/requirements.readthedocs.txt
@@ -1,2 +1,3 @@
Cython>=0.28.5
-tensorflow==1.15.4
+tensorflow==2.7.2
+scikit-learn==1.0
diff --git a/docs/source/FAQ.md b/docs/source/FAQ.md
index 102e35bc..1006bf42 100644
--- a/docs/source/FAQ.md
+++ b/docs/source/FAQ.md
@@ -6,7 +6,7 @@ To save/load weights:
```python
import torch
-model = DeepFM()
+model = DeepFM(...)
torch.save(model.state_dict(), 'DeepFM_weights.h5')
model.load_state_dict(torch.load('DeepFM_weights.h5'))
```
@@ -15,7 +15,7 @@ To save/load models:
```python
import torch
-model = DeepFM()
+model = DeepFM(...)
torch.save(model, 'DeepFM.h5')
model = torch.load('DeepFM.h5')
```
diff --git a/docs/source/History.md b/docs/source/History.md
index ec68a102..4984dfc4 100644
--- a/docs/source/History.md
+++ b/docs/source/History.md
@@ -1,5 +1,6 @@
# History
-- 06/14/2021 : [v0.2.7](https://github.com/shenweichen/DeepCTR-Torch/releases/tag/v0.2.6) released.Add [AFN](./Features.html#afn-adaptive-factorization-network-learning-adaptive-order-feature-interactions) and fix some bugs.
+- 06/19/2022 : [v0.2.8](https://github.com/shenweichen/DeepCTR-Torch/releases/tag/v0.2.8) released.Fix some bugs.
+- 06/14/2021 : [v0.2.7](https://github.com/shenweichen/DeepCTR-Torch/releases/tag/v0.2.7) released.Add [AFN](./Features.html#afn-adaptive-factorization-network-learning-adaptive-order-feature-interactions) and fix some bugs.
- 04/04/2021 : [v0.2.6](https://github.com/shenweichen/DeepCTR-Torch/releases/tag/v0.2.6) released.Add [IFM](./Features.html#ifm-input-aware-factorization-machine) and [DIFM](./Features.html#difm-dual-input-aware-factorization-machine);Support multi-gpus running([example](./FAQ.html#how-to-run-the-demo-with-multiple-gpus)).
- 02/12/2021 : [v0.2.5](https://github.com/shenweichen/DeepCTR-Torch/releases/tag/v0.2.5) released.Fix bug in DCN-M.
- 12/05/2020 : [v0.2.4](https://github.com/shenweichen/DeepCTR-Torch/releases/tag/v0.2.4) released.Imporve compatibility & fix issues.Add History callback.([example](https://deepctr-torch.readthedocs.io/en/latest/FAQ.html#set-learning-rate-and-use-earlystopping)).
diff --git a/docs/source/conf.py b/docs/source/conf.py
index e99b48ea..615f48b0 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -26,7 +26,7 @@
# The short X.Y version
version = ''
# The full version, including alpha/beta/rc tags
-release = '0.2.7'
+release = '0.2.8'
# -- General configuration ---------------------------------------------------
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 1701d403..497d232b 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -34,18 +34,21 @@ You can read the latest code at https://github.com/shenweichen/DeepCTR-Torch and
News
-----
+06/19/2022 : Fix some bugs. `Changelog <https://github.com/shenweichen/DeepCTR-Torch/releases/tag/v0.2.8>`_
+
06/14/2021 : Add `AFN <./Features.html#afn-adaptive-factorization-network-learning-adaptive-order-feature-interactions>`_ and fix some bugs. `Changelog <https://github.com/shenweichen/DeepCTR-Torch/releases/tag/v0.2.7>`_
04/04/2021 : Add `IFM <./Features.html#ifm-input-aware-factorization-machine>`_ and `DIFM <./Features.html#difm-dual-input-aware-factorization-machine>`_ . Support multi-gpus running(`example <./FAQ.html#how-to-run-the-demo-with-multiple-gpus>`_). `Changelog <https://github.com/shenweichen/DeepCTR-Torch/releases/tag/v0.2.6>`_
-02/12/2021 : Fix bug in DCN-M. `Changelog <https://github.com/shenweichen/DeepCTR-Torch/releases/tag/v0.2.5>`_
DisscussionGroup
-----------------------
-公众号:**浅梦学习笔记** wechat ID: **deepctrbot**
+ 公众号:**浅梦学习笔记** wechat ID: **deepctrbot**
+
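+ `Discussions <https://github.com/shenweichen/DeepCTR/discussions>`_ `学习小组主题集合 <https://mp.weixin.qq.com/mp/appmsgalbum?__biz=MjM5MzY4NzE3MA==&action=getalbum&album_id=1361647041096843265&scene=126#wechat_redirect>`_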
-.. image:: ../pics/code.png
+.. image:: ../pics/code2.jpg
.. toctree::
:maxdepth: 2
diff --git a/setup.py b/setup.py
index 4e44fe13..705a9905 100644
--- a/setup.py
+++ b/setup.py
@@ -4,12 +4,12 @@
long_description = fh.read()
REQUIRED_PACKAGES = [
- 'torch>=1.1.0', 'tqdm', 'sklearn', 'tensorflow'
+ 'torch>=1.1.0', 'tqdm', 'scikit-learn', 'tensorflow'
]
setuptools.setup(
name="deepctr-torch",
- version="0.2.7",
+ version="0.2.8",
author="Weichen Shen",
author_email="weichenswc@163.com",
description="Easy-to-use,Modular and Extendible package of deep learning based CTR(Click Through Rate) prediction models with PyTorch",
@@ -37,6 +37,7 @@
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
+ 'Programming Language :: Python :: 3.8',
'Topic :: Scientific/Engineering',
'Topic :: Scientific/Engineering :: Artificial Intelligence',
'Topic :: Software Development',
diff --git a/tests/models/DeepFM_test.py b/tests/models/DeepFM_test.py
index b11b5cb4..a11dc3bd 100644
--- a/tests/models/DeepFM_test.py
+++ b/tests/models/DeepFM_test.py
@@ -6,21 +6,33 @@
@pytest.mark.parametrize(
- 'use_fm,hidden_size,sparse_feature_num',
- [(True, (32,), 3),
- (False, (32,), 3),
- (False, (32,), 2), (False, (32,), 1), (True, (), 1), (False, (), 2)
+ 'use_fm,hidden_size,sparse_feature_num,dense_feature_num',
+ [(True, (32,), 3, 3),
+ (False, (32,), 3, 3),
+ (False, (32,), 2, 2),
+ (False, (32,), 1, 1),
+ (True, (), 1, 1),
+ (False, (), 2, 2),
+ (True, (32,), 0, 3),
+ (True, (32,), 3, 0),
+ (False, (32,), 0, 3),
+ (False, (32,), 3, 0),
]
)
-def test_DeepFM(use_fm, hidden_size, sparse_feature_num):
+def test_DeepFM(use_fm, hidden_size, sparse_feature_num, dense_feature_num):
model_name = "DeepFM"
sample_size = SAMPLE_SIZE
x, y, feature_columns = get_test_data(
- sample_size, sparse_feature_num=sparse_feature_num, dense_feature_num=sparse_feature_num)
+ sample_size, sparse_feature_num=sparse_feature_num, dense_feature_num=dense_feature_num)
model = DeepFM(feature_columns, feature_columns, use_fm=use_fm,
dnn_hidden_units=hidden_size, dnn_dropout=0.5, device=get_device())
check_model(model, model_name, x, y)
+ # no linear part
+ model = DeepFM([], feature_columns, use_fm=use_fm,
+ dnn_hidden_units=hidden_size, dnn_dropout=0.5, device=get_device())
+ check_model(model, model_name + '_no_linear', x, y)
+
if __name__ == "__main__":
pass
diff --git a/tests/utils.py b/tests/utils.py
index 10abcecb..28f3010b 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -4,6 +4,7 @@
import numpy as np
import torch as torch
+from deepctr_torch.callbacks import EarlyStopping, ModelCheckpoint
from deepctr_torch.inputs import SparseFeat, DenseFeat, VarLenSparseFeat
SAMPLE_SIZE = 64
@@ -17,46 +18,46 @@ def gen_sequence(dim, max_len, sample_size):
def get_test_data(sample_size=1000, embedding_size=4, sparse_feature_num=1, dense_feature_num=1,
sequence_feature=['sum', 'mean', 'max'], classification=True, include_length=False,
hash_flag=False, prefix=''):
-
-
feature_columns = []
model_input = {}
-
- if 'weight' in sequence_feature:
- feature_columns.append(VarLenSparseFeat(SparseFeat(prefix+"weighted_seq",vocabulary_size=2,embedding_dim=embedding_size),maxlen=3,length_name=prefix+"weighted_seq"+"_seq_length",weight_name=prefix+"weight"))
+ if 'weight' in sequence_feature:
+ feature_columns.append(
+ VarLenSparseFeat(SparseFeat(prefix + "weighted_seq", vocabulary_size=2, embedding_dim=embedding_size),
+ maxlen=3, length_name=prefix + "weighted_seq" + "_seq_length",
+ weight_name=prefix + "weight"))
s_input, s_len_input = gen_sequence(
2, 3, sample_size)
- model_input[prefix+"weighted_seq"] = s_input
- model_input[prefix+'weight'] = np.random.randn(sample_size,3,1)
- model_input[prefix+"weighted_seq"+"_seq_length"] = s_len_input
+ model_input[prefix + "weighted_seq"] = s_input
+ model_input[prefix + 'weight'] = np.random.randn(sample_size, 3, 1)
+ model_input[prefix + "weighted_seq" + "_seq_length"] = s_len_input
sequence_feature.pop(sequence_feature.index('weight'))
-
for i in range(sparse_feature_num):
dim = np.random.randint(1, 10)
- feature_columns.append(SparseFeat(prefix+'sparse_feature_'+str(i), dim,embedding_size,dtype=torch.int32))
+ feature_columns.append(SparseFeat(prefix + 'sparse_feature_' + str(i), dim, embedding_size, dtype=torch.int32))
for i in range(dense_feature_num):
- feature_columns.append(DenseFeat(prefix+'dense_feature_'+str(i), 1,dtype=torch.float32))
+ feature_columns.append(DenseFeat(prefix + 'dense_feature_' + str(i), 1, dtype=torch.float32))
for i, mode in enumerate(sequence_feature):
dim = np.random.randint(1, 10)
maxlen = np.random.randint(1, 10)
feature_columns.append(
- VarLenSparseFeat(SparseFeat(prefix +'sequence_' + mode,vocabulary_size=dim, embedding_dim=embedding_size), maxlen=maxlen, combiner=mode))
+ VarLenSparseFeat(SparseFeat(prefix + 'sequence_' + mode, vocabulary_size=dim, embedding_dim=embedding_size),
+ maxlen=maxlen, combiner=mode))
for fc in feature_columns:
- if isinstance(fc,SparseFeat):
- model_input[fc.name]= np.random.randint(0, fc.vocabulary_size, sample_size)
- elif isinstance(fc,DenseFeat):
+ if isinstance(fc, SparseFeat):
+ model_input[fc.name] = np.random.randint(0, fc.vocabulary_size, sample_size)
+ elif isinstance(fc, DenseFeat):
model_input[fc.name] = np.random.random(sample_size)
else:
s_input, s_len_input = gen_sequence(
fc.vocabulary_size, fc.maxlen, sample_size)
model_input[fc.name] = s_input
if include_length:
- fc.length_name = prefix+"sequence_"+str(i)+'_seq_length'
- model_input[prefix+"sequence_"+str(i)+'_seq_length'] = s_len_input
+ fc.length_name = prefix + "sequence_" + str(i) + '_seq_length'
+ model_input[prefix + "sequence_" + str(i) + '_seq_length'] = s_len_input
if classification:
y = np.random.randint(0, 2, sample_size)
@@ -66,7 +67,7 @@ def get_test_data(sample_size=1000, embedding_size=4, sparse_feature_num=1, dens
return model_input, y, feature_columns
-def layer_test(layer_cls, kwargs = {}, input_shape=None,
+def layer_test(layer_cls, kwargs={}, input_shape=None,
input_dtype=torch.float32, input_data=None, expected_output=None,
expected_output_shape=None, expected_output_dtype=None, fixed_batch_size=False):
'''check layer is valid or not
@@ -90,7 +91,7 @@ def layer_test(layer_cls, kwargs = {}, input_shape=None,
for i, e in enumerate(input_data_shape):
if e is None:
input_data_shape[i] = np.random.randint(1, 4)
-
+
if all(isinstance(e, tuple) for e in input_data_shape):
input_data = []
for e in input_data_shape:
@@ -104,37 +105,37 @@ def layer_test(layer_cls, kwargs = {}, input_shape=None,
# use input_data to update other parameters
if input_shape is None:
input_shape = input_data.shape
-
+
if expected_output_dtype is None:
expected_output_dtype = input_dtype
-
+
# layer initialization
layer = layer_cls(**kwargs)
-
+
if fixed_batch_size:
inputs = torch.tensor(input_data.unsqueeze(0), dtype=input_dtype)
else:
inputs = torch.tensor(input_data, dtype=input_dtype)
-
+
# calculate layer's output
output = layer(inputs)
if not output.dtype == expected_output_dtype:
raise AssertionError("layer output dtype does not match with the expected one")
-
+
if not expected_output_shape:
- raise ValueError("expected output shape should not be none")
+ raise ValueError("expected output shape should not be none")
actual_output_shape = output.shape
for expected_dim, actual_dim in zip(expected_output_shape, actual_output_shape):
if expected_dim is not None:
if not expected_dim == actual_dim:
raise AssertionError(f"expected_dim:{expected_dim}, actual_dim:{actual_dim}")
-
+
if expected_output is not None:
# check whether output equals to expected output
assert_allclose(output, expected_output, rtol=1e-3)
-
+
return output
@@ -148,10 +149,14 @@ def check_model(model, model_name, x, y, check_model_io=True):
:param check_model_io:
:return:
'''
+ early_stopping = EarlyStopping(monitor='val_acc', min_delta=0, verbose=1, patience=0, mode='max')
+ model_checkpoint = ModelCheckpoint(filepath='model.ckpt', monitor='val_acc', verbose=1,
+ save_best_only=True,
+ save_weights_only=False, mode='max', period=1)
model.compile('adam', 'binary_crossentropy',
- metrics=['binary_crossentropy'])
- model.fit(x, y, batch_size=100, epochs=1, validation_split=0.5)
+ metrics=['binary_crossentropy', 'acc'])
+ model.fit(x, y, batch_size=100, epochs=1, validation_split=0.5, callbacks=[early_stopping, model_checkpoint])
print(model_name + 'test, train valid pass!')
torch.save(model.state_dict(), model_name + '_weights.h5')
@@ -165,9 +170,10 @@ def check_model(model, model_name, x, y, check_model_io=True):
print(model_name + 'test save load model pass!')
print(model_name + 'test pass!')
-def get_device(use_cuda = True):
+
+def get_device(use_cuda=True):
device = 'cpu'
if use_cuda and torch.cuda.is_available():
print('cuda ready...')
device = 'cuda:0'
- return device
\ No newline at end of file
+ return device