Skip to content

Commit

Permalink
Merge pull request #49 from shenweichen/final-ccpm
Browse files Browse the repository at this point in the history
add ccpm
  • Loading branch information
浅梦 authored Sep 23, 2019
2 parents abb3a82 + 36a8f66 commit 328255d
Show file tree
Hide file tree
Showing 7 changed files with 192 additions and 6 deletions.
1 change: 1 addition & 0 deletions deepctr_torch/layers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .interaction import *
from .core import *
from .utils import concat_fun
from .sequence import KMaxPooling
25 changes: 24 additions & 1 deletion deepctr_torch/layers/core.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

import math

class DNN(nn.Module):
"""The Multi Layer Percetron
Expand Down Expand Up @@ -94,3 +94,26 @@ def forward(self, X):
if self.task == "binary":
output = torch.sigmoid(output)
return output

class Conv2dSame(nn.Conv2d):
    """2D convolution with TensorFlow-style 'SAME' padding.

    Padding is computed dynamically in ``forward`` from the input size,
    kernel size, stride and dilation, so that with unit stride the output
    spatial dimensions equal the input spatial dimensions. The ``padding``
    constructor argument is accepted only for signature compatibility with
    ``nn.Conv2d``; the parent is always constructed with padding 0.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, bias=True):
        # Force padding 0 on the parent; the real padding happens in forward().
        super(Conv2dSame, self).__init__(
            in_channels, out_channels, kernel_size, stride, 0, dilation,
            groups, bias)
        nn.init.xavier_uniform_(self.weight)

    @staticmethod
    def _same_pad(size, stride, kernel, dilation):
        # Total padding needed along one spatial dimension for 'SAME' output.
        out_size = math.ceil(size / stride)
        return max((out_size - 1) * stride + (kernel - 1) * dilation + 1 - size, 0)

    def forward(self, x):
        in_h, in_w = x.size()[-2:]
        k_h, k_w = self.weight.size()[-2:]
        total_h = self._same_pad(in_h, self.stride[0], k_h, self.dilation[0])
        total_w = self._same_pad(in_w, self.stride[1], k_w, self.dilation[1])
        if total_h or total_w:
            # F.pad order is (left, right, top, bottom); for odd totals the
            # extra pixel goes to the right/bottom, as TensorFlow does.
            x = F.pad(x, [total_w // 2, total_w - total_w // 2,
                          total_h // 2, total_h - total_h // 2])
        return F.conv2d(x, self.weight, self.bias, self.stride,
                        self.padding, self.dilation, self.groups)
45 changes: 44 additions & 1 deletion deepctr_torch/layers/interaction.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import torch.nn as nn
import torch.nn.functional as F

from ..layers.sequence import KMaxPooling
from ..layers.core import Conv2dSame

class FM(nn.Module):
"""Factorization Machine models pairwise (order-2) feature interactions
Expand Down Expand Up @@ -567,4 +569,45 @@ def forward(self, inputs):

# p q # b * p * k

return kp
return kp

class ConvLayer(nn.Module):
    """Conv Layer used in CCPM. This implementation is adapted from code that
    the author of the paper published on http://ir.ia.ac.cn/bitstream/173211/12337/1/A%20Convolutional%20Click%20Prediction%20Model.pdf.
    Input shape
        - A list of N 3D tensor with shape: ``(batch_size,1,filed_size,embedding_size)``.
    Output shape
        - A list of N 3D tensor with shape: ``(batch_size,last_filters,pooling_size,embedding_size)``.
    Arguments
        - **filed_size**: positive integer, number of feature fields (rows of the
          embedding "image"); parameter name kept for interface compatibility.
        - **conv_kernel_width**: list. list of positive integer or empty list,the width of filter in each conv layer.
        - **conv_filters**: list. list of positive integer or empty list,the number of filters in each conv layer.
    Reference:
        - Liu Q, Yu F, Wu S, et al. A convolutional click prediction model[C]//Proceedings of the 24th ACM International on Conference on Information and Knowledge Management. ACM, 2015: 1743-1746.(http://ir.ia.ac.cn/bitstream/173211/12337/1/A%20Convolutional%20Click%20Prediction%20Model.pdf)
    """

    def __init__(self, filed_size, conv_kernel_width, conv_filters, device='cpu'):
        super(ConvLayer, self).__init__()
        self.device = device
        module_list = []
        n = filed_size
        l = len(conv_filters)
        # Running number of rows entering each conv stage; starts at the
        # field size and shrinks after every k-max pooling.
        cur_height = n
        for i in range(1, l + 1):
            # First conv reads the single-channel embedding image; later
            # convs read the previous layer's filters.
            in_channels = 1 if i == 1 else conv_filters[i - 2]
            out_channels = conv_filters[i - 1]
            width = conv_kernel_width[i - 1]
            # Pooling-size schedule from the CCPM paper: shrink gradually
            # with depth, ending with a fixed k = 3 at the last layer.
            k = max(1, int((1 - pow(i / l, l - i)) * n)) if i < l else 3
            # FIX: clamp k at EVERY layer to the rows actually available.
            # The original clamped only at i == 1 (where the schedule already
            # guarantees k < n), so a later layer — notably the fixed k = 3 —
            # could request more rows than the previous pooling left, making
            # torch.topk raise for small field sizes. For configurations that
            # worked before, the clamp never triggers and behavior is identical.
            k = min(k, cur_height)
            module_list.append(Conv2dSame(in_channels=in_channels, out_channels=out_channels,
                                          kernel_size=(width, 1), stride=1).to(self.device))
            module_list.append(torch.nn.Tanh().to(self.device))
            # KMaxPooling keeps the k largest values along the field axis
            # (returns values only).
            module_list.append(KMaxPooling(k=k, axis=2, device=self.device).to(self.device))
            cur_height = k
        # Final pooled height, exposed so callers can size downstream layers.
        self.filed_shape = cur_height
        self.conv_layer = nn.Sequential(*module_list)
        self.to(device)

    def forward(self, inputs):
        return self.conv_layer(inputs)
30 changes: 30 additions & 0 deletions deepctr_torch/layers/sequence.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import torch.nn as nn
import torch
class KMaxPooling(nn.Module):
    """Select the k largest values along a given axis.

    Input shape
        - nD tensor with shape: ``(batch_size, ..., input_dim)``.
    Output shape
        - nD tensor where the ``axis`` dimension is reduced to ``k``.
    Arguments
        - **k**: positive integer, number of top elements to keep along ``axis``.
        - **axis**: the dimension along which the top-k values are taken.
    """

    def __init__(self, k, axis, device='cpu'):
        super(KMaxPooling, self).__init__()
        self.k = k
        self.axis = axis
        self.to(device)

    def forward(self, input):
        # topk returns (values, indices); keep only the values, which are
        # sorted in descending order along `axis`.
        values, _ = torch.topk(input, k=self.k, dim=self.axis, sorted=True)
        return values




1 change: 1 addition & 0 deletions deepctr_torch/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@
from .mlr import MLR
from .onn import ONN
from .pnn import PNN
from .ccpm import CCPM
NFFM = ONN
87 changes: 87 additions & 0 deletions deepctr_torch/models/ccpm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# -*- coding:utf-8 -*-
"""
Author:
Zeng Kai,[email protected]
Reference:
[1] Liu Q, Yu F, Wu S, et al. A convolutional click prediction model[C]//Proceedings of the 24th ACM International on Conference on Information and Knowledge Management. ACM, 2015: 1743-1746.
(http://ir.ia.ac.cn/bitstream/173211/12337/1/A%20Convolutional%20Click%20Prediction%20Model.pdf)
"""
import torch
import torch.nn as nn
import torch.nn.functional as F

from .basemodel import BaseModel
from ..layers.core import DNN, Conv2dSame
from ..layers.utils import concat_fun
from ..layers.sequence import KMaxPooling
from ..layers.interaction import ConvLayer


class CCPM(BaseModel):
    """Instantiates the Convolutional Click Prediction Model architecture.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param embedding_size: positive integer, sparse feature embedding_size.
    :param conv_kernel_width: list of positive integers (or empty), the width of the filter in each conv layer.
    :param conv_filters: list of positive integers (or empty), the number of filters in each conv layer.
    :param dnn_hidden_units: list of positive integers (or empty), the layer number and units in each layer of DNN.
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part.
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector.
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param init_std: float, to use as the initialize std of embedding vector.
    :param seed: integer, to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss.
    :param device: str, ``"cpu"`` or ``"cuda:0"``.
    :return: A PyTorch model instance.
    """

    def __init__(self, linear_feature_columns, dnn_feature_columns, embedding_size=8, conv_kernel_width=(6, 5),
                 conv_filters=(4, 4),
                 dnn_hidden_units=(256,), l2_reg_linear=1e-5, l2_reg_embedding=1e-5, l2_reg_dnn=0, dnn_dropout=0,
                 init_std=0.0001, seed=1024, task='binary', device='cpu', dnn_use_bn=False, dnn_activation=F.relu):

        super(CCPM, self).__init__(linear_feature_columns, dnn_feature_columns,
                                   embedding_size=embedding_size,
                                   dnn_hidden_units=dnn_hidden_units,
                                   l2_reg_linear=l2_reg_linear,
                                   l2_reg_embedding=l2_reg_embedding,
                                   l2_reg_dnn=l2_reg_dnn,
                                   init_std=init_std, seed=seed,
                                   dnn_dropout=dnn_dropout,
                                   dnn_activation=dnn_activation,
                                   task=task, device=device)

        if len(conv_kernel_width) != len(conv_filters):
            raise ValueError(
                "conv_kernel_width must have same element with conv_filters")

        # Field count for the conv stack: sparse/feature-group width only,
        # dense features are excluded.
        field_size = self.compute_input_dim(dnn_feature_columns, embedding_size,
                                            include_dense=False, feature_group=True)
        self.conv_layer = ConvLayer(filed_size=field_size,
                                    conv_kernel_width=conv_kernel_width,
                                    conv_filters=conv_filters, device=device)
        # The last ConvLayer stage k-max-pools to 3 rows, so the flattened DNN
        # input is 3 * embedding_size * (filters of the last conv layer).
        self.dnn_input_dim = 3 * embedding_size * conv_filters[-1]
        self.dnn = DNN(self.dnn_input_dim, dnn_hidden_units,
                       activation=dnn_activation, l2_reg=l2_reg_dnn,
                       dropout_rate=dnn_dropout, use_bn=dnn_use_bn,
                       init_std=init_std, device=device)
        self.dnn_linear = nn.Linear(dnn_hidden_units[-1], 1, bias=False).to(device)

        # Regularize DNN weights (excluding batch-norm params) and the final
        # linear projection.
        dnn_weights = filter(lambda x: 'weight' in x[0] and 'bn' not in x[0],
                             self.dnn.named_parameters())
        self.add_regularization_loss(dnn_weights, l2_reg_dnn)
        self.add_regularization_loss(self.dnn_linear.weight, l2_reg_dnn)

        self.to(device)

    def forward(self, X):
        linear_logit = self.linear_model(X)
        # NOTE(review): dense values are requested (support_dense=True) but
        # discarded below — only sparse embeddings feed the conv stack.
        sparse_embedding_list, _ = self.input_from_feature_columns(
            X, self.dnn_feature_columns, self.embedding_dict, support_dense=True)
        # (batch, field, embedding) -> (batch, 1, field, embedding): the conv
        # stack treats the embedding matrix as a single-channel image.
        conv_input = torch.unsqueeze(concat_fun(sparse_embedding_list, axis=1), 1)
        pooled = self.conv_layer(conv_input)
        dnn_input = pooled.view(pooled.size(0), -1)
        dnn_logit = self.dnn_linear(self.dnn(dnn_input))
        y_pred = self.out(linear_logit + dnn_logit)
        return y_pred
9 changes: 5 additions & 4 deletions examples/run_classification_criteo.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-

import sys
sys.path.append('/home/SENSETIME/zengkai/final_deepCTR/DeepCTR-Torch')
import pandas as pd
from sklearn.metrics import log_loss, roc_auc_score
from sklearn.model_selection import train_test_split
Expand All @@ -10,7 +11,7 @@


if __name__ == "__main__":
data = pd.read_csv('./criteo_sample.txt')
data = pd.read_csv('/home/SENSETIME/zengkai/final_deepCTR/DeepCTR-Torch/examples/criteo_sample.txt')

sparse_features = ['C' + str(i) for i in range(1, 27)]
dense_features = ['I' + str(i) for i in range(1, 14)]
Expand Down Expand Up @@ -52,13 +53,13 @@
print('cuda ready...')
device = 'cuda:0'

model = DeepFM(linear_feature_columns=linear_feature_columns, dnn_feature_columns=dnn_feature_columns, task='binary',
model = CCPM(linear_feature_columns=linear_feature_columns, dnn_feature_columns=dnn_feature_columns, task='binary',
l2_reg_embedding=1e-5, device=device)

model.compile("adagrad", "binary_crossentropy",
metrics=["binary_crossentropy", "auc"],)
model.fit(train_model_input, train[target].values,
batch_size=256, epochs=10, validation_split=0.2, verbose=2)
batch_size=32, epochs=10, validation_split=0.2, verbose=2)

pred_ans = model.predict(test_model_input, 256)
print("")
Expand Down

0 comments on commit 328255d

Please sign in to comment.