diff --git a/data-science/model/recsys.ipynb b/data-science/model/recsys.ipynb
new file mode 100644
index 0000000..577f67c
--- /dev/null
+++ b/data-science/model/recsys.ipynb
@@ -0,0 +1,600 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "382f9667-f39c-4766-9f56-ddf936941cec",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Import the required libraries\n",
+ "import pandas as pd\n",
+ "from scipy.sparse import csr_matrix\n",
+ "from implicit.cpu.als import AlternatingLeastSquares\n",
+ "from implicit.evaluation import mean_average_precision_at_k"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "474de742-7219-4777-9afa-8ae2b268aa45",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Load the interaction log from interactions.csv, parsing last_watch_dt as datetimes\n",
+ "df = pd.read_csv('interactions.csv', parse_dates=['last_watch_dt'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "c712c399-8c1f-4394-a03a-97a19cbacd2b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " user_id | \n",
+ " item_id | \n",
+ " last_watch_dt | \n",
+ " total_dur | \n",
+ " watched_pct | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 176549 | \n",
+ " 9506 | \n",
+ " 2021-05-11 | \n",
+ " 4250 | \n",
+ " 72.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 699317 | \n",
+ " 1659 | \n",
+ " 2021-05-29 | \n",
+ " 8317 | \n",
+ " 100.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 656683 | \n",
+ " 7107 | \n",
+ " 2021-05-09 | \n",
+ " 10 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 864613 | \n",
+ " 7638 | \n",
+ " 2021-07-05 | \n",
+ " 14483 | \n",
+ " 100.0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 964868 | \n",
+ " 9506 | \n",
+ " 2021-04-30 | \n",
+ " 6725 | \n",
+ " 100.0 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 5476246 | \n",
+ " 648596 | \n",
+ " 12225 | \n",
+ " 2021-08-13 | \n",
+ " 76 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 5476247 | \n",
+ " 546862 | \n",
+ " 9673 | \n",
+ " 2021-04-13 | \n",
+ " 2308 | \n",
+ " 49.0 | \n",
+ "
\n",
+ " \n",
+ " 5476248 | \n",
+ " 697262 | \n",
+ " 15297 | \n",
+ " 2021-08-20 | \n",
+ " 18307 | \n",
+ " 63.0 | \n",
+ "
\n",
+ " \n",
+ " 5476249 | \n",
+ " 384202 | \n",
+ " 16197 | \n",
+ " 2021-04-19 | \n",
+ " 6203 | \n",
+ " 100.0 | \n",
+ "
\n",
+ " \n",
+ " 5476250 | \n",
+ " 319709 | \n",
+ " 4436 | \n",
+ " 2021-08-15 | \n",
+ " 3921 | \n",
+ " 45.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5476251 rows × 5 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " user_id item_id last_watch_dt total_dur watched_pct\n",
+ "0 176549 9506 2021-05-11 4250 72.0\n",
+ "1 699317 1659 2021-05-29 8317 100.0\n",
+ "2 656683 7107 2021-05-09 10 0.0\n",
+ "3 864613 7638 2021-07-05 14483 100.0\n",
+ "4 964868 9506 2021-04-30 6725 100.0\n",
+ "... ... ... ... ... ...\n",
+ "5476246 648596 12225 2021-08-13 76 0.0\n",
+ "5476247 546862 9673 2021-04-13 2308 49.0\n",
+ "5476248 697262 15297 2021-08-20 18307 63.0\n",
+ "5476249 384202 16197 2021-04-19 6203 100.0\n",
+ "5476250 319709 4436 2021-08-15 3921 45.0\n",
+ "\n",
+ "[5476251 rows x 5 columns]"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Display the loaded interactions (pandas truncates the view to the first and last rows)\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "5af68c1f-a8be-46c2-ab70-4cbec16284a5",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 5476251 entries, 0 to 5476250\n",
+ "Data columns (total 5 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 user_id 5476251 non-null int64 \n",
+ " 1 item_id 5476251 non-null int64 \n",
+ " 2 last_watch_dt 5476251 non-null datetime64[ns]\n",
+ " 3 total_dur 5476251 non-null int64 \n",
+ " 4 watched_pct 5475423 non-null float64 \n",
+ "dtypes: datetime64[ns](1), float64(1), int64(3)\n",
+ "memory usage: 208.9 MB\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Print column dtypes together with non-null counts to spot missing values\n",
+ "df.info(show_counts=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "2271d643-6004-4b77-814e-6fe0f16e4e69",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df['watched_pct'] = df['watched_pct'].fillna(0) # fill missing values with zeros"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "510bf1f1-28b4-442e-8dbe-f714ec2ea43f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(Timestamp('2021-03-13 00:00:00'), Timestamp('2021-08-22 00:00:00'))"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Earliest and latest last_watch_dt present in the data\n",
+ "df['last_watch_dt'].min(), df['last_watch_dt'].max()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "cb768800-08a8-4e77-816f-ca6f7b719f0d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " user_id | \n",
+ " item_id | \n",
+ " last_watch_dt | \n",
+ " total_dur | \n",
+ " watched_pct | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 176549 | \n",
+ " 9506 | \n",
+ " 2021-05-11 | \n",
+ " 4250 | \n",
+ " 72.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 699317 | \n",
+ " 1659 | \n",
+ " 2021-05-29 | \n",
+ " 8317 | \n",
+ " 100.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 656683 | \n",
+ " 7107 | \n",
+ " 2021-05-09 | \n",
+ " 10 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 864613 | \n",
+ " 7638 | \n",
+ " 2021-07-05 | \n",
+ " 14483 | \n",
+ " 100.0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 964868 | \n",
+ " 9506 | \n",
+ " 2021-04-30 | \n",
+ " 6725 | \n",
+ " 100.0 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 5476245 | \n",
+ " 786732 | \n",
+ " 4880 | \n",
+ " 2021-05-12 | \n",
+ " 753 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 5476246 | \n",
+ " 648596 | \n",
+ " 12225 | \n",
+ " 2021-08-13 | \n",
+ " 76 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 5476247 | \n",
+ " 546862 | \n",
+ " 9673 | \n",
+ " 2021-04-13 | \n",
+ " 2308 | \n",
+ " 49.0 | \n",
+ "
\n",
+ " \n",
+ " 5476249 | \n",
+ " 384202 | \n",
+ " 16197 | \n",
+ " 2021-04-19 | \n",
+ " 6203 | \n",
+ " 100.0 | \n",
+ "
\n",
+ " \n",
+ " 5476250 | \n",
+ " 319709 | \n",
+ " 4436 | \n",
+ " 2021-08-15 | \n",
+ " 3921 | \n",
+ " 45.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5051815 rows × 5 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " user_id item_id last_watch_dt total_dur watched_pct\n",
+ "0 176549 9506 2021-05-11 4250 72.0\n",
+ "1 699317 1659 2021-05-29 8317 100.0\n",
+ "2 656683 7107 2021-05-09 10 0.0\n",
+ "3 864613 7638 2021-07-05 14483 100.0\n",
+ "4 964868 9506 2021-04-30 6725 100.0\n",
+ "... ... ... ... ... ...\n",
+ "5476245 786732 4880 2021-05-12 753 0.0\n",
+ "5476246 648596 12225 2021-08-13 76 0.0\n",
+ "5476247 546862 9673 2021-04-13 2308 49.0\n",
+ "5476249 384202 16197 2021-04-19 6203 100.0\n",
+ "5476250 319709 4436 2021-08-15 3921 45.0\n",
+ "\n",
+ "[5051815 rows x 5 columns]"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Time-based split: the last 7 days of the log form the test set,\n",
+ "# everything up to and including the split date is used for training.\n",
+ "split_date = '2021-08-15'\n",
+ "watch_dt = df['last_watch_dt']\n",
+ "test = df.loc[watch_dt > split_date]\n",
+ "train = df.loc[watch_dt <= split_date]\n",
+ "train"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "893fd034-c28a-47e8-a8d7-38a27f17991c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "user_id 906071\n",
+ "item_id 15577\n",
+ "last_watch_dt 156\n",
+ "total_dur 126663\n",
+ "watched_pct 101\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Number of distinct values per column in the training split\n",
+ "train.nunique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "0be849ba-66fb-4a28-ab63-81fb740c38e9",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "user_id 167348\n",
+ "item_id 7106\n",
+ "last_watch_dt 7\n",
+ "total_dur 38328\n",
+ "watched_pct 101\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Number of distinct values per column in the test split\n",
+ "test.nunique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "a9253f90-8245-449c-a119-d68faba82874",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Assumption: the share of an item that was watched is linearly related to the\n",
+ "# probability that the user liked it, so watched_pct / 100 is used as the weight.\n",
+ "\n",
+ "train_df = train[['user_id', 'item_id', 'watched_pct']].copy()\n",
+ "test_df = test[['user_id', 'item_id', 'watched_pct']].copy()\n",
+ "\n",
+ "train_df['watched_pct'] = train_df['watched_pct'] / 100\n",
+ "test_df['watched_pct'] = test_df['watched_pct'] / 100\n",
+ "\n",
+ "# Build sparse users x items interaction matrices for the train and test splits.\n",
+ "# A single id -> index mapping, learned from the training split, is shared by both\n",
+ "# matrices: the evaluation matches train and test purely by row/column position,\n",
+ "# so encoding the test split with its own mapping would compare unrelated users\n",
+ "# and items and make the metric meaningless.\n",
+ "\n",
+ "userid = list(train_df['user_id'].unique())\n",
+ "itemid = list(train_df['item_id'].unique())\n",
+ "\n",
+ "\n",
+ "def build_interaction_matrix(interactions):\n",
+ "    \"\"\"Encode interactions as a users x items CSR matrix in the training index space.\n",
+ "\n",
+ "    Rows follow `userid` and columns follow `itemid`. Interactions whose user or\n",
+ "    item is absent from the training split are dropped, because the model has no\n",
+ "    factors for them and cannot be scored on them.\n",
+ "\n",
+ "    Parameters\n",
+ "    ----------\n",
+ "    interactions : pd.DataFrame\n",
+ "        Frame with 'user_id', 'item_id' and 'watched_pct' columns.\n",
+ "\n",
+ "    Returns\n",
+ "    -------\n",
+ "    scipy.sparse.csr_matrix\n",
+ "        Matrix of shape (len(userid), len(itemid)) holding watched_pct values.\n",
+ "    \"\"\"\n",
+ "    # Categorical codes are -1 for ids that are not among the given categories\n",
+ "    row = pd.Categorical(interactions['user_id'], categories=userid).codes\n",
+ "    col = pd.Categorical(interactions['item_id'], categories=itemid).codes\n",
+ "    known = (row >= 0) & (col >= 0)\n",
+ "    data = interactions['watched_pct'].to_numpy()[known]\n",
+ "    return csr_matrix((data, (row[known], col[known])), shape=(len(userid), len(itemid)))\n",
+ "\n",
+ "\n",
+ "train_matrix = build_interaction_matrix(train_df)\n",
+ "test_matrix = build_interaction_matrix(test_df)\n",
+ "\n",
+ "# Both matrices must live in the same index space for the evaluation to be valid\n",
+ "assert train_matrix.shape == test_matrix.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "289cbc71-933d-400f-88c0-43144cf15977",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(<906071x15577 sparse matrix of type ''\n",
+ " \twith 5051815 stored elements in Compressed Sparse Row format>,\n",
+ " <167348x7106 sparse matrix of type ''\n",
+ " \twith 424436 stored elements in Compressed Sparse Row format>)"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Show both sparse matrices (shape and number of stored elements)\n",
+ "train_matrix, test_matrix"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "54cd1509-5a95-4ce1-9048-c0a9f2fe253a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "9e3828e203024fd585b547eab0c55daa",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/15 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Train the recommender: ALS with 64 factors, regularization 0.05 and a fixed random_state\n",
+ "\n",
+ "model = AlternatingLeastSquares(factors=64, regularization=0.05, random_state=42)\n",
+ "model.fit(train_matrix)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "c5f9f955-2e28-4d0a-86c2-6154a5acc3b2",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "5dda9a5f55d14f3aabf97a005196c830",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/167348 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "0.0034842071949627938"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Compute the MAP@10 metric for the trained model on the test interactions\n",
+ "# NOTE(review): the evaluation appears to match train and test by row/column position,\n",
+ "# so both matrices must share the same user/item indexing - confirm against implicit docs\n",
+ "mean_average_precision_at_k(model, train_matrix, test_matrix, K=10)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5be3eace-facb-441e-819a-eb9ae4bef5b2",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}