Commit c17001b: v0.3 ready

tae898 committed Mar 15, 2022
1 parent 9831601
Showing 12 changed files with 711 additions and 1 deletion.
2 changes: 2 additions & 0 deletions .dockerignore
@@ -0,0 +1,2 @@
results/
multimodal-datasets/
4 changes: 4 additions & 0 deletions .gitignore
@@ -142,3 +142,7 @@ checkpoints*

# Pytorch models
*.pt

# pretrained models
emoberta-base/
emoberta-large/
67 changes: 66 additions & 1 deletion README.md
@@ -13,7 +13,7 @@ At the moment, we only use the text modality to correctly classify the emotion of the utterances.
1. [`multimodal-datasets` repo](https://github.com/tae898/multimodal-datasets) (submodule)
1. pip install -r requirements.txt

## RoBERTa training
## EmoBERTa training

First configure the hyperparameters and the dataset in `train-erc-text.yaml`, and then run the commands below in this directory. I recommend running them in a virtualenv.
@@ -40,6 +40,71 @@ If you want to see more training and test details, check out `./results/`

If you want to download the trained checkpoints, [here](https://surfdrive.surf.nl/files/index.php/s/khREwk4MUI7MSnO/download) is where you can get them. Be warned: it's a pretty big zip file.

## Deployment

### Huggingface

We have released our models on the Hugging Face Hub:

- [emoberta-base](https://huggingface.co/tae898/emoberta-base)
- [emoberta-large](https://huggingface.co/tae898/emoberta-large)

They are based on [RoBERTa-base](https://huggingface.co/roberta-base) and [RoBERTa-large](https://huggingface.co/roberta-large), respectively. They were trained on [both MELD and IEMOCAP datasets](utterance-ordered-MELD_IEMOCAP.json). Our deployed models are neither speaker-aware nor context-aware: they classify one utterance at a time (e.g., "I love you") without speaker information or previous utterances.
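
If you just want to play with the checkpoints, here is a minimal single-utterance inference sketch. It mirrors what [`app.py`](app.py) does; the label order below is the one `app.py` uses.

```python
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Label order taken from app.py.
emotions = ["neutral", "joy", "surprise", "anger", "sadness", "disgust", "fear"]

tokenizer = AutoTokenizer.from_pretrained("tae898/emoberta-base")
model = AutoModelForSequenceClassification.from_pretrained("tae898/emoberta-base")
model.eval()

# Tokenize one utterance and run a forward pass without tracking gradients.
tokens = tokenizer("I love you", truncation=True, return_tensors="pt")
with torch.no_grad():
    logits = model(**tokens).logits
probs = torch.softmax(logits, dim=1).squeeze()

print({emotion: round(prob.item(), 4) for emotion, prob in zip(emotions, probs)})
```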

### Flask app

You can run the Flask RESTful server app either as a Docker container or directly as a Python script.

1. Running the app as a Docker container **(recommended)**.

   There are four images. Take what you need:

   - `docker run -it --rm -p 10006:10006 tae898/emoberta-base`
   - `docker run -it --rm -p 10006:10006 --gpus all tae898/emoberta-base-cuda`
   - `docker run -it --rm -p 10006:10006 tae898/emoberta-large`
   - `docker run -it --rm -p 10006:10006 --gpus all tae898/emoberta-large-cuda`

1. Running the app in your Python environment:

   This method is less recommended than the Docker one.

   Run `pip install -r requirements-deploy.txt` first.<br>
   [`app.py`](app.py) is a Flask RESTful server. The usage is below:

   ```console
   app.py [-h] [--host HOST] [--port PORT] [--device DEVICE] [--model-type MODEL_TYPE]
   ```

   For example:

   ```sh
   python app.py --host 0.0.0.0 --port 10006 --device cpu --model-type emoberta-base
   ```

### Client

Once the app is running, you can send a text to the server. You can do so with [client.py](client.py).

For example:

```sh
python client.py --text "Emotion recognition is so cool\!"
```

will give you:

```json
{
"neutral": 0.0049800905,
"joy": 0.96399665,
"surprise": 0.018937444,
"anger": 0.0071516023,
"sadness": 0.002021492,
"disgust": 0.001495996,
"fear": 0.0014167271
}
```
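
If you would rather not use `client.py`, keep in mind that the server expects a jsonpickle-encoded payload (see [client.py](client.py)). A minimal sketch with `requests`:

```python
import jsonpickle
import requests

# The flask server expects a jsonpickle-encoded payload (see client.py).
payload = jsonpickle.encode({"text": "Emotion recognition is so cool!"})
response = requests.post("http://127.0.0.1:10006/", json=payload)

# The response body is jsonpickle-encoded as well.
print(jsonpickle.decode(response.text))
```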

## Troubleshooting

The best way to find and solve your problems is to search the GitHub issues tab. If you can't find what you need, feel free to open an issue. We are pretty responsive.
128 changes: 128 additions & 0 deletions app.py
@@ -0,0 +1,128 @@
"""Emoberta app"""
import argparse
import logging
import os

import jsonpickle
import torch
from flask import Flask, request
from transformers import AutoModelForSequenceClassification, AutoTokenizer

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s.%(msecs)03d %(levelname)s %(module)s - %(funcName)s: %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)


# ---------------------- GLOBAL VARIABLES ---------------------- #
emotions = [
    "neutral",
    "joy",
    "surprise",
    "anger",
    "sadness",
    "disgust",
    "fear",
]
id2emotion = {idx: emotion for idx, emotion in enumerate(emotions)}

# These globals are populated by load_tokenizer_model() at startup.
tokenizer = None
model = None
device = None

app = Flask(__name__)
# --------------------------------------------------------------- #


def load_tokenizer_model(model_type: str, device_: str) -> None:
    """Load tokenizer and model.

    Args
    ----
    model_type: Should be either "emoberta-base" or "emoberta-large"
    device_: "cpu" or "cuda"

    """
    if "large" in model_type.lower():
        model_type = "emoberta-large"
    elif "base" in model_type.lower():
        model_type = "emoberta-base"
    else:
        raise ValueError(
            f"{model_type} is not a valid model type! Should be 'base' or 'large'."
        )

    # Fall back to the Hugging Face Hub when the checkpoint is not a local directory.
    if not os.path.isdir(model_type):
        model_type = f"tae898/{model_type}"

    global device
    device = device_
    global tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_type)
    global model
    model = AutoModelForSequenceClassification.from_pretrained(model_type)
    model.eval()
    model.to(device)


@app.route("/", methods=["POST"])
def run_emoberta():
    """Receive everything in json!!!"""
    app.logger.debug("Receiving data ...")
    data = request.json
    data = jsonpickle.decode(data)

    text = data["text"]

    app.logger.info(f"raw text received: {text}")

    tokens = tokenizer(text, truncation=True)

    # Turn the python lists into (1, seq_len) tensors on the right device.
    tokens["input_ids"] = torch.tensor(tokens["input_ids"]).view(1, -1).to(device)
    tokens["attention_mask"] = (
        torch.tensor(tokens["attention_mask"]).view(1, -1).to(device)
    )

    outputs = model(**tokens)
    outputs = torch.softmax(outputs["logits"].detach().cpu(), dim=1).squeeze().numpy()
    outputs = {id2emotion[idx]: prob.item() for idx, prob in enumerate(outputs)}
    app.logger.info(f"prediction: {outputs}")

    response = jsonpickle.encode(outputs)
    app.logger.info("json-pickle is done.")

    return response


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="emoberta app.")
    parser.add_argument(
        "--host",
        type=str,
        default="0.0.0.0",
        help="host ip address",
    )
    parser.add_argument(
        "--port",
        type=int,
        default=10006,
        help="port number",
    )
    parser.add_argument(
        "--device",
        type=str,
        default="cpu",
        help="cpu or cuda",
    )
    parser.add_argument(
        "--model-type",
        type=str,
        default="emoberta-base",
        help="should be either emoberta-base or emoberta-large",
    )

    args = parser.parse_args()
    load_tokenizer_model(args.model_type, args.device)

    app.run(host=args.host, port=args.port)
45 changes: 45 additions & 0 deletions client.py
@@ -0,0 +1,45 @@
"""
This is just a simple client example. Hack it as much as you want.
"""
import argparse
import logging

import jsonpickle
import requests

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s.%(msecs)03d %(levelname)s %(module)s - %(funcName)s: %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)


def run_text(text: str, url_emoberta: str) -> None:
    """Send data to the flask server.

    Args
    ----
    text: raw text
    url_emoberta: e.g., http://127.0.0.1:10006/

    """
    data = {"text": text}

    logging.debug("sending text to server...")
    # The server expects a jsonpickle-encoded payload.
    data = jsonpickle.encode(data)
    response = requests.post(url_emoberta, json=data)
    logging.info(f"got {response} from server!...")
    print(response.text)
    response = jsonpickle.decode(response.text)

    logging.info(f"emoberta results: {response}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Classify the emotion of a text")
    parser.add_argument("--url-emoberta", type=str, default="http://127.0.0.1:10006/")
    parser.add_argument("--text", type=str, required=True)

    args = vars(parser.parse_args())

    run_text(**args)
15 changes: 15 additions & 0 deletions docker/Dockerfile-base
@@ -0,0 +1,15 @@
FROM python:3.8.12
ENV DEBIAN_FRONTEND=noninteractive

WORKDIR /app

COPY emoberta-base ./emoberta-base
COPY utils ./utils
COPY app.py ./
COPY requirements-deploy.txt ./

RUN apt update
RUN python3.8 -m pip install --upgrade pip
RUN python3.8 -m pip install -r requirements-deploy.txt

CMD ["python3.8", "app.py", "--model-type", "emoberta-base", "--device", "cpu"]
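
Note that this Dockerfile `COPY`s the pretrained checkpoint directory (`emoberta-base/`), which is git-ignored, so the downloaded checkpoint has to sit in the build context before you build. Assuming you build from the repo root, something like `docker build -f docker/Dockerfile-base -t tae898/emoberta-base .` should work.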
21 changes: 21 additions & 0 deletions docker/Dockerfile-base-cuda
@@ -0,0 +1,21 @@
FROM nvidia/cuda:11.2.2-cudnn8-runtime-ubuntu20.04

ENV DEBIAN_FRONTEND=noninteractive

WORKDIR /app

COPY emoberta-base ./emoberta-base
COPY utils ./utils
COPY app.py ./
COPY requirements-deploy.txt ./

RUN apt update
RUN apt install software-properties-common -y
RUN add-apt-repository ppa:deadsnakes/ppa -y
RUN apt update
RUN apt install python3.8 python3.8-dev python3-pip -y
RUN python3.8 -m pip install --upgrade pip
RUN python3.8 -m pip install -r requirements-deploy.txt

CMD ["python3.8", "app.py", "--model-type", "emoberta-base", "--device", "cuda"]

15 changes: 15 additions & 0 deletions docker/Dockerfile-large
@@ -0,0 +1,15 @@
FROM python:3.8.12
ENV DEBIAN_FRONTEND=noninteractive

WORKDIR /app

COPY emoberta-large ./emoberta-large
COPY utils ./utils
COPY app.py ./
COPY requirements-deploy.txt ./

RUN apt update
RUN python3.8 -m pip install --upgrade pip
RUN python3.8 -m pip install -r requirements-deploy.txt

CMD ["python3.8", "app.py", "--model-type", "emoberta-large", "--device", "cpu"]
21 changes: 21 additions & 0 deletions docker/Dockerfile-large-cuda
@@ -0,0 +1,21 @@
FROM nvidia/cuda:11.2.2-cudnn8-runtime-ubuntu20.04

ENV DEBIAN_FRONTEND=noninteractive

WORKDIR /app

COPY emoberta-large ./emoberta-large
COPY utils ./utils
COPY app.py ./
COPY requirements-deploy.txt ./

RUN apt update
RUN apt install software-properties-common -y
RUN add-apt-repository ppa:deadsnakes/ppa -y
RUN apt update
RUN apt install python3.8 python3.8-dev python3-pip -y
RUN python3.8 -m pip install --upgrade pip
RUN python3.8 -m pip install -r requirements-deploy.txt

CMD ["python3.8", "app.py", "--model-type", "emoberta-large", "--device", "cuda"]
