Merge pull request #18 from Hackathon-Hacked2023/backend-additional-files

Adding remaining model files
Hackathon-Hacked2023 · Jan 8, 2023 · c98c442 · c98c442
2 parents e1940e3 + d33e776
commit c98c442
Show file tree

Hide file tree

Showing 7 changed files with 50,151 additions and 0 deletions.
diff --git a/backend/model/.DS_Store b/backend/model/.DS_Store
diff --git a/backend/model/README.md b/backend/model/README.md
@@ -0,0 +1,65 @@
+---
+language: en
+tags:
+- text-classification
+- tensorflow
+- roberta
+datasets:
+- go_emotions
+license: mit
+---
+
+- [linkedin.com/in/arpanghoshal](https://www.linkedin.com/in/arpanghoshal)
+- [linkedin.com/in/rohanrkamath](https://www.linkedin.com/in/rohanrkamath)
+
+
+## What is GoEmotions
+
+GoEmotions is a dataset of 58,000 Reddit comments labelled with 28 emotion categories (27 emotions plus neutral):
+
+- admiration, amusement, anger, annoyance, approval, caring, confusion, curiosity, desire, disappointment, disapproval, disgust, embarrassment, excitement, fear, gratitude, grief, joy, love, nervousness, optimism, pride, realization, relief, remorse, sadness, surprise + neutral
+
+
+## What is RoBERTa
+
+RoBERTa builds on BERT’s language masking strategy and modifies key hyperparameters in BERT, including removing BERT’s next-sentence pretraining objective, and training with much larger mini-batches and learning rates. RoBERTa was also trained on an order of magnitude more data than BERT, for a longer amount of time. This allows RoBERTa representations to generalize even better to downstream tasks compared to BERT.
+
+
+## Hyperparameters
+
+| Parameter         | Value |
+| ----------------- | :---: |
+| Learning rate     | 5e-5 |
+| Epochs            |   10 |
+| Max Seq Length    |   50 |
+| Batch size        |   16 |
+| Warmup Proportion | 0.1 |
+| Epsilon      | 1e-8 |
+
+
+## Results
+
+Best `Macro F1` result: 49.30%
+
+## Usage
+
+```python
+
+from transformers import RobertaTokenizerFast, TFRobertaForSequenceClassification, pipeline
+
+tokenizer = RobertaTokenizerFast.from_pretrained("arpanghoshal/EmoRoBERTa")
+model = TFRobertaForSequenceClassification.from_pretrained("arpanghoshal/EmoRoBERTa")
+
+emotion = pipeline('sentiment-analysis', 
+                    model='arpanghoshal/EmoRoBERTa')
+
+emotion_labels = emotion("Thanks for using it.")
+print(emotion_labels)
+
+```
+Output:
+
+```
+[{'label': 'gratitude', 'score': 0.9964383244514465}]
+```
+
diff --git a/backend/model/config.json b/backend/model/config.json
@@ -0,0 +1,82 @@
+{
+  "architectures": [
+    "RobertaForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "admiration",
+    "1": "amusement",
+    "2": "anger",
+    "3": "annoyance",
+    "4": "approval",
+    "5": "caring",
+    "6": "confusion",
+    "7": "curiosity",
+    "8": "desire",
+    "9": "disappointment",
+    "10": "disapproval",
+    "11": "disgust",
+    "12": "embarrassment",
+    "13": "excitement",
+    "14": "fear",
+    "15": "gratitude",
+    "16": "grief",
+    "17": "joy",
+    "18": "love",
+    "19": "nervousness",
+    "20": "optimism",
+    "21": "pride",
+    "22": "realization",
+    "23": "relief",
+    "24": "remorse",
+    "25": "sadness",
+    "26": "surprise",
+    "27": "neutral"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "admiration": 0,
+    "amusement": 1,
+    "anger": 2,
+    "annoyance": 3,
+    "approval": 4,
+    "caring": 5,
+    "confusion": 6,
+    "curiosity": 7,
+    "desire": 8,
+    "disappointment": 9,
+    "disapproval": 10,
+    "disgust": 11,
+    "embarrassment": 12,
+    "excitement": 13,
+    "fear": 14,
+    "gratitude": 15,
+    "grief": 16,
+    "joy": 17,
+    "love": 18,
+    "nervousness": 19,
+    "neutral": 27,
+    "optimism": 20,
+    "pride": 21,
+    "realization": 22,
+    "relief": 23,
+    "remorse": 24,
+    "sadness": 25,
+    "surprise": 26
+  },
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "roberta",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 1,
+  "type_vocab_size": 1,
+  "vocab_size": 50265
+}