-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcategorizer.js
142 lines (123 loc) · 4.17 KB
/
categorizer.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
// Start of the script's wall-clock measurement; read again when the elapsed time is reported.
const time = performance.now();

// Training data: every category has a name and a list of example sentences.
// The categorizer scores a prompt by how often its words occur in these sentences.
const categories = [
{
name: 'test',
data: [
'my dog is cool',
'my dog is nice',
'flying cars are the new wave',
'tesla keeps improving self driving',
'word pairs suck',
'emojis are a better representable way of writing text messages',
'my dog got stuck in the cars window',
'cats are cool',
'cats are better than dogs',
],
},
{
name: 'other',
data: [
'birds fly high',
'fish swim in water',
'walmart has good selective items',
'coffee is worse than drinking soda',
'headaches suck stop overthinking',
'categorizer prompts are time consuming sob sobbering crying hearting lol laughing out loud',
],
},
];

/**
 * Builds one word-frequency table per category.
 *
 * Sentences are split on runs of whitespace and words are lower-cased, so
 * counting is case-insensitive and extra spaces never produce an empty "word".
 * Every table is created without a prototype: looking up a word such as
 * "constructor" or "toString" therefore yields `undefined` (or its real count)
 * instead of an inherited Object.prototype member.
 *
 * @param {{name: string, data: string[]}[]} categoryList - Categories to index.
 * @returns {Object<string, Object<string, number>>} Category name -> (word -> occurrences).
 *   When two categories share a name, the later one replaces the earlier one.
 */
function buildWordCounts(categoryList) {
const counts = Object.create(null);
categoryList.forEach((category) => {
const table = Object.create(null);
category.data.forEach((sentence) => {
sentence.split(/\s+/).forEach((rawWord) => {
if (rawWord === '') return; // leading/trailing whitespace leaves empty pieces
const word = rawWord.toLowerCase();
table[word] = (table[word] || 0) + 1;
});
});
counts[category.name] = table;
});
return counts;
}

// Word counts for each category, keyed by category name.
const wordCounts = buildWordCounts(categories);
// One-way latch. It is set the first time a prompt matches nothing at all; while it is
// set, determineCategory() returns null without inspecting its arguments.
// NOTE(review): this means a single unmatched prompt disables every later call —
// kept as-is because it is existing module-level behaviour; confirm it is intended.
let categoryDetermined = false;

/**
 * Determines which category a prompt belongs to, using the per-category word
 * counts in `wordCounts` and the example sentences in `categories`.
 *
 * Scoring: the prompt is split on whitespace and lower-cased; a category's
 * confidence is the sum of the stored counts of the prompt's words. All
 * categories are scored before a decision is made, and the highest score wins
 * (the earliest category wins a tie). A category with a score of 0 is never
 * selected.
 *
 * @param {string} prompt - Text to classify.
 * @param {number} [minConfidence=5] - Minimum score the best category needs to be accepted.
 * @returns {{cat: string, confidence: number, tokens: {count: number, token_words: string[]}}|string|null}
 *   - an object describing the winning category when its score reaches `minConfidence`
 *     (`token_words` holds the prompt's words in their original letter case);
 *   - otherwise a human-readable string when at least one prompt word occurs as a
 *     substring of some category's example sentences;
 *   - otherwise `null` (this also sets the `categoryDetermined` latch), or `null`
 *     immediately when the latch is already set.
 */
function determineCategory(prompt, minConfidence = 5) {
if (categoryDetermined) return null;

// Split on whitespace runs and drop empty pieces: an empty token would be a
// substring of every sentence and make every category look like a candidate.
const words = prompt.split(/\s+/).filter((word) => word !== '');
const tokens = words.map((word) => word.toLowerCase());

// Stored count of `token` in a category. Only own, positive numeric entries are
// used, so names inherited from Object.prototype ("constructor", "toString", ...)
// are never mistaken for counts.
const countFor = (categoryName, token) => {
const table = wordCounts[categoryName];
if (table == null || !Object.prototype.hasOwnProperty.call(table, token)) {
return 0;
}
const count = table[token];
return typeof count === 'number' && count > 0 ? count : 0;
};

// Score every category first, then decide.
let bestCategory = null;
let bestConfidence = 0;
categories.forEach((category) => {
let confidence = 0;
tokens.forEach((token) => {
confidence += countFor(category.name, token);
});
// Strict ">" keeps the earliest category on a tie.
if (confidence > bestConfidence) {
bestConfidence = confidence;
bestCategory = category.name;
}
});

if (bestCategory !== null && bestConfidence >= minConfidence) {
return {
cat: bestCategory,
confidence: bestConfidence,
tokens: {
count: bestConfidence,
token_words: words,
},
};
}

// No category is confident enough: list the categories whose example sentences
// contain any prompt word as a substring, and total the stored counts of the
// prompt's words across all categories. Nothing is written to `wordCounts`.
let predictedConfidence = 0;
const possibleNames = [];
tokens.forEach((token) => {
categories.forEach((category) => {
predictedConfidence += countFor(category.name, token);
// Tokens are lower-cased, so compare against lower-cased sentences too.
const mentioned = category.data.some((sentence) =>
sentence.toLowerCase().includes(token)
);
if (mentioned && !possibleNames.includes(category.name)) {
possibleNames.push(category.name);
}
});
});

if (possibleNames.length > 1) {
const names = possibleNames.map((name) => ` {${name}}`).join('');
return `Categories your prompt best fit's in ${names},\npredicted confidence: ${predictedConfidence}, accurate confidence: ${bestConfidence}`;
}
if (possibleNames.length === 1) {
return `Insufficient data for the prompt: ${prompt}\n Confidence: ${bestConfidence}`;
}

categoryDetermined = true; // nothing matched: latch so later calls return null
return null;
}
// Demo run: classify one sample prompt and print the result, then print how
// long the script has been running since `time` was captured.
const prompt = 'dogs are cool';
const demoResult = determineCategory(prompt);
console.log(demoResult);
const elapsedMs = Math.round(performance.now() - time);
console.log(`\nTotal elapsed time taken: ${elapsedMs}ms`);