Skip to content

Commit

Permalink
init import
Browse files Browse the repository at this point in the history
  • Loading branch information
kevwan committed Sep 20, 2021
1 parent 261c4fd commit 79103c2
Show file tree
Hide file tree
Showing 17 changed files with 1,806 additions and 0 deletions.
26 changes: 26 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Ignore all
*

# Unignore all with extensions
!*.*
!**/Dockerfile
!**/Makefile

# Unignore all dirs
!*/
!api

# Ignore IDE settings, macOS metadata and log directories
.idea
**/.DS_Store
**/logs

# ignore adhoc test code
**/adhoc

# gitlab ci
.cache

# vim auto backup file
*~
!OWNERS
5 changes: 5 additions & 0 deletions bot/adapters/input/inputadapter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
package input

// InputAdapter is implemented by types that accept input through Process.
type InputAdapter interface {
// Process takes a single value of any type and returns nothing.
Process(interface{})
}
244 changes: 244 additions & 0 deletions bot/adapters/logic/closestmatch.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
package logic

import (
"fmt"
"math"
"sort"

"github.com/kevwan/chatbot/bot/adapters/storage"
"github.com/kevwan/chatbot/bot/nlp"
"github.com/tal-tech/go-zero/core/mr"
)

const (
// chunkSize is the chunk length generator passes to splitStrings, i.e. the
// maximum number of candidate strings handed to a single mapper call.
chunkSize = 10000
// topAnswerSize caps how many answers topOccurAnswers.put retains.
topAnswerSize = 10
)

type (
// sourceAndTargets pairs the text being matched (source) with one chunk of
// candidate strings (targets). generator emits one value per chunk.
sourceAndTargets struct {
source string
targets []string
}

// questionAndScore holds a candidate question together with the similarity
// score that mapper computed for it.
questionAndScore struct {
question string
score float32
}

// answerAndOccurrence holds an answer and its occurrence count.
answerAndOccurrence struct {
answer string
occurrence int
}

// topOccurAnswers collects answers through put, which retains at most
// topAnswerSize entries.
topOccurAnswers struct {
answers []*answerAndOccurrence
}

// topScoreQuestions is a fixed set of slots, allocated by
// newTopScoreQuestions and filled through add.
topScoreQuestions struct {
questions []questionAndScore
}

// closestMatch is the LogicAdapter implementation returned by
// NewClosestMatch.
closestMatch struct {
// verbose, once set by SetVerbose, makes generator print the candidates.
verbose bool
// storage is queried with Find for stored responses and with Search for
// candidate questions.
storage storage.StorageAdapter
// tops bounds how many answers and questions are kept.
tops int
}
)

// NewClosestMatch returns a LogicAdapter backed by the given storage adapter.
// tops bounds how many answers and candidate questions the adapter keeps.
// Verbose output is off until SetVerbose is called.
func NewClosestMatch(storage storage.StorageAdapter, tops int) LogicAdapter {
	matcher := new(closestMatch)
	matcher.storage = storage
	matcher.tops = tops
	return matcher
}

// CanProcess always reports true: the input text is not inspected.
func (match *closestMatch) CanProcess(_ string) bool {
	return true
}

// Process answers text. When storage.Find reports an entry for text, the
// stored responses are ranked by processExactMatch; otherwise the answer is
// derived from similar questions by processSimilarMatch.
func (match *closestMatch) Process(text string) []Answer {
	responses, found := match.storage.Find(text)
	if !found {
		return match.processSimilarMatch(text)
	}

	return match.processExactMatch(responses)
}

// SetVerbose turns on verbose mode, which makes generator print the candidate
// questions it retrieved. There is no way to turn it off again.
func (match *closestMatch) SetVerbose() {
	match.verbose = true
}

// processExactMatch turns a response -> occurrence map into answers.
//
// The responses are funnelled through topOccurAnswers.put, sorted by
// descending occurrence, and truncated to match.tops entries. Every returned
// answer has Confidence 1. The relative order of responses with equal
// occurrence is unspecified.
func (match *closestMatch) processExactMatch(responses map[string]int) []Answer {
	var ranked topOccurAnswers
	for answer, count := range responses {
		ranked.put(answer, count)
	}

	sort.Slice(ranked.answers, func(a, b int) bool {
		return ranked.answers[b].occurrence < ranked.answers[a].occurrence
	})

	// Return no more than match.tops answers, and no more than are available.
	limit := len(ranked.answers)
	if match.tops < limit {
		limit = match.tops
	}

	answers := make([]Answer, limit)
	for i := range answers {
		answers[i] = Answer{
			Content:    ranked.answers[i].answer,
			Confidence: 1,
		}
	}

	return answers
}

// processSimilarMatch answers text by looking at similar stored questions.
//
// It runs a map-reduce (generator, mapper, reducer) to obtain candidate
// questions ordered by descending similarity score. For each candidate with a
// positive score that storage.Find knows about, the response with the highest
// occurrence count is reported, using the similarity score as Confidence.
// It returns nil when the map-reduce fails or no candidate qualifies.
func (match *closestMatch) processSimilarMatch(text string) []Answer {
	// mr.MapReduce reports failure through an error value, not a bool.
	result, err := mr.MapReduce(generator(match, text), mapper(match), reducer(match))
	if err != nil {
		return nil
	}

	var answers []Answer
	// reducer always writes a []questionAndScore.
	for _, candidate := range result.([]questionAndScore) {
		// Unfilled slots keep the zero score; skip anything not strictly positive.
		if !(candidate.score > 0) {
			continue
		}

		responses, found := match.storage.Find(candidate.question)
		if !found {
			continue
		}

		// processExactMatch sorts by occurrence, so index 0 is the best response.
		matches := match.processExactMatch(responses)
		if len(matches) == 0 {
			continue
		}

		answers = append(answers, Answer{
			Content:    matches[0].Content,
			Confidence: candidate.score,
		})
	}

	return answers
}

// put offers an answer with its occurrence count to the collection.
//
// While fewer than topAnswerSize answers are held, the answer is appended.
// Once full, the first entry with the lowest occurrence (searched below a
// starting bound of math.MaxInt32) is replaced, but only if the new
// occurrence is strictly greater than that lowest value.
func (top *topOccurAnswers) put(answer string, occurrence int) {
	if len(top.answers) < topAnswerSize {
		top.answers = append(top.answers, &answerAndOccurrence{
			answer:     answer,
			occurrence: occurrence,
		})
		return
	}

	minIdx, minOccur := 0, math.MaxInt32
	for idx := range top.answers {
		if cur := top.answers[idx].occurrence; cur < minOccur {
			minIdx, minOccur = idx, cur
		}
	}

	if occurrence > minOccur {
		top.answers[minIdx] = &answerAndOccurrence{
			answer:     answer,
			occurrence: occurrence,
		}
	}
}

// generator returns the map-reduce source stage for text.
//
// The returned function asks storage.Search for candidate strings, prints
// them when verbose mode is on, splits them into chunks of at most chunkSize
// entries and sends one sourceAndTargets value per chunk. Nothing is sent when
// Search returns no candidates.
func generator(match *closestMatch, text string) mr.GenerateFunc {
	// mr.GenerateFunc takes a send-only channel; a bidirectional channel
	// parameter would make this a different function type.
	return func(source chan<- interface{}) {
		candidates := match.storage.Search(text)
		if match.verbose {
			printMatches(candidates)
		}

		for _, chunk := range splitStrings(candidates, chunkSize) {
			source <- sourceAndTargets{
				source:  text,
				targets: chunk,
			}
		}
	}
}

// mapper returns the map stage: for one sourceAndTargets chunk it scores every
// target against the source with nlp.SimilarityForStrings, offers each score
// to a topScoreQuestions sized by match.tops, and writes that
// *topScoreQuestions to the writer.
//
// The cancel callback is unused; its func(error) type is required for the
// literal to be an mr.MapperFunc.
func mapper(match *closestMatch) mr.MapperFunc {
	return func(data interface{}, writer mr.Writer, cancel func(error)) {
		tops := newTopScoreQuestions(match.tops)
		// generator only ever emits sourceAndTargets values.
		pair := data.(sourceAndTargets)
		for _, target := range pair.targets {
			tops.add(questionAndScore{
				question: target,
				score:    nlp.SimilarityForStrings(pair.source, target),
			})
		}

		writer.Write(tops)
	}
}

// reducer returns the reduce stage: it merges the per-chunk
// *topScoreQuestions produced by mapper into one topScoreQuestions sized by
// match.tops, sorts the slots by descending score and writes the resulting
// []questionAndScore. Slots that were never filled keep their zero value.
//
// The receive-only channel and the func(error) cancel callback (unused here)
// are required for the literal to be an mr.ReducerFunc.
func reducer(match *closestMatch) mr.ReducerFunc {
	return func(input <-chan interface{}, writer mr.Writer, cancel func(error)) {
		tops := newTopScoreQuestions(match.tops)
		for each := range input {
			// mapper always writes *topScoreQuestions.
			partial := each.(*topScoreQuestions)
			for _, question := range partial.questions {
				tops.add(question)
			}
		}

		sort.Slice(tops.questions, func(i, j int) bool {
			return tops.questions[i].score > tops.questions[j].score
		})

		writer.Write(tops.questions)
	}
}

// splitStrings cuts slice into consecutive chunks of size elements; the last
// chunk holds whatever remains. The chunks alias the input's backing array.
// It returns nil for an empty input. size must be positive.
func splitStrings(slice []string, size int) [][]string {
	var chunks [][]string

	for start, total := 0, len(slice); start < total; start += size {
		stop := start + size
		if stop >= total {
			stop = total
		}
		chunks = append(chunks, slice[start:stop])
	}

	return chunks
}

// printMatches writes the number of matches to standard output, followed by
// at most the first 11 sentences, one per line.
func printMatches(matches []string) {
	fmt.Println("matched size:", len(matches))

	shown := matches
	if len(shown) > 11 {
		shown = shown[:11]
	}
	for _, sentence := range shown {
		fmt.Println("\t", sentence)
	}
}

// newTopScoreQuestions allocates a topScoreQuestions with n zero-valued slots
// (empty question, score 0) for add to fill.
func newTopScoreQuestions(n int) *topScoreQuestions {
	tq := new(topScoreQuestions)
	tq.questions = make([]questionAndScore, n)
	return tq
}

// add offers q to the fixed set of slots: the first slot holding the lowest
// score is overwritten when q scores strictly higher than it; otherwise q is
// dropped. With no slots, add does nothing.
//
// The lowest slot is found by comparing the slots with each other rather than
// against a fixed bound of 1, so the right slot is evicted even when scores
// reach or exceed 1, and an empty slot list is never indexed.
func (tq *topScoreQuestions) add(q questionAndScore) {
	if len(tq.questions) == 0 {
		return
	}

	lowest := 0
	for i := 1; i < len(tq.questions); i++ {
		if tq.questions[i].score < tq.questions[lowest].score {
			lowest = i
		}
	}

	if q.score > tq.questions[lowest].score {
		tq.questions[lowest] = q
	}
}
35 changes: 35 additions & 0 deletions bot/adapters/logic/combomatch.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package logic

// comboMatch is a LogicAdapter that delegates to an ordered list of other
// LogicAdapters.
type comboMatch struct {
// matches are consulted in order by CanProcess and Process.
matches []LogicAdapter
}

// NewComboMatch returns a LogicAdapter that wraps the given adapters, keeping
// them in the order supplied.
func NewComboMatch(matches ...LogicAdapter) LogicAdapter {
	combo := new(comboMatch)
	combo.matches = matches
	return combo
}

// CanProcess reports whether at least one wrapped adapter can process the
// question. Adapters are asked in order and the search stops at the first
// that accepts.
func (match *comboMatch) CanProcess(question string) bool {
	for i := range match.matches {
		if match.matches[i].CanProcess(question) {
			return true
		}
	}

	return false
}

// Process hands the question to the first wrapped adapter whose CanProcess
// accepts it and returns that adapter's answers. It returns nil when no
// adapter accepts the question.
func (match *comboMatch) Process(question string) []Answer {
	for _, adapter := range match.matches {
		if !adapter.CanProcess(question) {
			continue
		}
		return adapter.Process(question)
	}

	return nil
}

// SetVerbose calls SetVerbose on every wrapped adapter, in order.
func (match *comboMatch) SetVerbose() {
	for i := range match.matches {
		match.matches[i].SetVerbose()
	}
}
14 changes: 14 additions & 0 deletions bot/adapters/logic/logicadapter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package logic

type (
// Answer is one candidate reply produced by a LogicAdapter.
Answer struct {
// Content is the reply text.
Content string
// Confidence is the score attached to the reply by the adapter that
// produced it.
Confidence float32
}

// LogicAdapter is implemented by components that turn an input string into
// candidate answers.
LogicAdapter interface {
// CanProcess reports whether the adapter accepts the given input.
CanProcess(string) bool
// Process returns the answers for the given input.
Process(string) []Answer
// SetVerbose switches the adapter into verbose mode.
SetVerbose()
}
)
5 changes: 5 additions & 0 deletions bot/adapters/output/outputadapter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
package output

// OutputAdapter is implemented by types that post-process output through
// Process.
type OutputAdapter interface {
// Process takes a string and a float32 and returns a string and a bool.
// NOTE(review): presumably the inputs are an answer's content and
// confidence, and the bool reports whether the output was accepted —
// confirm against the implementations.
Process(string, float32) (string, bool)
}
8 changes: 8 additions & 0 deletions bot/adapters/storage/gobstorage.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package storage

import "encoding/gob"

// GobStorage is a StorageAdapter that can additionally be given a gob encoder
// to use as its output.
type GobStorage interface {
StorageAdapter
// SetOutput sets the gob encoder used for output.
SetOutput(*gob.Encoder)
}
Loading

0 comments on commit 79103c2

Please sign in to comment.