diff --git a/.gitignore b/.gitignore index 4e2c998..d887e52 100644 --- a/.gitignore +++ b/.gitignore @@ -25,5 +25,6 @@ # corpus files data **/*.corpus.json +**/*.yml **/*.gob **/stopwords.txt diff --git a/bot/corpus/corpus.go b/bot/corpus/corpus.go index 0a3e219..6ce6c2f 100644 --- a/bot/corpus/corpus.go +++ b/bot/corpus/corpus.go @@ -2,10 +2,19 @@ package corpus import ( "encoding/json" + "fmt" "io/ioutil" "os" + "path/filepath" + + "gopkg.in/yaml.v2" ) +type Corpus struct { + Categories []string `json:"categories"` + Conversations [][]string `json:"conversations"` +} + func LoadCorpora(filePaths []string) (map[string][][]string, error) { result := make(map[string][][]string) @@ -23,15 +32,45 @@ func LoadCorpora(filePaths []string) (map[string][][]string, error) { } func readCorpus(file string) (map[string][][]string, error) { - var result map[string][][]string - - if f, err := os.Open(file); err != nil { + f, err := os.Open(file) + if err != nil { return nil, err - } else if content, err := ioutil.ReadAll(f); err != nil { + } + + ext := filepath.Ext(file) + content, err := ioutil.ReadAll(f) + if err != nil { return nil, err - } else if err := json.Unmarshal(content, &result); err != nil { + } + + ret, err := unmarshal(ext, content) + if err != nil { return nil, err - } else { - return result, nil } + + return ret, nil +} + +func unmarshal(ext string, content []byte) (map[string][][]string, error) { + var corpus Corpus + ret := make(map[string][][]string) + + switch ext { + case ".json": + if err := json.Unmarshal(content, &corpus); err != nil { + return nil, err + } + case ".yml", ".yaml": + if err := yaml.Unmarshal(content, &corpus); err != nil { + return nil, err + } + default: + return nil, fmt.Errorf("unknown file type: %s", ext) + } + + for _, v := range corpus.Categories { + ret[v] = corpus.Conversations + } + + return ret, nil } diff --git a/go.mod b/go.mod index e2baf3b..5582b2f 100644 --- a/go.mod +++ b/go.mod @@ -5,4 +5,5 @@ go 1.15 require ( github.com/tal-tech/go-zero v1.2.1 github.com/wangbin/jiebago v0.3.2 + gopkg.in/yaml.v2 v2.4.0 )