Skip to content

Commit

Permalink
feat: data/histogram: add ParseFileCSV(), ParseTable(), Transform…
Browse files Browse the repository at this point in the history
…BinNames()`, `TransformBinNamesByPrefix()`
  • Loading branch information
grokify committed May 27, 2021
1 parent 1f73d22 commit 4c28118
Show file tree
Hide file tree
Showing 8 changed files with 768 additions and 30 deletions.
16 changes: 6 additions & 10 deletions data/table/documents.go → data/histogram/documents.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
package table

import (
"github.com/grokify/gocharts/data/histogram"
)
package histogram

type DocumentsSet struct {
Meta DocumentsSetMeta `json:"meta"`
Expand All @@ -20,7 +16,7 @@ func (ds *DocumentsSet) Inflate() {
}

func (ds *DocumentsSet) CreateHistogram(key string) {
hg := histogram.NewHistogram()
hg := NewHistogram()

//histogram := map[string]int{}
for _, doc := range ds.Documents {
Expand All @@ -33,17 +29,17 @@ func (ds *DocumentsSet) CreateHistogram(key string) {
}
hg.Inflate()
if ds.Meta.Histograms == nil {
ds.Meta.Histograms = map[string]histogram.Histogram{}
ds.Meta.Histograms = map[string]*Histogram{}
}
ds.Meta.Histograms[key] = hg
}

type DocumentsSetMeta struct {
Count int `json:"count"`
Histograms map[string]histogram.Histogram `json:"histograms"`
Count int `json:"count"`
Histograms map[string]*Histogram `json:"histograms"`
}

func NewDocumentsSetMeta() DocumentsSetMeta {
return DocumentsSetMeta{
Histograms: map[string]histogram.Histogram{}}
Histograms: map[string]*Histogram{}}
}
69 changes: 69 additions & 0 deletions data/histogram/documents_read.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
package histogram

import (
"io"
"io/ioutil"
"os"

"github.com/grokify/simplego/encoding/csvutil"
"github.com/grokify/simplego/encoding/jsonutil"
"github.com/grokify/simplego/type/stringsutil"
)

// ReadMergeFilterCSVFiles reads every CSV file listed in inPaths, treats the
// first row of each file as that file's header, keeps the rows that match
// andFilter and merges them into a single DocumentsSet.
//
// inComma and inStripBom are passed through to csvutil.NewReader. outPath is
// not used by this function; it is retained so the signature stays compatible
// with existing callers.
//
// On failure the DocumentsSet accumulated so far is returned together with
// the error. Each input file is closed before moving on, including when
// reading or filtering fails part-way through the file.
func ReadMergeFilterCSVFiles(inPaths []string, outPath string, inComma rune, inStripBom bool, andFilter map[string]stringsutil.MatchInfo) (DocumentsSet, error) {
	data := NewDocumentsSet()

	for _, inPath := range inPaths {
		if err := appendFilteredCSVFile(&data, inPath, inComma, inStripBom, andFilter); err != nil {
			return data, err
		}
	}
	data.Inflate()
	return data, nil
}

// appendFilteredCSVFile reads a single CSV file and appends the rows matching
// andFilter to data.Documents. The file is always closed on return; a Close
// error is reported only when no earlier error occurred.
func appendFilteredCSVFile(data *DocumentsSet, inPath string, inComma rune, inStripBom bool, andFilter map[string]stringsutil.MatchInfo) (err error) {
	reader, inFile, err := csvutil.NewReader(inPath, inComma, inStripBom)
	if err != nil {
		return err
	}
	defer func() {
		// Always release the file handle, but do not let a Close error
		// mask an earlier read or filter error.
		if closeErr := inFile.Close(); err == nil {
			err = closeErr
		}
	}()

	csvHeader := csvutil.CSVHeader{}
	haveHeader := false

	for {
		line, readErr := reader.Read()
		if readErr == io.EOF {
			return nil
		}
		if readErr != nil {
			return readErr
		}

		// The first row of each file supplies the column names.
		if !haveHeader {
			csvHeader.Columns = line
			haveHeader = true
			continue
		}

		match, matchErr := csvHeader.RecordMatch(line, andFilter)
		if matchErr != nil {
			return matchErr
		}
		if !match {
			continue
		}
		data.Documents = append(data.Documents, csvHeader.RecordToMSS(line))
	}
}

// MergeFilterCSVFilesToJSON merges and filters the CSV files in inPaths via
// ReadMergeFilterCSVFiles, serializes the resulting DocumentsSet with
// jsonutil.MarshalSimple and writes the output to outPath using file mode
// perm. The first error encountered is returned.
func MergeFilterCSVFilesToJSON(inPaths []string, outPath string, inComma rune, inStripBom bool, perm os.FileMode, andFilter map[string]stringsutil.MatchInfo) error {
	docs, readErr := ReadMergeFilterCSVFiles(inPaths, outPath, inComma, inStripBom, andFilter)
	if readErr != nil {
		return readErr
	}

	jsonData, marshalErr := jsonutil.MarshalSimple(docs, "", " ")
	if marshalErr != nil {
		return marshalErr
	}

	return ioutil.WriteFile(outPath, jsonData, perm)
}
12 changes: 6 additions & 6 deletions data/histogram/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ type Histogram struct {
BinsFrequency map[string]int `json:"binsFrequency"`
}

func NewHistogram() Histogram {
return Histogram{BinsFrequency: map[string]int{}}
func NewHistogram() *Histogram {
return &Histogram{BinsFrequency: map[string]int{}}
}

func (h *Histogram) Inflate() {
Expand All @@ -26,8 +26,8 @@ func (h *Histogram) Add(bin string, count int) {
}

type HistogramSet struct {
Meta HistogramSetMetadata `json:"meta,omitempty"`
HistogramMap map[string]Histogram `json:"histograms"`
Meta HistogramSetMetadata `json:"meta,omitempty"`
HistogramMap map[string]*Histogram `json:"histograms"`
}

type HistogramSetMetadata struct {
Expand All @@ -43,12 +43,12 @@ func NewHistogramSetMetadata() HistogramSetMetadata {
func NewHistogramSet() HistogramSet {
return HistogramSet{
Meta: NewHistogramSetMetadata(),
HistogramMap: map[string]Histogram{}}
HistogramMap: map[string]*Histogram{}}
}

func (hs *HistogramSet) Add(name, bin string, count int) {
if hs.HistogramMap == nil {
hs.HistogramMap = map[string]Histogram{}
hs.HistogramMap = map[string]*Histogram{}
}
if _, ok := hs.HistogramMap[name]; !ok {
hs.HistogramMap[name] = NewHistogram()
Expand Down
57 changes: 57 additions & 0 deletions data/histogram/read.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package histogram

import (
"fmt"
"strconv"
"strings"

"github.com/grokify/gocharts/data/table"
"github.com/grokify/simplego/type/stringsutil"
)

// ParseFileCSV reads the CSV file at `file` and converts it to a
// `Histogram` using the given bin name and bin frequency column
// indexes. The file is read with the default settings of a `,`
// separator, a header row and BOM stripping. For any other
// configuration, call `table.ReadFile()` directly and pass the
// result to `ParseTable()`.
func ParseFileCSV(file string, binNameColIdx, binFrequencyColIdx uint) (*Histogram, error) {
	const (
		comma     = ','
		hasHeader = true
		stripBom  = true
	)
	tbl, err := table.ReadFile(file, comma, hasHeader, stripBom)
	if err == nil {
		return ParseTable(tbl, binNameColIdx, binFrequencyColIdx)
	}
	return nil, err
}

// ParseTable parses a `table.Table` to a `Histogram` given a table,
// binName column index and binFrequency column index.
//
// Empty rows are skipped, as are rows where both the bin name and the
// bin frequency cells are blank after trimming whitespace. A blank
// frequency cell with a non-blank bin name is recorded with a count of 0.
//
// An error is returned when a row is too short for either column index
// or when a frequency cell cannot be parsed as an integer; the parse
// error is wrapped so callers can inspect it with `errors.Is`/`errors.As`.
// On error, the histogram built so far is returned alongside the error.
func ParseTable(tbl table.Table, binNameColIdx, binFrequencyColIdx uint) (*Histogram, error) {
	hist := NewHistogram()
	for _, rec := range tbl.Records {
		if stringsutil.SliceIsEmpty(rec, true) {
			continue
		}
		// Compare as uint: converting a very large uint index to int can
		// wrap to a negative number and slip past the bounds check.
		recLen := uint(len(rec))
		if binNameColIdx >= recLen {
			return hist, fmt.Errorf("error row length smaller than binNameColIdx: recordLen[%d] binNameColIdx [%d]",
				len(rec), binNameColIdx)
		}
		if binFrequencyColIdx >= recLen {
			return hist, fmt.Errorf("error row length smaller than binFrequencyColIdx: recordLen[%d] binFrequencyColIdx [%d]",
				len(rec), binFrequencyColIdx)
		}
		binName := strings.TrimSpace(rec[binNameColIdx])
		binFreq := strings.TrimSpace(rec[binFrequencyColIdx])
		if binName == "" && binFreq == "" {
			continue
		}
		// A blank frequency cell counts as zero.
		binFreqInt := 0
		if binFreq != "" {
			var err error
			binFreqInt, err = strconv.Atoi(binFreq)
			if err != nil {
				// %w renders the same text as err.Error() while keeping
				// the underlying error available for unwrapping.
				return hist, fmt.Errorf("error strconv frequency string[%s] err[%w]", binFreq, err)
			}
		}
		hist.Add(binName, binFreqInt)
	}
	hist.Inflate()
	return hist, nil
}
36 changes: 36 additions & 0 deletions data/histogram/transform.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package histogram

import (
"strings"
)

// TransformBinNames returns a new histogram built by passing every
// bin name of `hist` through `xfFunc` and adding the bin's frequency
// under the resulting name. Only `hist.BinsFrequency` is read here.
// A nil `hist` yields nil.
func TransformBinNames(hist *Histogram, xfFunc func(input string) string) *Histogram {
	if hist == nil {
		return nil
	}
	renamed := NewHistogram()
	for oldName, freq := range hist.BinsFrequency {
		newName := xfFunc(oldName)
		renamed.Add(newName, freq)
	}
	return renamed
}

// TransformBinNamesByPrefix returns a new histogram in which each bin
// name that starts with a key of `xfMap` is replaced by that key's
// value. Bin names matching no key are kept unchanged. A nil `hist`
// yields nil.
//
// When several keys are prefixes of the same bin name, the longest key
// wins. This keeps the result deterministic; Go map iteration order is
// unspecified, so picking the first match found would not be.
func TransformBinNamesByPrefix(hist *Histogram, xfMap map[string]string) *Histogram {
	if hist == nil {
		return nil
	}
	return TransformBinNames(hist,
		func(oldName string) string {
			// bestLen starts at -1 so that an empty-string key can still
			// act as a catch-all when no longer prefix matches.
			bestLen := -1
			bestName := oldName
			for oldPrefix, newName := range xfMap {
				if len(oldPrefix) > bestLen && strings.HasPrefix(oldName, oldPrefix) {
					bestLen = len(oldPrefix)
					bestName = newName
				}
			}
			return bestName
		},
	)
}
7 changes: 4 additions & 3 deletions data/table/read.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,9 @@ import (
"fmt"
"io"
"io/ioutil"
"os"
"strings"

"github.com/grokify/simplego/encoding/csvutil"
"github.com/grokify/simplego/encoding/jsonutil"
"github.com/grokify/simplego/type/stringsutil"
"github.com/pkg/errors"
)
Expand Down Expand Up @@ -111,6 +109,7 @@ func ReadFile(path string, comma rune, hasHeader, stripBom bool) (Table, error)
return tbl, nil
}

/*
func ReadMergeFilterCSVFiles(inPaths []string, outPath string, inComma rune, inStripBom bool, andFilter map[string]stringsutil.MatchInfo) (DocumentsSet, error) {
//data := JsonRecordsInfo{Records: []map[string]string{}}
data := NewDocumentsSet()
Expand Down Expand Up @@ -156,7 +155,8 @@ func ReadMergeFilterCSVFiles(inPaths []string, outPath string, inComma rune, inS
data.Inflate()
return data, nil
}

*/
/*
func MergeFilterCSVFilesToJSON(inPaths []string, outPath string, inComma rune, inStripBom bool, perm os.FileMode, andFilter map[string]stringsutil.MatchInfo) error {
data, err := ReadMergeFilterCSVFiles(inPaths, outPath, inComma, inStripBom, andFilter)
if err != nil {
Expand All @@ -168,6 +168,7 @@ func MergeFilterCSVFilesToJSON(inPaths []string, outPath string, inComma rune, i
}
return ioutil.WriteFile(outPath, bytes, perm)
}
*/

func ReadCSVFilesSingleColumnValuesString(files []string, sep string, hasHeader, trimSpace bool, col uint, condenseUniqueSort bool) ([]string, error) {
values := []string{}
Expand Down
3 changes: 2 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@ go 1.15

require (
github.com/360EntSecGroup-Skylar/excelize v1.4.1
github.com/blend/go-sdk v1.20210518.1 // indirect
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 // indirect
github.com/grokify/elastirad-go v0.0.2
github.com/grokify/simplego v0.0.21
github.com/grokify/simplego v0.26.5
github.com/jessevdk/go-flags v1.5.0
github.com/olekukonko/tablewriter v0.0.5
github.com/pkg/errors v0.9.1
Expand Down
Loading

0 comments on commit 4c28118

Please sign in to comment.