-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: data/histogram: add `ParseFileCSV()`, `ParseTable()`, `TransformBinNames()`, `TransformBinNamesByPrefix()`
- Loading branch information
Showing
8 changed files
with
768 additions
and
30 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
package histogram | ||
|
||
import ( | ||
"io" | ||
"io/ioutil" | ||
"os" | ||
|
||
"github.com/grokify/simplego/encoding/csvutil" | ||
"github.com/grokify/simplego/encoding/jsonutil" | ||
"github.com/grokify/simplego/type/stringsutil" | ||
) | ||
|
||
func ReadMergeFilterCSVFiles(inPaths []string, outPath string, inComma rune, inStripBom bool, andFilter map[string]stringsutil.MatchInfo) (DocumentsSet, error) { | ||
//data := JsonRecordsInfo{Records: []map[string]string{}} | ||
data := NewDocumentsSet() | ||
|
||
for _, inPath := range inPaths { | ||
reader, inFile, err := csvutil.NewReader(inPath, inComma, inStripBom) | ||
if err != nil { | ||
return data, err | ||
} | ||
|
||
csvHeader := csvutil.CSVHeader{} | ||
j := -1 | ||
|
||
for { | ||
line, err := reader.Read() | ||
if err == io.EOF { | ||
break | ||
} else if err != nil { | ||
return data, err | ||
} | ||
j++ | ||
|
||
if j == 0 { | ||
csvHeader.Columns = line | ||
continue | ||
} | ||
match, err := csvHeader.RecordMatch(line, andFilter) | ||
if err != nil { | ||
return data, err | ||
} | ||
if !match { | ||
continue | ||
} | ||
|
||
mss := csvHeader.RecordToMSS(line) | ||
data.Documents = append(data.Documents, mss) | ||
} | ||
err = inFile.Close() | ||
if err != nil { | ||
return data, err | ||
} | ||
} | ||
data.Inflate() | ||
return data, nil | ||
} | ||
|
||
func MergeFilterCSVFilesToJSON(inPaths []string, outPath string, inComma rune, inStripBom bool, perm os.FileMode, andFilter map[string]stringsutil.MatchInfo) error { | ||
data, err := ReadMergeFilterCSVFiles(inPaths, outPath, inComma, inStripBom, andFilter) | ||
if err != nil { | ||
return err | ||
} | ||
bytes, err := jsonutil.MarshalSimple(data, "", " ") | ||
if err != nil { | ||
return err | ||
} | ||
return ioutil.WriteFile(outPath, bytes, perm) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
package histogram | ||
|
||
import ( | ||
"fmt" | ||
"strconv" | ||
"strings" | ||
|
||
"github.com/grokify/gocharts/data/table" | ||
"github.com/grokify/simplego/type/stringsutil" | ||
) | ||
|
||
// ParseFileCSV reads a CSV using default settings of | ||
// `,` separator, header row and BOM to be stripped. If you | ||
// have other configurations, use `table.ReadFile()` directly | ||
// and call `HistogramFromTable()`. | ||
func ParseFileCSV(file string, binNameColIdx, binFrequencyColIdx uint) (*Histogram, error) { | ||
tbl, err := table.ReadFile(file, ',', true, true) | ||
if err != nil { | ||
return nil, err | ||
} | ||
return ParseTable(tbl, binNameColIdx, binFrequencyColIdx) | ||
} | ||
|
||
// ParseTable parses a `table.Table` to a `Histogram` given a table, | ||
// binName column index and binFrequency column index. Empty rows are | ||
// skipped. | ||
func ParseTable(tbl table.Table, binNameColIdx, binFrequencyColIdx uint) (*Histogram, error) { | ||
hist := NewHistogram() | ||
for _, rec := range tbl.Records { | ||
if stringsutil.SliceIsEmpty(rec, true) { | ||
continue | ||
} | ||
if int(binNameColIdx) >= len(rec) { | ||
return hist, fmt.Errorf("error row length smaller than binNameColIdx: recordLen[%d] binNameColIdx [%d]", | ||
len(rec), binNameColIdx) | ||
} else if int(binFrequencyColIdx) >= len(rec) { | ||
return hist, fmt.Errorf("error row length smaller than binFrequencyColIdx: recordLen[%d] binFrequencyColIdx [%d]", | ||
len(rec), binFrequencyColIdx) | ||
} | ||
binName := strings.TrimSpace(rec[binNameColIdx]) | ||
binFreq := strings.TrimSpace(rec[binFrequencyColIdx]) | ||
if len(binName) == 0 && len(binFreq) == 0 { | ||
continue | ||
} | ||
if len(binFreq) == 0 { | ||
hist.Add(binName, 0) | ||
} else { | ||
binFreqInt, err := strconv.Atoi(binFreq) | ||
if err != nil { | ||
return hist, fmt.Errorf("error strconv frequency string[%s] err[%s]", binFreq, err.Error()) | ||
} | ||
hist.Add(binName, binFreqInt) | ||
} | ||
} | ||
hist.Inflate() | ||
return hist, nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
package histogram | ||
|
||
import ( | ||
"strings" | ||
) | ||
|
||
// TransformBinNames modifies bin names and returns a new | ||
// histogram. | ||
func TransformBinNames(hist *Histogram, xfFunc func(input string) string) *Histogram { | ||
if hist == nil { | ||
return nil | ||
} | ||
newHist := NewHistogram() | ||
for binName, binFreq := range hist.BinsFrequency { | ||
newHist.Add(xfFunc(binName), binFreq) | ||
} | ||
return newHist | ||
} | ||
|
||
// TransformBinNamesByPrefix modifies bin names and returns a new | ||
// histogram. | ||
func TransformBinNamesByPrefix(hist *Histogram, xfMap map[string]string) *Histogram { | ||
if hist == nil { | ||
return nil | ||
} | ||
return TransformBinNames(hist, | ||
func(oldName string) string { | ||
for oldPrefix, newName := range xfMap { | ||
if strings.Index(oldName, oldPrefix) == 0 { | ||
return newName | ||
} | ||
} | ||
return oldName | ||
}, | ||
) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.