diff --git a/README.md b/README.md index 018b4d1..7196d7a 100644 --- a/README.md +++ b/README.md @@ -138,7 +138,8 @@ There are all supported funs: | Name | Parameters | Description | | --------- | -------------------------------- | ---------------------------------------- | -| $ | (selector: string) | CSS selector | +| $ | (selector: string) | Relative CSS selector (select from parent node)| +| $root | (selector: string) | Absolute CSS selector (select from body)| | html | | inner HTML | | text | | inner text | | outerHTML | | outer HTML | diff --git a/eh.crs b/eh.crs index 8fa6772..8a16256 100644 --- a/eh.crs +++ b/eh.crs @@ -7,6 +7,7 @@ gallery(@page=0,code)="http://g.e-hentai.org/g/{code}/?p={@page}&hc=1" # Crawler Struct gallery[]: index -> $("table.itg tr.gtr0,tr.gtr1") + @next: $root("table.ptt td:last-child a").href title: $("td.itd div div.it5 a").text cover: $("td.itd div div.it2") .html diff --git a/format.go b/format.go index 9d33b49..105f66d 100644 --- a/format.go +++ b/format.go @@ -14,7 +14,7 @@ type Formatted struct { var ( rx_isTown = regexp.MustCompile(`^\s*[a-zA-Z][a-zA-Z_-]{0,31}\s*(\(|=)`) - rx_isNode = regexp.MustCompile(`^\s*@?[a-zA-Z_-]{1,32}(\[]|\*)?:`) + rx_isNode = regexp.MustCompile(`^\s*@?[a-zA-Z_-]+(\[]|\*)?:`) ) func Formatting(s string) *Formatted { diff --git a/fun.go b/fun.go index 1d7f384..1ff08ee 100644 --- a/fun.go +++ b/fun.go @@ -23,6 +23,7 @@ type Fun struct { Document *goquery.Document Selection *goquery.Selection Result string + TempStop bool PrevFun *Fun NextFun *Fun @@ -67,7 +68,6 @@ func (f *Fun) PageBody() (*goquery.Document, error) { func (f *Fun) InitSelector(root bool) error { var baseSel *goquery.Selection - if f.Node.Page != nil { doc, err := f.PageBody() if err != nil { @@ -77,16 +77,29 @@ func (f *Fun) InitSelector(root bool) error { baseSel = f.Document.Selection } else { f.Node.ParentNode.Fun.Invoke() - baseSel = f.Node.ParentNode.Fun.Selection + if root { + baseSel = f.Node.ParentNode.Fun.Document.Selection + } else { + baseSel = f.Node.ParentNode.Fun.Selection + } } if f.Node.IsArray { bundle := PowerfulFind(baseSel, f.Params[0]) - if len(bundle.Nodes) > f.Node.Index { + if len(bundle.Nodes) > f.Node.Index || f.TempStop { f.Selection = PowerfulFind(baseSel, f.Params[0]).Eq(f.Node.Index) + f.TempStop = false } else { // overflow current page - f.Node.Page.Inc() + if f.Node.NextDirectorNode() != nil { + f.TempStop = true + np, err := f.Node.NextDirectorNode().Value() + if err != nil { return err } + f.Node.Page.NextMode = true + f.Node.Page.NextUrl = np + } else { + f.Node.Page.Inc() + } f.Node.Reset() f.InitSelector(root) } @@ -106,6 +119,8 @@ func (f *Fun) Invoke() (string, error) { switch f.Name { case "$": err = f.InitSelector(false) + case "$root": + err = f.InitSelector(true) case "attr": f.Result, _ = f.PrevFun.Selection.Attr(f.Params[0]) case "text": diff --git a/page.go b/page.go index f2544c5..a063687 100644 --- a/page.go +++ b/page.go @@ -10,8 +10,11 @@ type Page struct { Raw string Node *Node - Town *Town - Ref string + Town *Town + Ref string + + NextUrl string + NextMode bool Index int } @@ -21,6 +24,9 @@ func (p *Page) Inc() { } func (p *Page) Url() (string, error) { + if p.NextMode { + return p.NextUrl, nil + } if p.Town != nil { p.Town.Attach() if p.Index > -1 { diff --git a/util.go b/util.go index 1a2b5db..e38ecef 100644 --- a/util.go +++ b/util.go @@ -35,3 +35,24 @@ func MD5(s string) string { m.Write([]byte(s)) return hex.EncodeToString(m.Sum(nil)) } + +type MonoStack struct { + value string + has bool +} + +func (o *MonoStack) Set(s string) { + o.value = s + o.has = true +} + +func (o *MonoStack) Has() bool { + return o.has +} + +func (o *MonoStack) Value() string { + o.has = false + s := o.value + o.value = "" + return s +} \ No newline at end of file