-
Notifications
You must be signed in to change notification settings - Fork 2
/
nekoReg.groovy
34 lines (27 loc) · 1.51 KB
/
nekoReg.groovy
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
// NekoHTML SAX parser shared by every XmlSlurper in this script.
def nekoParser = new org.cyberneko.html.parsers.SAXParser()

// Boolean feature switches, applied in declaration order:
//   - namespaces off
//   - ignore whatever charset the document itself specifies
def parserFeatures = [
    'http://xml.org/sax/features/namespaces': false,
    'http://cyberneko.org/html/features/scanner/ignore-specified-charset': true
]
parserFeatures.each { featureUri, enabled -> nekoParser.setFeature(featureUri, enabled) }

// Since the document's own charset is ignored, this default encoding is the one used.
// Keep the explicit receiver: inside a closure, a bare setProperty(...) would hit
// Groovy's own property setter instead of the SAX parser method.
nekoParser.setProperty('http://cyberneko.org/html/properties/default-encoding', "TIS-620")
/*
def url="http://view.gprocurement.go.th/01_procure_egp/view_online_notice.php?id=387160&display_status=A"
def page = new XmlSlurper(nekoParser).parse(url)
def links = page.depthFirst().grep{ println it.name();it.name() == 'A' }
def span = page.depthFirst().grep{ it.name()=="SPAN" }
println links
println span
ofile=new File('./span.output')
span.eachWithIndex{ val , idx -> ofile.append( "${idx} ::-> ${val.DIV} \n" ,"UTF-8") }
*/
// Download the index page, collect every anchor whose href mentions "id", and append
// one line per anchor to ./index in the form " --> <six digits> -> <link text> ".
// Uses the `nekoParser` configured earlier in this script.
url = "http://view.gprocurement.go.th/01_procure_egp/index.php?page=1"
page = new XmlSlurper(nekoParser).parse(url)

// Only anchors (Neko reports element names upper-cased, hence 'A') whose href contains "id".
links = page.depthFirst().grep { it.name() == 'A' && it.@href.toString().contains("id") }

// NOTE: append() adds to any existing ./index, so repeated runs accumulate lines.
ofile = new File('./index')
links.each { link ->
    def href = link.@href.toString()
    // The greedy prefix makes the group capture the LAST run of six digits in the href.
    def idMatcher = href =~ /^.*(\d{6})/
    if (idMatcher.find()) {
        ofile.append(" --> ${idMatcher.group(1)} -> ${link.toString().trim()} \n", "UTF-8")
    } else {
        // Previously this case threw IndexOutOfBoundsException and aborted the whole run.
        System.err.println("skipping link without a six-digit id: ${href}")
    }
}
/* def td= page.depthFirst().grep{ it.name() == 'TD' } */
/* println ofile.append( " ${td.toString()}\n" ,"UTF-8") */
/* td.eachWithIndex { i,j->ofile.append( "${i} ::: ${j.toString()}\n" ,"UTF-8") }*/