The file (test.html):
<!DOCTYPE html>
<!--
  Fixture page for the selector test program. It provides one target for each
  kind of selector the program registers: a tag (h1, div), a class
  (.firsttext), a tag plus class (div.firsttext), an attribute
  (div[testattr]) and an id (#test_id).
-->
<html>
<head>
<title>Colly Test Page</title>
</head>
<body>
<h1>Colly Test Page</h1>
<div class="firsttext">This is the first text</div>
<div class="secondtext">This is the second text</div>
<!-- Shares the class "firsttext" with the first div but is a <pre>, so
     ".firsttext" matches both elements while "div.firsttext" matches only the div. -->
<pre class="firsttext">The first text again</pre>
<!-- Carries a custom attribute; target of the "div[testattr]" selector. -->
<div testattr="test">Test attribute</div>
<!-- Target of the "#test_id" selector. -->
<div id="test_id">Test ID</div>
</body>
</html>
The program:
// addCallback registers an OnHTML handler on c for the given selector.
// Each time the handler fires it prints a "MATCHER: <selector>" line
// followed by the text of the matched element.
func addCallback(c *colly.Collector, matcher string) {
	// matcher never changes after registration, so the label can be built once.
	label := "MATCHER: " + matcher

	report := func(e *colly.HTMLElement) {
		fmt.Println(label)
		fmt.Println(e.Text)
	}

	c.OnHTML(matcher, report)
}
// main registers one printing callback per selector on a fresh collector,
// points the collector at a file transport rooted in the current directory,
// and visits test.html. A Visit error is reported with the builtin println.
func main() {
	collector := colly.NewCollector()

	// Registration order is kept exactly as listed here.
	selectors := []string{
		"h1",
		"div",
		"div.firsttext",
		".firsttext",
		"div[testattr]",
		"#test_id",
	}
	for _, selector := range selectors {
		addCallback(collector, selector)
	}

	// Serve requests from the local file system instead of the network.
	collector.WithTransport(http.NewFileTransport(http.Dir(".")))

	if err := collector.Visit("file://./test.html"); err != nil {
		println(err.Error())
	}
}
The output:
MATCHER: h1
Colly Test Page
MATCHER: div
This is the first text
MATCHER: div
This is the second text
MATCHER: div
Test attribute
MATCHER: div
Test ID
MATCHER: div.firsttext
This is the first text
MATCHER: .firsttext
This is the first text
MATCHER: .firsttext
The first text again
MATCHER: div[testattr]
Test attribute
MATCHER: #test_id
Test ID