Golang Colly Cheat Sheet

The file (test.html):

<!DOCTYPE html>
<!-- Fixture page for the selector examples; the program loads it as test.html. -->
<html>
	<head>
		<title>Colly Test Page</title>
	</head>
	<body>
		<!-- Matched by the tag selector "h1". -->
		<h1>Colly Test Page</h1>
		<!-- Matched by "div", "div.firsttext" and ".firsttext". -->
		<div class="firsttext">This is the first text</div>
		<!-- Matched by "div" only; the program has no selector for this class. -->
		<div class="secondtext">This is the second text</div>
		<!-- Not a div, so only the class selector ".firsttext" matches it. -->
		<pre class="firsttext">The first text again</pre>
		<!-- Matched by "div" and by the attribute selector "div[testattr]". -->
		<div testattr="test">Test attribute</div>
		<!-- Matched by "div" and by the ID selector "#test_id". -->
		<div id="test_id">Test ID</div>
	</body>
</html>

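The program (the `package main` clause and the imports for `fmt`, `net/http` and Colly are omitted for brevity):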

// addCallback registers an OnHTML handler on c for the selector given in
// matcher. For every element the selector matches, the handler prints a
// "MATCHER: <selector>" line followed by the element's text.
func addCallback(c *colly.Collector, matcher string) {
	// The closure captures matcher so each registered handler reports the
	// selector it was registered with.
	report := func(e *colly.HTMLElement) {
		header := "MATCHER: " + matcher
		fmt.Println(header)
		fmt.Println(e.Text)
	}
	c.OnHTML(matcher, report)
}

// main registers one printing callback per example selector, points the
// collector at the local file system, and visits test.html.
func main() {
	collector := colly.NewCollector()

	// Registration order is kept exactly as listed here.
	selectors := []string{
		"h1",            // tag name
		"div",           // tag name, several matches
		"div.firsttext", // tag name plus class
		".firsttext",    // class on any tag
		"div[testattr]", // tag name plus attribute presence
		"#test_id",      // element ID
	}
	for _, selector := range selectors {
		addCallback(collector, selector)
	}

	// Serve requests from the current directory instead of the network.
	collector.WithTransport(http.NewFileTransport(http.Dir(".")))

	if err := collector.Visit("file://./test.html"); err != nil {
		println(err.Error())
	}
}

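The output (callbacks appear in the order they were registered, and each one lists its matches in document order):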

MATCHER: h1
Colly Test Page
MATCHER: div
This is the first text
MATCHER: div
This is the second text
MATCHER: div
Test attribute
MATCHER: div
Test ID
MATCHER: div.firsttext
This is the first text
MATCHER: .firsttext
This is the first text
MATCHER: .firsttext
The first text again
MATCHER: div[testattr]
Test attribute
MATCHER: #test_id
Test ID

Leave a Reply