Built from scratch this weekend using Colly + goroutines + channels.
Change the two const lines at the top and scrape anything (if you point it at a different site, also update AllowedDomains and the link filter in the code).
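To try it locally: save the file as main.go, run go mod init scraper (any module name works), go get github.com/gocolly/colly, then go run main.go.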
package main

import (
    "fmt"
    "strings"

    "github.com/gocolly/colly"
)

// CHANGE THESE TWO LINES TO SCRAPE ANYTHING YOU WANT
// (for a different site, also update AllowedDomains and the link filter below)
const startURL = "https://go.dev/blog/"
const keyword = "goroutine"

func main() {
    fmt.Printf("Starting Colly scraper → looking for \"%s\" on %s\n\n", keyword, startURL)
    // Channel to collect matching URLs; closing it once the crawl finishes
    // is what lets the result loop at the bottom of main exit
    hits := make(chan string, 50)
    // Create a new Colly collector
    c := colly.NewCollector(
        colly.MaxDepth(2), // Follow links up to depth 2
        colly.Async(true), // Enable concurrent requests
        colly.AllowedDomains("go.dev", "golang.org"),
    )
    // When a page contains the keyword, send its URL to the hits channel
    // (lower-case both sides so the match still works if the keyword const has capitals)
    c.OnHTML("body", func(e *colly.HTMLElement) {
        if strings.Contains(strings.ToLower(e.Text), strings.ToLower(keyword)) {
            hits <- e.Request.URL.String()
        }
    })
    // Follow links to Go blog posts
    c.OnHTML("a[href]", func(e *colly.HTMLElement) {
        link := e.Attr("href")
        if strings.HasPrefix(link, "/blog/") || strings.Contains(link, "go.dev/blog/") {
            e.Request.Visit(link)
        }
    })
    // Error handling
    c.OnError(func(r *colly.Response, err error) {
        fmt.Printf("[ERROR] %s → %v\n", r.Request.URL, err)
    })
    // Start scraping in the background; closing hits after c.Wait() tells the
    // range loop below that every page has been processed
    go func() {
        c.Visit(startURL)
        c.Wait() // Wait for all requests to finish
        close(hits)
    }()
    // Print results as they come in
    count := 0
    for url := range hits {
        fmt.Printf("[HIT] %s\n", url)
        count++
    }
    fmt.Printf("\nDone — found %d pages containing \"%s\"\n", count, keyword)
    fmt.Println("Clean exit — showcase ready!")
}
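One optional extra, since colly.Async(true) means requests go out concurrently: a rate limit keeps the crawl polite. A minimal sketch, registered right after colly.NewCollector; the glob, parallelism, and delay values here are placeholder assumptions to tune, and "time" would need to be added to the imports:

    // Throttle the async collector (illustrative values, tune as needed)
    c.Limit(&colly.LimitRule{
        DomainGlob:  "*",             // apply the rule to every domain the collector visits
        Parallelism: 2,               // at most 2 requests in flight at once
        Delay:       1 * time.Second, // pause between requests to the same domain
    })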