Go Concurrent Web Scraper Live Demo Code

Built from scratch this weekend using Colly + goroutines + channels.
Change the two const lines at the top and scrape anything.


package main

import (
        "fmt"
        "strings"
        "sync"

        "github.com/gocolly/colly"
)

// CHANGE THESE TWO LINES TO SCRAPE ANYTHING YOU WANT
const startURL = "https://go.dev/blog/"
const keyword  = "goroutine"

func main() {
        fmt.Printf("Starting Colly scraper → looking for \"%s\" on %s\n\n", keyword, startURL)

        // Channel to collect matching URLs
        hits := make(chan string, 50)

        // WaitGroup to know when all processing is done
        var wg sync.WaitGroup
        wg.Add(1)

        // Create a new Colly collector
        c := colly.NewCollector(
                colly.MaxDepth(2),                    // Follow links up to depth 2
                colly.Async(true),                    // Enable concurrent requests
                colly.AllowedDomains("go.dev", "golang.org"),
        )

        // When we find the keyword, send the URL to hits channel
        c.OnHTML("body", func(e *colly.HTMLElement) {
                if strings.Contains(strings.ToLower(e.Text), keyword) {
                        hits <- e.Request.URL.String()
                }
        })

        // Follow links to Go blog posts
        c.OnHTML("a[href]", func(e *colly.HTMLElement) {
                link := e.Attr("href")
                if strings.HasPrefix(link, "/blog/") || strings.Contains(link, "go.dev/blog/") {
                        e.Request.Visit(link)
                }
        })

        // Error handling
        c.OnError(func(r *colly.Response, err error) {
                fmt.Printf("[ERROR] %s → %v\n", r.Request.URL, err)
        })

        // Start scraping
        go func() {
                defer wg.Done()
                c.Visit(startURL)
                c.Wait() // Wait for all requests to finish
                close(hits)
        }()

        // Print results as they come in
        count := 0
        for url := range hits {
                fmt.Printf("[HIT] %s\n", url)
                count++
        }

        fmt.Printf("\nDone — found %d pages containing \"%s\"\n", count, keyword)
        fmt.Println("Clean exit — showcase ready!")
}