Files
learn-golang/go-crawler/crawler_basic.go
2025-12-26 17:56:02 +08:00

82 lines
1.4 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package main
import (
"fmt"
"io"
"net/http"
"regexp"
"sync"
)
// main distributes a fixed list of URLs across a small pool of workers and
// prints every result line as it arrives. Result order depends on which
// worker finishes first, so it is not guaranteed to follow the URL order.
func main() {
	const workerCount = 3

	targets := []string{
		"https://golang.org",
		"https://go.dev",
		"https://www.baidu.com",
		"https://www.bing.com",
	}

	var (
		pending = make(chan string) // URLs waiting to be fetched
		output  = make(chan string) // formatted result lines
		pool    sync.WaitGroup      // tracks running workers
	)

	// Start the workers.
	pool.Add(workerCount)
	for id := 0; id < workerCount; id++ {
		go worker(id, pending, output, &pool)
	}

	// Send the jobs from a separate goroutine: both channels are unbuffered,
	// so main must be free to drain results while jobs are still being queued.
	go func() {
		defer close(pending) // closing the channel tells the workers to stop
		for _, target := range targets {
			pending <- target
		}
	}()

	// A dedicated goroutine closes the results channel once every worker has
	// finished, which ends the print loop below.
	go func() {
		pool.Wait()
		close(output)
	}()

	for line := range output {
		fmt.Println(line)
	}
}
// worker receives URLs from jobs until the channel is closed and drained,
// fetches the title of each one, and sends exactly one formatted line per URL
// to results: "[worker-N] URL => TITLE" on success, or
// "[worker-N] URL ERROR: err" on failure. It calls wg.Done on return.
func worker(id int, jobs <-chan string, results chan<- string, wg *sync.WaitGroup) {
	defer wg.Done()

	for {
		target, open := <-jobs
		if !open {
			return
		}

		var line string
		if title, err := fetchTitle(target); err != nil {
			line = fmt.Sprintf("[worker-%d] %s ERROR: %v", id, target, err)
		} else {
			line = fmt.Sprintf("[worker-%d] %s => %s", id, target, title)
		}
		results <- line
	}
}
// maxTitleScanBytes caps how much of a response body fetchTitle reads. The
// <title> element lives in the document head, so scanning the first 1 MiB is
// enough and keeps a huge or endless response from exhausting memory.
const maxTitleScanBytes = 1 << 20

// titlePattern matches the first <title> element of an HTML document.
//
//   - (?i) tag names are matched case-insensitively (<TITLE> is valid HTML).
//   - (?s) lets "." match newlines so a title may span several lines.
//   - (?:\s[^>]*)? tolerates attributes on the opening tag.
//   - (.*?) is non-greedy, so the match stops at the first </title> rather
//     than the last one on the page (inline SVG often has its own <title>).
//   - The \s* on either side keeps surrounding whitespace out of the capture.
//
// It is compiled once at package scope instead of on every call.
var titlePattern = regexp.MustCompile(`(?is)<title(?:\s[^>]*)?>\s*(.*?)\s*</title>`)

// fetchTitle downloads url with an HTTP GET and returns the text of the
// page's first <title> element with surrounding whitespace removed.
//
// It returns "(no title)" with a nil error when the page has no <title>
// element within the first maxTitleScanBytes bytes, or when the title is
// empty or whitespace-only. A non-nil error is returned only when the request
// itself or reading the body fails; the HTTP status code is not inspected.
//
// NOTE(review): http.Get uses http.DefaultClient, which has no timeout, so an
// unresponsive server blocks the caller indefinitely. Consider a shared
// client with a Timeout.
func fetchTitle(url string) (string, error) {
	resp, err := http.Get(url)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(io.LimitReader(resp.Body, maxTitleScanBytes))
	if err != nil {
		return "", err
	}

	if m := titlePattern.FindSubmatch(body); m != nil && len(m[1]) > 0 {
		return string(m[1]), nil
	}
	return "(no title)", nil
}