Ant*_*sta 1 go web-scraping go-colly
我正在尝试抓取一个网站,但似乎我的产品部分是空的。
scraper.go:
package scraper
import (
"fmt"
"strings"
"github.com/gocolly/colly"
"github.com/gocolly/colly/extensions"
)
type Product struct {
name string
fullPrice string
url string
}
func Scraper(site string) []Product {
products := []Product{}
c := colly.NewCollector()
replacer := strings.NewReplacer("R$", "", ",", ".")
c.OnHTML("div#column-main-content", func(e *colly.HTMLElement) {
fullPrice := e.ChildText("span.m7nrfa-0.eJCbzj.sc-ifAKCX.ANnoQ")
product := Product{
name: e.ChildText("h2"),
fullPrice: replacer.Replace(fullPrice),
url: e.ChildAttr("a.sc-1fcmfeb-2.iezWpY", "href"),
}
fmt.Println(product)
products = append(products, product)
})
fmt.Println(products)
c.OnRequest(func(r *colly.Request) {
fmt.Println("Visiting", r.URL)
})
c.OnError(func(r *colly.Response, err error) {
fmt.Println("Request URL:", r.Request.URL, "failed with response:", r.Request, "\nError:", err)
})
// Uses a random User-Agent in each request
extensions.RandomUserAgent(c)
c.Visit(site)
return products
}
Run Code Online (Sandbox Code Playgroud)
main.go:
package main
import "github.com/Antonio-Costa00/Go-Price-Monitor/scraper"
func main() {
scraper.Scraper("https://sp.olx.com.br/?q=iphone%27")
}
Run Code Online (Sandbox Code Playgroud)
产品变量有输出,但切片为空。
切片输出:
[]
我不知道将结果附加到产品切片时是否做错了什么。
有人可以帮我检查一下我返回空切片是否做错了什么吗?
Colly库异步进行抓取,因此当您打印它时它是products空的,但它会被另一个goroutine. 通过使用OnScraped处理程序并在那里打印,products您应该看到它已被填充。
package scraper
import (
"fmt"
"strings"
"github.com/gocolly/colly"
"github.com/gocolly/colly/extensions"
)
type Product struct {
name string
fullPrice string
url string
}
func Scraper(site string) []Product {
products := []Product{}
c := colly.NewCollector()
replacer := strings.NewReplacer("R$", "", ",", ".")
c.OnHTML("div#column-main-content", func(e *colly.HTMLElement) {
fullPrice := e.ChildText("span.m7nrfa-0.eJCbzj.sc-ifAKCX.ANnoQ")
product := Product{
name: e.ChildText("h2"),
fullPrice: replacer.Replace(fullPrice),
url: e.ChildAttr("a.sc-1fcmfeb-2.iezWpY", "href"),
}
fmt.Println(product)
products = append(products, product)
})
c.OnRequest(func(r *colly.Request) {
fmt.Println("Visiting", r.URL)
})
c.OnError(func(r *colly.Response, err error) {
fmt.Println("Request URL:", r.Request.URL, "failed with response:", r.Request, "\nError:", err)
})
c.OnScraped(func(r *colly.Response) {
fmt.Println(products)
})
// Uses a random User-Agent in each request
extensions.RandomUserAgent(c)
c.Visit(site)
return products
}
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
382 次 |
| 最近记录: |