MrW*_*d54 0 google-app-engine go
我的代码卡在了自己的等待循环里,不太清楚为什么。该函数接收一个输入通道和一个输出通道,然后取出输入通道中的每个项目,对其链接执行 HTTP GET,并从返回的 HTML 中提取 title 标签的内容。
\n\nGET 和抓取的过程在 goroutine 中执行,并且我设置了一个 WaitGroup (innerWait),以确保在关闭输出通道之前已处理完所有项目。
\n\n func (fp FeedProducer) getTitles(in <-chan feeds.Item,\n out chan<- feeds.Item,\n wg *sync.WaitGroup) {\n\n defer wg.Done()\n\n var innerWait sync.WaitGroup\n\n for item := range in {\n log.Infof(fp.c, "Incrementing inner WaitGroup.")\n innerWait.Add(1)\n go func(item feeds.Item) {\n defer innerWait.Done()\n defer log.Infof(fp.c, "Decriment inner wait group by defer.")\n client := urlfetch.Client(fp.c)\n resp, err := client.Get(item.Link.Href)\n log.Infof(fp.c, "Getting title for: %v", item.Link.Href)\n if err != nil {\n log.Errorf(fp.c, "Error retriving page. %v", err.Error())\n return\n }\n if strings.ToLower(resp.Header.Get("Content-Type")) == "text/html; charset=utf-8" {\n title := fp.scrapeTitle(resp)\n item.Title = title\n } else {\n log.Errorf(fp.c, "Wrong content type. Received: %v from %v", resp.Header.Get("Content-Type"), item.Link.Href)\n }\n out <- item\n }(item)\n }\n log.Infof(fp.c, "Waiting for title pull wait group.")\n innerWait.Wait()\n log.Infof(fp.c, "Done waiting for title pull.")\n close(out)\n}\n\nfunc (fp FeedProducer) scrapeTitle(request *http.Response) string {\n defer request.Body.Close()\n tokenizer := html.NewTokenizer(request.Body)\n var titleIsNext bool\n for {\n token := tokenizer.Next()\n switch {\n case token == html.ErrorToken:\n log.Infof(fp.c, "Hit the end of the doc without finding title.")\n return ""\n case token == html.StartTagToken:\n tag := tokenizer.Token()\n isTitle := tag.Data == "title"\n\n if isTitle {\n titleIsNext = true\n }\n case titleIsNext && token == html.TextToken:\n title := tokenizer.Token().Data\n log.Infof(fp.c, "Pulled title: %v", title)\n return title\n }\n }\n}\nRun Code Online (Sandbox Code Playgroud)\n\n日志内容如下所示:
\n\n2015/08/09 22:02:10 INFO: Revived query parameter: golang\n2015/08/09 22:02:10 INFO: Getting active tweets from the last 7 days.\n2015/08/09 22:02:10 INFO: Incrementing inner WaitGroup.\n2015/08/09 22:02:10 INFO: Incrementing inner WaitGroup.\n2015/08/09 22:02:10 INFO: Incrementing inner WaitGroup.\n2015/08/09 22:02:10 INFO: Incrementing inner WaitGroup.\n2015/08/09 22:02:10 INFO: Incrementing inner WaitGroup.\n2015/08/09 22:02:10 INFO: Incrementing inner WaitGroup.\n2015/08/09 22:02:10 INFO: Waiting for title pull wait group.\n2015/08/09 22:02:10 INFO: Getting title for: http://devsisters.github.io/goquic/\n2015/08/09 22:02:10 INFO: Pulled title: GoQuic by devsisters\n2015/08/09 22:02:10 INFO: Getting title for: http://whizdumb.me/2015/03/03/matching-a-string-and-extracting-values-using-regex/\n2015/08/09 22:02:10 INFO: Pulled title: Matching a string and extracting values using regex | Whizdumb\'s blog\n2015/08/09 22:02:10 INFO: Getting title for: https://www.reddit.com/r/golang/comments/3g7tyv/dropboxs_infrastructure_is_go_at_a_huge_scale/\n2015/08/09 22:02:10 INFO: Pulled title: Dropbox\'s infrastructure is Go at a huge scale : golang\n2015/08/09 22:02:10 INFO: Getting title for: http://dave.cheney.net/2015/08/08/performance-without-the-event-loop\n2015/08/09 22:02:10 INFO: Pulled title: Performance without the event loop | Dave Cheney\n2015/08/09 22:02:11 INFO: Getting title for: https://github.com/ccirello/sublime-gosnippets\n2015/08/09 22:02:11 INFO: Pulled title: ccirello/sublime-gosnippets \xc2\xb7 GitHub\n2015/08/09 22:02:11 INFO: Getting title for: https://medium.com/iron-io-blog/an-easier-way-to-create-tiny-golang-docker-images-7ba2893b160?mkt_tok=3RkMMJWWfF9wsRonuqTMZKXonjHpfsX57ewoWaexlMI/0ER3fOvrPUfGjI4ATsNrI%2BSLDwEYGJlv6SgFQ7LMMaZq1rgMXBk%3D&utm_content=buffer45a1c&utm_medium=social&utm_source=twitter.com&utm_campaign=buffer\n2015/08/09 22:02:11 INFO: Pulled title: An Easier Way to Create Tiny Golang Docker Images \xe2\x80\x94 Iron.io Blog 
\xe2\x80\x94 Medium\nRun Code Online (Sandbox Code Playgroud)\n\n根据日志可以看到,程序已经执行到了 innerWait.Wait() 这一行,这也说明入站通道已在管道的另一端被关闭。
\n\n看起来匿名函数中的 defer 语句没有被执行,因为我在日志中完全看不到 defer 打印的那条日志。但我怎么也想不出原因,因为该代码块中的所有代码似乎都执行了。
\n\n感谢帮助。
\n这些 goroutine 被阻塞在向 out 发送的这一行:
out <- item
Run Code Online (Sandbox Code Playgroud)
修复方法是启动一个 goroutine 从 out 接收数据。在没有接收方读取 out 的情况下,发送操作会一直阻塞,defer 的 innerWait.Done() 也就不会执行,innerWait.Wait() 因此永远不会返回。
调试此类问题的一个好方法是通过向进程发送 SIGQUIT 来转储 Goroutine 堆栈。
| 归档时间: |
|
| 查看次数: |
2318 次 |
| 最近记录: |