Sol*_*ake 2 c# queue add task threadpool
我对多线程这方面还是新手。假设我有 50000 个 URL,我想并发地获取这些 URL 的内容,比如每次同时处理 10 个 URL。然后,一旦其中一个 URL 处理完成,程序就应该从队列中再取出一个 URL 开始处理,直到列表中的所有 URL 都处理完毕。我该如何用 C# 做到这一点?这是我到目前为止写的代码:
/// <summary>
/// Bundles a pending web request with the caller's payload and the URL it
/// was created from, so an asynchronous callback can recover all three.
/// </summary>
class RequestState
{
    /// <summary>The request this state object belongs to.</summary>
    public WebRequest Request;

    /// <summary>Arbitrary caller-supplied data carried alongside the request.</summary>
    public object Data;

    /// <summary>URL string used to match results back up (database lookup, etc.).</summary>
    public string SiteUrl;

    /// <summary>Creates a state object holding the given request, payload and URL.</summary>
    /// <param name="request">The request to track.</param>
    /// <param name="data">Any data the caller wants to carry along.</param>
    /// <param name="siteUrl">The URL string the request was created from.</param>
    public RequestState(WebRequest request, object data, string siteUrl)
    {
        Request = request;
        Data = data;
        SiteUrl = siteUrl;
    }
}
/// <summary>
/// Click handler: kicks off processing by calling process_URLs.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private void PROCESS_URLS_Click(object sender, EventArgs e)
{
    // Delegate straight to the worker method.
    process_URLs();
}
// Number of selected URLs whose request has not finished yet. It is also
// decremented from asynchronous completion callbacks, so it must only be
// changed atomically.
private int ThreadsCount = 0;

// How long a request may stay pending before ScanTimeoutCallback aborts it.
private const int RequestTimeoutMilliseconds = 30 * 1000;

/// <summary>
/// Starts an asynchronous GET request for every URL selected in
/// URLS_LISTVIEW. Each request completes in UpdateItem and is aborted by
/// ScanTimeoutCallback if it is still pending after the timeout.
/// </summary>
/// <remarks>
/// A URL that cannot be started (malformed, unsupported scheme, ...) is
/// reported to the user and removed from the pending count; the remaining
/// URLs are still processed.
/// </remarks>
private void process_URLs()
{
    // Read the count once so the counter and the loop bound always agree.
    int selectedCount = URLS_LISTVIEW.SelectedItems.Count;
    ThreadsCount = selectedCount;

    for (int i = 0; i < selectedCount; i++)
    {
        string url = URLS_LISTVIEW.SelectedItems[i].SubItems[0].Text.Trim();
        try
        {
            WebRequest request = WebRequest.Create(url);
            request.Method = "GET";

            RequestState state = new RequestState(request, new object(), url);
            IAsyncResult result = request.BeginGetResponse(new AsyncCallback(UpdateItem), state);

            // Fires ScanTimeoutCallback once: either when the request signals
            // completion or when the timeout elapses, whichever comes first.
            ThreadPool.RegisterWaitForSingleObject(
                result.AsyncWaitHandle,
                new WaitOrTimerCallback(ScanTimeoutCallback),
                state,
                RequestTimeoutMilliseconds,
                true);
        }
        catch (UriFormatException ex)
        {
            // Malformed URL text.
            ReportStartFailure(ex);
        }
        catch (NotSupportedException ex)
        {
            // URI scheme with no registered WebRequest handler.
            ReportStartFailure(ex);
        }
        catch (ArgumentException ex)
        {
            // e.g. a request type that rejects the "GET" method name.
            ReportStartFailure(ex);
        }
        catch (WebException ex)
        {
            ReportStartFailure(ex);
        }
        catch (InvalidOperationException ex)
        {
            // Includes ProtocolViolationException from BeginGetResponse.
            ReportStartFailure(ex);
        }
    }
}

// Reports a request that could not be started and takes it out of the pending
// count, so the "finished" notification is not blocked by it.
private void ReportStartFailure(Exception error)
{
    MessageBox.Show(error.Message);

    // No completion callback will ever run for this URL, so account for it here.
    if (Interlocked.Decrement(ref ThreadsCount) < 1)
    {
        MessageBox.Show("finished");
    }
}
/// <summary>
/// Completion callback for BeginGetResponse: reads the whole response body,
/// then marks this request as done and shows "finished" once no request is
/// left pending.
/// </summary>
/// <param name="result">
/// Async result whose AsyncState is the RequestState passed to BeginGetResponse.
/// </param>
private void UpdateItem(IAsyncResult result)
{
    // Grab the custom state object handed to BeginGetResponse.
    RequestState state = (RequestState)result.AsyncState;
    WebRequest request = state.Request;
    try
    {
        // The using blocks release the connection even if reading fails.
        using (WebResponse response = request.EndGetResponse(result))
        using (Stream body = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(body))
        {
            // Data grabbed; process the response content here.
            string dataString = reader.ReadToEnd();
        }
    }
    catch (WebException ex)
    {
        // Raised for HTTP/network errors and when the request was aborted by
        // the timeout callback. It must be handled here: an unhandled
        // exception on a callback thread would take the process down.
        System.Diagnostics.Trace.TraceWarning(state.SiteUrl + ": " + ex.Message);
    }
    catch (IOException ex)
    {
        // The connection broke while the body was being read.
        System.Diagnostics.Trace.TraceWarning(state.SiteUrl + ": " + ex.Message);
    }
    finally
    {
        // Count the request as done whether it succeeded or failed. Callbacks
        // run concurrently, so the decrement has to be atomic.
        if (Interlocked.Decrement(ref ThreadsCount) < 1)
        {
            // All requests finished: tell the user.
            MessageBox.Show("finished");
        }
    }
}
/// <summary>
/// Wait callback registered per request: aborts the request when the wait
/// ended because the timeout elapsed rather than because it was signaled.
/// </summary>
/// <param name="state">The RequestState registered with the wait (may be null).</param>
/// <param name="timedOut">True when the wait timed out.</param>
private static void ScanTimeoutCallback(object state, bool timedOut)
{
    // Signaled in time: nothing to cancel.
    if (!timedOut)
    {
        return;
    }

    RequestState pending = (RequestState)state;
    if (pending == null)
    {
        return;
    }

    pending.Request.Abort();
}
Run Code Online (Sandbox Code Playgroud)
任何想法,将不胜感激 :)
亲切的问候,
看一下TPL,可以选择指定最大并行度:
// URLs to process; the list is filled in the elided part below.
List<string> UriList = new List<string>();
...
// Limit the loop to at most ten iterations running at the same time.
ParallelOptions throttle = new ParallelOptions();
throttle.MaxDegreeOfParallelism = 10;
Parallel.ForEach(UriList, throttle, uri => ProcessUrl(uri));
Run Code Online (Sandbox Code Playgroud)
这样最多会并行处理 10 个 URL,因为我们使用的 Parallel.ForEach() 重载允许通过 ParallelOptions 指定 MaxDegreeOfParallelism。
编辑:
这是一个简单的示例,它将 http://google.com 的 HTML 并行下载 50 次(但同时最多只使用 10 个线程),并将结果存储在一个数组中:
// Build a list containing the same URL fifty times.
List<string> UriList = new List<string>();
for (int i = 0; i < 50; i++)
{
    UriList.Add("http://google.com");
}

// One result slot per URL; every iteration writes only its own slot, so the
// array needs no locking.
string[] HtmlResults = new string[UriList.Count];

Parallel.ForEach(UriList,
    new ParallelOptions() { MaxDegreeOfParallelism = 10 },
    (url, loopState, index) =>
    {
        // index is the position of url in UriList; it picks the output slot.
        // WebClient is disposable, so release it once the download is done.
        using (WebClient wc = new WebClient())
        {
            HtmlResults[index] = wc.DownloadString(url);
        }
    });
Run Code Online (Sandbox Code Playgroud)
我不想造成更多的混淆,但在你的这种情况下,PLINQ 也能很好地工作,因为要处理的各项之间没有依赖关系,而且每个 URL 都会被"转换"为一个实际的结果:
// Download every URL with at most ten concurrent workers, keeping the results
// in the same order as UriList.
// AsOrdered() has to be applied directly to the AsParallel() result; calling
// it after WithDegreeOfParallelism() throws InvalidOperationException.
var htmlResultList = UriList.AsParallel()
    .AsOrdered()
    .WithDegreeOfParallelism(10)
    .Select(url =>
    {
        // WebClient is disposable, so release it once the download is done.
        using (WebClient wc = new WebClient())
        {
            return wc.DownloadString(url);
        }
    })
    .ToList();
Run Code Online (Sandbox Code Playgroud)