深圳地标,58地标采集步骤1

采集第一步:获取远程地址
/// <summary>
/// Downloads the content of the specified remote web page.
/// </summary>
/// <param name="Url">
/// Address of the remote page. "http://" is prepended when the value does not
/// already start with "http://" or "https://".
/// </param>
/// <param name="encoding">Encoding used to decode the response body.</param>
/// <returns>
/// The decoded response body; an empty string when the response status is not
/// 200 OK; or null when <paramref name="Url"/> is null, empty or "about:blank",
/// or when every attempt fails with a <see cref="WebException"/>.
/// </returns>
public string GetRemoteHtmlCode(string Url, System.Text.Encoding encoding)
{
    // Upper bound on request attempts before giving up and returning null.
    const int MaxAttempts = 5;

    if (string.IsNullOrEmpty(Url) || Url.Equals("about:blank"))
    {
        return null;
    }

    // Both schemes must be checked; otherwise an https address would be
    // rewritten to "http://https://..." and could never be fetched.
    if (!Url.StartsWith("http://", StringComparison.OrdinalIgnoreCase) &&
        !Url.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
    {
        Url = "http://" + Url;
    }

    for (int attempt = 0; attempt < MaxAttempts; attempt++)
    {
        try
        {
            HttpWebRequest httpWebRequest = (HttpWebRequest)WebRequest.Create(Url);
            // NOTE(review): the value itself begins with "User-Agent: ", so the header sent
            // carries that text twice. Kept unchanged because servers may respond differently
            // to another agent string - confirm before correcting.
            httpWebRequest.UserAgent = "User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705;)";
            httpWebRequest.Accept = "*/*";
            httpWebRequest.KeepAlive = true;
            httpWebRequest.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5");

            // The using blocks release the response and the reader even when reading throws.
            using (HttpWebResponse httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse())
            {
                if (httpWebResponse.StatusCode != HttpStatusCode.OK)
                {
                    return string.Empty;
                }

                using (StreamReader sreader = new StreamReader(httpWebResponse.GetResponseStream(), encoding))
                {
                    return sreader.ReadToEnd();
                }
            }
        }
        catch (WebException)
        {
            // Every failed attempt counts toward MaxAttempts. Counting only connect
            // failures would retry forever on errors such as 404 or a timeout.
        }
    }

    return null;
}
第二步获取城市列表:
/// <summary>
/// Gets the city list: returns the markup spanning from the first
/// &lt;dl ...&gt; opening tag to the last &lt;/dl&gt; closing tag of the given HTML.
/// </summary>
/// <param name="strIncImg">HTML text to search.</param>
/// <returns>
/// The matched markup with all line breaks removed, or an empty string when
/// <paramref name="strIncImg"/> is null, empty or contains no match.
/// </returns>
public string GetRegValue(string strIncImg)
{
    if (string.IsNullOrEmpty(strIncImg))
    {
        return string.Empty;
    }

    // Strip CR and LF individually so the result does not depend on the platform's
    // Environment.NewLine. '.' in the pattern does not match '\n', so any surviving
    // line feed would prevent a match that spans several lines.
    string flattened = strIncImg.Replace("\r", "").Replace("\n", "");

    // Greedy (.*) deliberately runs to the LAST </dl>, so several lists come back as one span.
    return Regex.Match(flattened, "<dl[^>]*>(.*)<\\/dl>", RegexOptions.IgnoreCase).Value;
}
到此处已经能够获取到城市列表,接下来继续进行分析。
Tags: 

延伸阅读

最新评论

发表评论