62,041
社区成员
发帖
与我相关
我的任务
分享
/// <summary>
/// Collects the text of every regular-expression match found in the input.
/// The original summary describes the result as an array of hyperlinks; what is
/// actually returned depends entirely on the supplied pattern.
/// </summary>
/// <param name="userInput">Text to search.</param>
/// <param name="WebText">Regular-expression pattern applied to <paramref name="userInput"/>.</param>
/// <returns>The matched substrings in match order; an empty array when nothing matches.</returns>
public string[] Get_url_Array(string userInput, string WebText)
{
    MatchCollection matches = Regex.Matches(userInput, WebText);
    string[] values = new string[matches.Count];
    for (int i = 0; i < values.Length; i++)
    {
        // NOTE(review): as written, both Replace arguments look like a plain space,
        // which makes this call a no-op. It may originally have replaced an HTML
        // entity or a non-breaking space — confirm against the original source.
        values[i] = matches[i].Value.Replace(" ", " ");
    }
    return values;
}
/// <summary>
/// Downloads the resource at <paramref name="url"/> with an HTTP GET request
/// and returns the response body decoded as text.
/// </summary>
/// <remarks>
/// The request is configured from the <c>userAgent</c>, <c>contentType</c>,
/// <c>cookie</c> and <c>accept</c> members. As a side effect, the name of the
/// encoding used to decode the body is stored in <c>_encode</c>.
/// </remarks>
/// <param name="url">Address of the page to download.</param>
/// <returns>The decoded response body.</returns>
public string GetHtmlEx(string url)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
    request.Timeout = 60000; // milliseconds
    request.UserAgent = userAgent;
    request.ContentType = contentType;
    request.CookieContainer = cookie;
    request.Accept = accept;
    // HTTP method tokens are case-sensitive, so use the canonical "GET" constant.
    request.Method = WebRequestMethods.Http.Get;

    // The using statements release the response, stream and reader even when
    // decoding or reading throws, which a trailing Close() call did not do.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    {
        // NOTE(review): a 9-character Content-Type (the length of "text/html")
        // cannot carry a charset parameter; this appears to be the case being
        // detected, with "gbk" as the fallback encoding — confirm the intent.
        if (response.ContentType.Length == 9)
        {
            _encode = "gbk";
        }
        else
        {
            _encode = getEncoding(response);
        }

        using (Stream responseStream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(responseStream, Encoding.GetEncoding(_encode)))
        {
            return reader.ReadToEnd();
        }
    }
}
// Download the page source for the target URL.
string Htmlstring = GetHtmlEx(url);
// Remove every double-quote and single-quote character from the source
// (presumably so the extraction patterns need not account for quoting — confirm).
Htmlstring = Regex.Replace(Htmlstring, "\"", "", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, "'", "", RegexOptions.IgnoreCase);
// Collect all matches of the rulesObj.RRegex_0 pattern from the cleaned source.
string[] arr0 = Get_url_Array(Htmlstring, rulesObj.RRegex_0);// titles