跪求抓取BT发布页的代码 (******十万火急******)

qiezic 2006-08-31 05:20:48
我使用以下代码抓取,但抓到的却是乱码,高人指点啊…………

// Downloads one listing page and shows its HTML in a message box.
string url = "http://bt3.btchina.net/?s=&pagenumber=12";

// Process-wide setting: raise the limit before the request is created so it is
// already in effect when the connection for this request is set up.
System.Net.ServicePointManager.DefaultConnectionLimit = 10000;

CookieContainer cook = new CookieContainer();
HttpWebRequest wrq = (HttpWebRequest)WebRequest.Create(url);
wrq.Timeout = 115000;
wrq.Credentials = CredentialCache.DefaultCredentials;
wrq.UserAgent = "MSIE6.0";
// A page is downloaded with GET. "OPTIONS" only asks the server which methods it
// supports, so the reply to it is not the page content.
wrq.Method = "GET";
wrq.AllowAutoRedirect = true;
// No ContentType here: a GET request carries no body to describe.
// Let the framework transparently decode gzip/deflate responses; reading a
// compressed body as text produces garbage.
wrq.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;
wrq.CookieContainer = cook;

string html;
using (HttpWebResponse wrs = (HttpWebResponse)wrq.GetResponse())
{
    // Prefer the charset the server explicitly declared; otherwise keep the
    // system default code page that the original snippet used.
    System.Text.Encoding pageEncoding = System.Text.Encoding.Default;
    string contentType = wrs.ContentType;
    if (contentType != null
        && contentType.IndexOf("charset=", System.StringComparison.OrdinalIgnoreCase) >= 0)
    {
        try
        {
            pageEncoding = System.Text.Encoding.GetEncoding(wrs.CharacterSet);
        }
        catch (System.ArgumentException)
        {
            // The server sent a charset name this runtime does not know:
            // deliberately fall back to the default encoding chosen above.
        }
    }

    using (StreamReader strm = new StreamReader(wrs.GetResponseStream(), pageEncoding))
    {
        html = strm.ReadToEnd();
    }
}
MessageBox.Show(html);


先跪谢啦
...全文
343 11 打赏 收藏 转发到动态 举报
写回复
用AI写文章
11 条回复
切换为时间正序
请发表友善的回复…
发表回复
qiezic 2006-09-01
  • 打赏
  • 举报
回复
哈哈,谢谢楼上
能抓防盗链的吗,像BT这样的?
ksoft_luo 2006-09-01
  • 打赏
  • 举报
回复
以下是我采用socks抓取网页的代码。由于使用了代理,你如果使用的话需要改变部分代码。
=================================
namespace Spider.Common
{
    /// <summary>
    /// Downloads web pages either through <see cref="WebRequest"/> or through a raw
    /// TCP socket, optionally tunnelled through an anonymous SOCKS5 proxy.
    /// </summary>
    public class GetUrlHtml
    {
        // Proxy used by the single-argument GetHtmlBySocket overload.
        private const string DefaultProxyHost = "192.168.0.1";
        private const int DefaultProxyPort = 1080;

        // Encoding tried first when decoding a page fetched over the raw socket.
        private const string DefaultEncodingName = "gb2312";

        /// <summary>
        /// Downloads <paramref name="Url"/> with <see cref="WebRequest"/> and decodes
        /// the body as gb2312.
        /// </summary>
        /// <param name="Url">Absolute URL of the page to download.</param>
        /// <returns>
        /// The page text, or the exception message when the download fails
        /// (this method reports failures through its return value and does not throw).
        /// </returns>
        public string GetHtmlByUrl(string Url)
        {
            string strRet = "";
            try
            {
                // Built inside the try block so that a null or malformed URL is
                // reported the same way as any other failure.
                WebRequest req = WebRequest.Create(new Uri(Url));
                req.Credentials = CredentialCache.DefaultCredentials;

                using (WebResponse resp = req.GetResponse())
                using (Stream stream = resp.GetResponseStream())
                using (StreamReader sr = new StreamReader(stream, System.Text.Encoding.GetEncoding("gb2312")))
                {
                    strRet = sr.ReadToEnd();
                }
            }
            catch (Exception error)
            {
                strRet = error.Message;
            }
            return strRet;
        }

        /// <summary>
        /// Fetches <paramref name="Url"/> over a raw socket and decodes the whole
        /// response (status line and headers included) to text.
        /// </summary>
        /// <param name="Url">URL to fetch; a leading "http://" is optional.</param>
        /// <param name="UseSocks5Proxy">True to tunnel through the SOCKS5 proxy below.</param>
        /// <param name="ProxyUrl">Host name or address of the SOCKS5 proxy.</param>
        /// <param name="ProxyPort">TCP port of the SOCKS5 proxy.</param>
        /// <returns>
        /// The response decoded as gb2312, or as UTF-8 when the text contains
        /// "charset=utf-8" (spaces and letter case ignored).
        /// </returns>
        public string GetHtmlBySocket(string Url, bool UseSocks5Proxy, string ProxyUrl, int ProxyPort)
        {
            MemoryStream ms = ResponseUrlBySocket(Url, UseSocks5Proxy, ProxyUrl, ProxyPort);

            // ToArray returns only the bytes actually written. GetBuffer would also
            // expose the unused tail of the internal buffer, which decodes to
            // trailing NUL characters.
            byte[] raw = ms.ToArray();

            string strHtml = Encoding.GetEncoding(DefaultEncodingName).GetString(raw);
            string compact = strHtml.Replace(" ", "");
            if (compact.IndexOf("charset=utf-8", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                strHtml = Encoding.GetEncoding("utf-8").GetString(raw);
            }
            return strHtml;
        }

        /// <summary>
        /// Fetches <paramref name="Url"/> over a raw socket through the default
        /// SOCKS5 proxy (192.168.0.1:1080).
        /// </summary>
        /// <param name="Url">URL to fetch; a leading "http://" is optional.</param>
        /// <returns>The decoded response text.</returns>
        public string GetHtmlBySocket(string Url)
        {
            return GetHtmlBySocket(Url, true, DefaultProxyHost, DefaultProxyPort);
        }

        /// <summary>
        /// Sends an HTTP GET for <paramref name="Url"/> over a TCP socket and collects
        /// every byte the server sends until it closes the connection.
        /// </summary>
        /// <param name="Url">URL to fetch; a leading "http://" is optional.</param>
        /// <param name="UseSocks5Proxy">True to tunnel through the SOCKS5 proxy below.</param>
        /// <param name="ProxyUrl">Host name or address of the SOCKS5 proxy.</param>
        /// <param name="ProxyPort">TCP port of the SOCKS5 proxy.</param>
        /// <returns>
        /// A stream holding the raw response: status line, headers and body exactly
        /// as received.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="Url"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="Url"/> contains no host.</exception>
        /// <remarks>
        /// NOTE(review): the request is sent as HTTP/1.1, so a server may answer with
        /// chunked transfer encoding; the chunk framing is not decoded here.
        /// </remarks>
        public MemoryStream ResponseUrlBySocket(string Url, bool UseSocks5Proxy, string ProxyUrl, int ProxyPort)
        {
            string strHost;
            int port;
            string requestTarget;
            ParseHttpUrl(Url, out strHost, out port, out requestTarget);

            // The Host header must carry the port when it is not the default one.
            string hostHeader = (port == 80)
                ? strHost
                : strHost + ":" + port.ToString(System.Globalization.CultureInfo.InvariantCulture);

            string sendStr = "GET " + requestTarget + " HTTP/1.1\r\n"
                + "Host: " + hostHeader + "\r\n"
                + "Connection: Close\r\n\r\n";
            byte[] bytesSendStr = Encoding.ASCII.GetBytes(sendStr);

            MemoryStream ms = new MemoryStream();
            Socket socket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp);
            try
            {
                // Connection and send failures are allowed to propagate: swallowing
                // them only postpones the error to the first Receive call, where it
                // surfaces with a less helpful message.
                if (UseSocks5Proxy)
                {
                    socket.Connect(ProxyUrl, ProxyPort);
                    ConnectProxyServer(strHost, port, socket);
                }
                else
                {
                    socket.Connect(strHost, port);
                }

                socket.Send(bytesSendStr, bytesSendStr.Length, SocketFlags.None);

                // Read in 1024-byte slices until the peer closes the connection.
                byte[] recvBytes = new byte[1024];
                while (true)
                {
                    int received = socket.Receive(recvBytes, recvBytes.Length, SocketFlags.None);
                    if (received <= 0)
                    {
                        break;
                    }
                    ms.Write(recvBytes, 0, received);
                }

                socket.Shutdown(SocketShutdown.Both);
            }
            finally
            {
                // Always release the socket, including when connecting or reading fails.
                socket.Close();
            }
            return ms;
        }

        // Splits a URL such as "http://host:8080/Path?x=1" into host, port and the
        // request target ("/Path?x=1"), keeping the letter case of the path intact.
        private static void ParseHttpUrl(string url, out string host, out int port, out string requestTarget)
        {
            if (url == null)
            {
                throw new ArgumentNullException("Url");
            }

            const string scheme = "http://";
            string rest = url.StartsWith(scheme, StringComparison.OrdinalIgnoreCase)
                ? url.Substring(scheme.Length)
                : url;

            // The authority ends at the first '/' or '?'.
            int authorityEnd = rest.IndexOfAny(new char[] { '/', '?' });
            string authority = (authorityEnd < 0) ? rest : rest.Substring(0, authorityEnd);

            // An empty path is requested as "/"; "GET  HTTP/1.1" is not a valid request line.
            requestTarget = (authorityEnd < 0) ? "/" : rest.Substring(authorityEnd);
            if (requestTarget[0] != '/')
            {
                requestTarget = "/" + requestTarget;
            }

            host = authority;
            port = 80;
            int colon = authority.IndexOf(':');
            if (colon > 0)
            {
                host = authority.Substring(0, colon);
                port = Convert.ToInt32(authority.Substring(colon + 1), System.Globalization.CultureInfo.InvariantCulture);
            }

            if (host.Length == 0)
            {
                throw new ArgumentException("The URL does not contain a host name.", "Url");
            }
        }

        #region Anonymous SOCKS5 proxy handshake
        /// <summary>
        /// Performs the SOCKS5 greeting (no authentication) and CONNECT request on an
        /// already connected proxy socket. The caller owns and closes the socket.
        /// </summary>
        /// <param name="strRemoteHost">Host the proxy should connect to.</param>
        /// <param name="iRemotePort">Port the proxy should connect to.</param>
        /// <param name="sProxyServer">Socket already connected to the proxy.</param>
        /// <returns>True when the proxy accepted the CONNECT request.</returns>
        /// <exception cref="InvalidOperationException">
        /// The proxy replied unexpectedly, requires authentication, or the remote
        /// host has no IPv4 address.
        /// </exception>
        private bool ConnectProxyServer(string strRemoteHost, int iRemotePort, Socket sProxyServer)
        {
            // Greeting: version 5, one method offered, method 0 (no authentication).
            byte[] greeting = new byte[] { 5, 1, 0 };
            sProxyServer.Send(greeting, greeting.Length, SocketFlags.None);

            byte[] reply = new byte[10];
            int iRecCount = sProxyServer.Receive(reply, reply.Length, SocketFlags.None);
            if (iRecCount < 2)
            {
                throw new InvalidOperationException("不能获得代理服务器正确响应。");
            }
            if (reply[0] != 5 || (reply[1] != 0 && reply[1] != 2))
            {
                throw new InvalidOperationException("代理服务其返回的响应错误。");
            }
            if (reply[1] == 2)
            {
                throw new InvalidOperationException("代理服务器需要进行身份确认。");
            }

            // The request below uses address type 1 (IPv4), so pick an IPv4 address
            // explicitly; the first DNS result may be an IPv6 address.
            IPAddress ipv4 = null;
            foreach (IPAddress candidate in Dns.GetHostEntry(strRemoteHost).AddressList)
            {
                if (candidate.AddressFamily == AddressFamily.InterNetwork)
                {
                    ipv4 = candidate;
                    break;
                }
            }
            if (ipv4 == null)
            {
                throw new InvalidOperationException("No IPv4 address found for host '" + strRemoteHost + "'.");
            }
            byte[] addr = ipv4.GetAddressBytes();

            // CONNECT request: version 5, command 1 (connect), reserved 0,
            // address type 1 (IPv4), four address bytes, port in network byte order.
            byte[] connectRequest = new byte[]
            {
                5, 1, 0, 1,
                addr[0], addr[1], addr[2], addr[3],
                (byte)(iRemotePort / 256),
                (byte)(iRemotePort % 256)
            };
            sProxyServer.Send(connectRequest, connectRequest.Length, SocketFlags.None);

            iRecCount = sProxyServer.Receive(reply, reply.Length, SocketFlags.None);
            if (iRecCount < 2 || reply[0] != 5 || reply[1] != 0)
            {
                throw new InvalidOperationException("第二次连接Socks5代理返回数据出错。");
            }
            return true;
        }
        #endregion
    }
}



更多资料:http://www.34v.com


qiezic 2006-09-01
  • 打赏
  • 举报
回复
谁有能抓取BT网站页面跟种子的代码啊
http://bt3.btchina.net/
srz007 2006-09-01
  • 打赏
  • 举报
回复
如果抓到的是乱码的话应该是你的编码格式的问题吧
qiezic 2006-09-01
  • 打赏
  • 举报
回复
是啊,没用

楼上知道的话还望赐教啊
555555555555555555555555555555555555555555555555
ruan_hg 2006-09-01
  • 打赏
  • 举报
回复
你程序中的cook好像是画蛇添足
ilove8 2006-09-01
  • 打赏
  • 举报
回复
mark
qiezic 2006-09-01
  • 打赏
  • 举报
回复
试了下,上面的代码没绕过防盗链

大家继续帮忙,谢谢
qiezic 2006-08-31
  • 打赏
  • 举报
回复
望各位奔走相告,帮我解决问题,谢谢、谢谢、谢谢
qiezic 2006-08-31
  • 打赏
  • 举报
回复
楼上代码抓不到代码,下一位继续……
Avoid 2006-08-31
  • 打赏
  • 举报
回复
/// <summary>
/// Downloads the page at <paramref name="sUrl"/> and returns its text decoded with
/// the system default encoding. On any failure the user is asked whether to retry.
/// </summary>
/// <param name="sUrl">URL of the page to download.</param>
/// <returns>The page text.</returns>
/// <remarks>
/// When the user answers "No" in the retry dialog, the exception from the last
/// attempt is rethrown to the caller.
/// </remarks>
private string GetHtmlString(string sUrl)
{
    while (true)
    {
        try
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(sUrl);

            // The using blocks release the response, stream and reader on both the
            // success path and the failure path, so nothing from a failed attempt
            // leaks into the next one.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream stream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(stream, System.Text.Encoding.Default))
            {
                return reader.ReadToEnd();
            }
        }
        catch (Exception)
        {
            DialogResult result =
                MessageBox.Show(null, "Http request time out. Do you want to retry?", "Time Out Error", MessageBoxButtons.YesNo);
            if (result == DialogResult.No)
            {
                // "throw;" keeps the original stack trace; "throw e;" would reset it.
                throw;
            }
        }
    }
}

110,579

社区成员

发帖
与我相关
我的任务
社区描述
.NET技术 C#
社区管理员
  • C#
  • Web++
  • by_封爱
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告

让您成为最强悍的C#开发者

试试用AI创作助手写篇文章吧