64,656
社区成员
发帖
与我相关
我的任务
分享
#include <WinSock2.h>
#include <cstring>
#include <vector>
#include <fstream>
#include <map>
#include <iostream>
#include <boost/regex.hpp>
using namespace std;
// Link against Winsock (MSVC); without it every socket call is an unresolved external.
#pragma comment( lib , "ws2_32.lib")
// Crawl table: server host name -> URLs recorded for that server.
// NOTE: the key is a raw char*, so the map orders and compares keys by pointer
// value, not by string content.
map<char *,vector<char *> > web;
char recvBuf[1024*200+1] = {0}; // receive buffer (one byte larger than the recv() size used below)
char sendBuff[200] = "GET "; // send buffer, pre-seeded with the HTTP method
// Backslashes must be doubled inside a C++ string literal; with single
// backslashes "\w" and "\." are invalid escape sequences and the regex engine
// never sees the intended pattern.
const char *str = "http://(\\w+\\.\\w+\\.\\w+)(/\\w+)*(/\\w+\\.\\w){1}";
boost::basic_regex<char> regStr(str); // regular expression used to find http links
boost::cmatch httpSet; // holds the most recent http match
// Connects to a server and downloads its pages.
// The parameter type must match the definition further down, otherwise main()
// binds to an overload that is never defined.
bool connectServ(char * const &server);
// Downloads one page and records the links found in it.
void getPage(char *server,
             char * http,
             SOCKET &sockClient,
             map<char *,vector<char *> > &web);
int main(int argc, char **argv)
{
ofstream outFile;
char * const index="www.hnu.cn";
web[index].push_back("http://www.hnu.cn/");
map<char *,vector<char *>>::iterator map_It = web.begin();
for (;map_It != web.end();map_It++)
{
//从map对象中读取服务器和网址进行访问
connectServ(map_It->first);
}
outFile.open("http.txt",ios::out|ios::app);
for (;map_It != web.end();map_It++)
{
outFile<<"Server: "<<map_It->first<<endl;
vector<char *>::iterator beg_it = map_It->second.begin();
vector<char *>::iterator end_it = map_It->second.end();
for (;beg_it != end_it; beg_it++)
{
//打印服务器下所有网址
outFile<<*beg_it<<endl;
}
}
return 0;
}
bool connectServ(char * const &server)
{
WORD wVersionRequested;
WSADATA wsaData;
int err;
wVersionRequested = MAKEWORD( 1, 1 );
err = WSAStartup( wVersionRequested, &wsaData );
if ( err != 0 )
{
return false;
}
if ( LOBYTE( wsaData.wVersion ) != 1 ||
HIBYTE( wsaData.wVersion ) != 1 )
{
WSACleanup( );
return false;
}
SOCKET sockClient=socket(AF_INET,SOCK_STREAM,0);
SOCKADDR_IN addrSrv;
hostent* remoteHost;
char * const host_name= server;
unsigned int addr;
if (isalpha(host_name[0]))
{ /* host address is a name */
if (host_name[strlen(host_name)-1] == '\n')
host_name[strlen(host_name)-1] = '\0';
remoteHost = gethostbyname(host_name);
}
else
{
addr = inet_addr(host_name);
remoteHost = gethostbyaddr((char *)&addr, 4, AF_INET);
}
memcpy(&addrSrv.sin_addr.S_un.S_addr,remoteHost->h_addr,remoteHost->h_length);
if (WSAGetLastError() != 0)
{
if (WSAGetLastError() == 11001)
printf("Host not found...\nExiting.\n");
}
else
printf("error#:%ld\n", WSAGetLastError());
addrSrv.sin_family=AF_INET;
addrSrv.sin_port=htons(80);
connect(sockClient,(SOCKADDR*)&addrSrv,sizeof(SOCKADDR));
vector<char *>::iterator iter = web[server].begin();
for (;iter != web[server].end();iter++)
{
//下载一个服务器上的网页
getPage(server, *iter,sockClient,web);
}
closesocket(sockClient);
WSACleanup();
cout<<"getPage success!"<<endl;
}
void getPage(char *server,char * http,SOCKET &sockClient,map<char *,vector<char *>> &web)
{
//http消息格式
strcpy(sendBuff,http);
strcpy(sendBuff," HTTP/1.0\r\nHost:");
strcpy(sendBuff,server);
strcpy(sendBuff,"\r\n\r\n");
send(sockClient,sendBuff,strlen(sendBuff),0);
int cc;
cc=recv(sockClient,recvBuf,1024*200,0);
char * tempBuf = recvBuf;
while(cc!=SOCKET_ERROR&&cc>0)
{
//循环下载网页数据并保存到map
while (boost::regex_search(tempBuf,httpSet,regStr))
{
string temp(httpSet[0].first,httpSet[0].second);
web[server].push_back(const_cast<char *>(temp.c_str()));
tempBuf = const_cast<char *>(httpSet[0].second);
}
cc=recv(sockClient,recvBuf,1024*200,0);
}
}
#include <WinSock2.h>
#include <cstring>
#include <vector>
#include <fstream>
#include <map>
#include <iostream>
#include <boost/regex.hpp>
using namespace std;
// Link against Winsock (MSVC).
#pragma comment( lib , "ws2_32.lib")
// Crawl table: server host name -> URLs recorded for that server.
// NOTE: the key is a raw char*, so the map orders and compares keys by pointer
// value, not by string content.
map<char *,vector<char *> > web;
char recvBuf[1024*200+1] = {0}; // receive buffer (one byte larger than the recv() size used below)
char sendBuff[200] = "GET "; // send buffer, pre-seeded with the HTTP method
// const: a string literal must not be bound to a mutable char* (ill-formed since C++11).
const char *str = "http://(\\w+\\.\\w+\\.\\w+)(/\\w+)*(/\\w+\\.\\w){1}";
boost::basic_regex<char> regStr(str); // regular expression used to find http links
boost::cmatch httpSet; // holds the most recent http match
void connectServ(char * const &server); // connects to a server and downloads its pages
// Downloads one page and records the links found in it.
void getPage(char *const &server,
             char * http,
             SOCKET &sockClient,
             map<char *,vector<char *> > &web);
ofstream outFile; // shared output file; opened by main(), written by main() and getPage()
int main(int argc, char **argv)
{
outFile.open("http.txt",ios::out|ios::app);
char * const index="www.hnu.cn";
web[index].push_back("http://www.hnu.cn/");
map<char *,vector<char *> >::iterator map_It = web.begin();
for (;map_It != web.end();map_It++)
{
//从map对象中读取服务器和网址进行访问
connectServ(map_It->first);
}
for (;map_It != web.end();map_It++)
{
outFile<<"Server: "<<map_It->first<<endl;
vector<char *>::iterator beg_it = map_It->second.begin();
vector<char *>::iterator end_it = map_It->second.end();
for (;beg_it != end_it; beg_it++)
{
//打印服务器下所有网址
outFile<<*beg_it<<endl;
}
}
return 0;
}
void connectServ(char * const &server)
{
WORD wVersionRequested;
WSADATA wsaData;
int err;
wVersionRequested = MAKEWORD( 1, 1 );
err = WSAStartup( wVersionRequested, &wsaData );
if ( err != 0 )
{
return;
}
if ( LOBYTE( wsaData.wVersion ) != 1 ||
HIBYTE( wsaData.wVersion ) != 1 )
{
WSACleanup( );
return;
}
SOCKET sockClient=socket(AF_INET,SOCK_STREAM,0);
SOCKADDR_IN addrSrv;
hostent* remoteHost;
char * const host_name= server;
unsigned int addr;
if (isalpha(host_name[0]))
{ /* host address is a name */
if (host_name[strlen(host_name)-1] == '\n')
host_name[strlen(host_name)-1] = '\0';
remoteHost = gethostbyname(host_name);
}
else
{
addr = inet_addr(host_name);
remoteHost = gethostbyaddr((char *)&addr, 4, AF_INET);
}
::memcpy(&addrSrv.sin_addr.S_un.S_addr,remoteHost->h_addr,remoteHost->h_length);
if (WSAGetLastError() != 0)
{
if (WSAGetLastError() == 11001)
printf("Host not found...\nExiting.\n");
}
else
printf("error#:%ld\n", WSAGetLastError());
addrSrv.sin_family=AF_INET;
addrSrv.sin_port=htons(80);
connect(sockClient,(SOCKADDR*)&addrSrv,sizeof(SOCKADDR));
vector<char *>::iterator iter = web[server].begin();
for (;iter != web[server].end();iter++)
{
//下载一个服务器上的网页
getPage(server, *iter,sockClient,web);
}
closesocket(sockClient);
WSACleanup();
cout<<"getPage success!"<<endl;
}
void getPage(char *const &server,char * http,SOCKET &sockClient,map<char *,vector<char *> > &web)
{
//http消息格式
::strcat(sendBuff,http);
::strcat(sendBuff," HTTP/1.0\r\nHost:");
::strcat(sendBuff,server);
::strcat(sendBuff,"\r\n\r\n");
send(sockClient,sendBuff,strlen(sendBuff),0);
int cc;
cc=recv(sockClient,recvBuf,1024*200,0);
char * tempBuf = recvBuf;
while(cc!=SOCKET_ERROR&&cc>0)
{
//循环下载网页数据并保存到map
while (boost::regex_search(tempBuf,httpSet,regStr))
{
string templink(httpSet[0].first,httpSet[0].second);//保存超链接
string tempserver(httpSet[1].first,httpSet[1].second);
char *const server = const_cast<char *>(tempserver.c_str());
web[server].push_back(const_cast<char *>(templink.c_str()));
tempBuf = const_cast<char *>(httpSet[0].second);
outFile<<tempserver<<endl;
outFile<<templink<<endl;
}
cc=recv(sockClient,recvBuf,1024*200,0);
}
}
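// Compiles under VC6. Problems in your version: the regular expression was written incorrectly,
// the Winsock lib was not linked, several str* calls had the wrong scope qualification, and the
// ">>" in map<char *,vector<char *> > web; had to be changed to "> >".
// I did not have time to debug the rest - have to run.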
#include <WinSock2.h>
#include <cstring>
#include <vector>
#include <fstream>
#include <map>
#include <iostream>
#include <boost/regex.hpp>
using namespace std;
// Link against Winsock (MSVC).
#pragma comment( lib , "ws2_32.lib")
// Crawl table: server host name -> URLs recorded for that server.
// NOTE: the key is a raw char*, so the map orders and compares keys by pointer
// value, not by string content.
map<char *,vector<char *> > web;
char recvBuf[1024*200+1] = {0}; // receive buffer (one byte larger than the recv() size used below)
char sendBuff[200] = "GET "; // send buffer, pre-seeded with the HTTP method
// const: a string literal must not be bound to a mutable char* (ill-formed since C++11).
const char *str = "http://(\\w+\\.\\w+\\.\\w+)(/\\w+)*(/\\w+\\.\\w){1}";
boost::basic_regex<char> regStr(str); // regular expression used to find http links
boost::cmatch httpSet; // holds the most recent http match
// Connects to a server and downloads its pages.
// The parameter type must match the definition further down, otherwise main()
// binds to an overload that is never defined.
bool connectServ(char * const &server);
// Downloads one page and records the links found in it.
void getPage(char *server,
             char * http,
             SOCKET &sockClient,
             map<char *,vector<char *> > &web);
int main(int argc, char **argv)
{
ofstream outFile;
char * const index="www.hnu.cn";
web[index].push_back("http://www.hnu.cn/");
map<char *,vector<char *> >::iterator map_It = web.begin();
for (;map_It != web.end();map_It++)
{
//从map对象中读取服务器和网址进行访问
connectServ(map_It->first);
}
outFile.open("http.txt",ios::out|ios::app);
for (;map_It != web.end();map_It++)
{
outFile<<"Server: "<<map_It->first<<endl;
vector<char *>::iterator beg_it = map_It->second.begin();
vector<char *>::iterator end_it = map_It->second.end();
for (;beg_it != end_it; beg_it++)
{
//打印服务器下所有网址
outFile<<*beg_it<<endl;
}
}
return 0;
}
bool connectServ(char * const &server)
{
WORD wVersionRequested;
WSADATA wsaData;
int err;
wVersionRequested = MAKEWORD( 1, 1 );
err = WSAStartup( wVersionRequested, &wsaData );
if ( err != 0 )
{
return false;
}
if ( LOBYTE( wsaData.wVersion ) != 1 ||
HIBYTE( wsaData.wVersion ) != 1 )
{
WSACleanup( );
return false;
}
SOCKET sockClient=socket(AF_INET,SOCK_STREAM,0);
SOCKADDR_IN addrSrv;
hostent* remoteHost;
char * const host_name= server;
unsigned int addr;
if (isalpha(host_name[0]))
{ /* host address is a name */
if (host_name[strlen(host_name)-1] == '\n')
host_name[strlen(host_name)-1] = '\0';
remoteHost = gethostbyname(host_name);
}
else
{
addr = inet_addr(host_name);
remoteHost = gethostbyaddr((char *)&addr, 4, AF_INET);
}
::memcpy(&addrSrv.sin_addr.S_un.S_addr,remoteHost->h_addr,remoteHost->h_length);
if (WSAGetLastError() != 0)
{
if (WSAGetLastError() == 11001)
printf("Host not found...\nExiting.\n");
}
else
printf("error#:%ld\n", WSAGetLastError());
addrSrv.sin_family=AF_INET;
addrSrv.sin_port=htons(80);
connect(sockClient,(SOCKADDR*)&addrSrv,sizeof(SOCKADDR));
vector<char *>::iterator iter = web[server].begin();
for (;iter != web[server].end();iter++)
{
//下载一个服务器上的网页
getPage(server, *iter,sockClient,web);
}
closesocket(sockClient);
WSACleanup();
cout<<"getPage success!"<<endl;
}
void getPage(char *server,char * http,SOCKET &sockClient,map<char *,vector<char *> > &web)
{
//http消息格式
::strcpy(sendBuff,http);
::strcpy(sendBuff," HTTP/1.0\r\nHost:");
::strcpy(sendBuff,server);
::strcpy(sendBuff,"\r\n\r\n");
send(sockClient,sendBuff,strlen(sendBuff),0);
int cc;
cc=recv(sockClient,recvBuf,1024*200,0);
char * tempBuf = recvBuf;
while(cc!=SOCKET_ERROR&&cc>0)
{
//循环下载网页数据并保存到map
while (boost::regex_search(tempBuf,httpSet,regStr))
{
string temp(httpSet[0].first,httpSet[0].second);
web[server].push_back(const_cast<char *>(temp.c_str()));
tempBuf = const_cast<char *>(httpSet[0].second);
}
cc=recv(sockClient,recvBuf,1024*200,0);
}
}