-

- 加为好友
- 发送私信
- 在线聊天
|
| 发表于:2008-08-08 14:57:25 楼主 |
大家好,我在用代码下载网页时,发现下载后的内容有点问题:前部分的内容是正确的,到文件末尾就有错误了。这造成我调用 IHTMLAnchorElement::get_href 这个函数时,取到的链接是错误的。取得的链接如:about:blank/News/List.asp?CategoryID=14,正确的链接应该是:http://www.xxx.com/News/List.asp?CategoryID=14。请大家帮帮忙,谢谢! 下载代码: - C/C++ code
bool CHtmlContent::GetSourceHtml( const CString &theUrl, CStringBuf &buf )
{
CInternetFile* file = NULL;
CInternetSession session;
CString str;
try
{
file = (CInternetFile*) session.OpenURL(theUrl);
}
catch (CInternetException* m_pException)
{
file = NULL;
m_pException->Delete();
return false;
}
buf.m_buf="";
while( file->ReadString(str) != NULL )
{
buf.m_buf+=str+_T("\n");
}
PrintBuf(buf.m_buf);
return true;
}
//正确的 - HTML code
<DIV class=style4 align=center>电话:0595-87390967 传真:0595-87372536 ICP备案:<A class=style2 href="http://www.miibeian.gov.cn/">闽ICP备06001590号</A> <FONT face="Arial, Helvetica, sans-serif"><FONT face="Arial, Helvetica, sans-serif">Powered by </FONT><SPAN class=style50><FONT face=Verdana, sans-serif helvetica, arial,><B><A class=style41 href="http://www.harcw.net" target=_blank>harcw.net</A> <A class=style50 href="../admin/" target=_blank>管理...</A></B></FONT><FONT face=Verdana, sans-serif helvetica, arial,><B><A class=style50 href="/admin/" target=_blank></A>
<SCRIPT language=javascript src="http://js.users.51.la/1834712.js" type=text/javascript></SCRIPT>
<A href="http://www.51.la/?1834712" target=_blank><IMG style="BORDER-RIGHT: medium none; BORDER-TOP: medium none; BORDER-LEFT: medium none; BORDER-BOTTOM: medium none" alt="51.la 专业、免费、强健的访问统计" src="http://icon.ajiang.net/icon_8.gif"></A>
<SCRIPT>var a4712tf="51la";var a4712pu="";var a4712pf="51la";var a4712su=window.location;var a4712sf=document.referrer;var a4712of="";var a4712op="";var a4712ops=1;var a4712ot=1;var a4712d=new Date();var a4712color="";if (navigator.appName=="Netscape"){a4712color=screen.pixelDepth;} else {a4712color=screen.colorDepth;}</SCRIPT>
<SCRIPT>a4712tf=top.document.referrer;</SCRIPT>
<SCRIPT>a4712pu =window.parent.location;</SCRIPT>
<SCRIPT>a4712pf=window.parent.document.referrer;</SCRIPT>
<SCRIPT>a4712ops=document.cookie.match(new RegExp("(^| )AJSTAT_ok_pages=([^;]*)(;|$)"));a4712ops=(a4712ops==null)?1: (parseInt(unescape((a4712ops)[2]))+1);var a4712oe =new Date();a4712oe.setTime(a4712oe.getTime()+60*60*1000);document.cookie="AJSTAT_ok_pages="+a4712ops+ ";path=/;expires="+a4712oe.toGMTString();a4712ot=document.cookie.match(new RegExp("(^| )AJSTAT_ok_times=([^;]*)(;|$)"));if(a4712ot==null){a4712ot=1;}else{a4712ot=parseInt(unescape((a4712ot)[2])); a4712ot=(a4712ops==1)?(a4712ot+1):(a4712ot);}a4712oe.setTime(a4712oe.getTime()+365*24*60*60*1000);document.cookie="AJSTAT_ok_times="+a4712ot+";path=/;expires="+a4712oe.toGMTString();</SCRIPT>
<SCRIPT>a4712of=a4712sf;if(a4712pf!=="51la"){a4712of=a4712pf;}if(a4712tf!=="51la"){a4712of=a4712tf;}a4712op=a4712pu;try{lainframe}catch(e){a4712op=a4712su;}document.write('<img style="width:0px;height:0px" src="http://web.51.la/go.asp?we=A-Free-Service-for-Webmasters&svid=52&id=1834712&tpages='+a4712ops+'&ttimes='+a4712ot+'&tzone='+(0-a4712d.getTimezoneOffset()/60)+'&tcolor='+a4712color+'&sSize='+screen.width+','+screen.height+'&referrer='+escape(a4712of)+'&vpage='+escape(a4712op)+'" />');</SCRIPT>
<IMG style="WIDTH: 0px; HEIGHT: 0px" src="http://web.51.la/go.asp?we=A-Free-Service-for-Webmasters&svid=52&id=1834712&tpages=3&ttimes=5&tzone=8&tcolor=32&sSize=1024,768&referrer=http%3A//www.harc.gov.cn/main/persons.asp%3Fpage%3D2&vpage=http%3A//www.harc.gov.cn/main/persons.asp%3Fpage%3D1"> <NOSCRIPT><a href="http://www.51.la/?1834712" target="_blank"><img alt="我要啦免费统计" src="http://img.users.51.la/1834712.asp" style="border:none" /></a></NOSCRIPT></B></FONT></SPAN></FONT></DIV></TD></TR></TBODY></TABLE></TD></TR></TBODY></TABLE></BODY></HTML>
//下载下来的 - HTML code
<DIV class=style4 align=center>电话:0595-87390967 传真:0595-87372536 ICP备案:<A class=style2 href="http://www.miibeian.gov.cn/">闽ICP备06001590号</A> <FONT face="Arial, Helvetica, sans-serif"><FONT face="Arial, Helvetica, sans-serif">Powered by </FONT><SPAN class=style50><FONT face=Verdana, sans-serif helvetica, arial,><B><A class=style41 href="http://www.harcw.net" target=_blank>harcw.net</A> <A class=style50 href="../admin/" target=_blank>管理...</A></B></FONT><FONT face=Verdana, sans-serif helvetica, arial,><B><A class=style50 href="/admin/" target=_blank></A>
<SCRIPT language=javascript src="http://js.users.51.la/1834712.js" type=text/javascript></SCRIPT>
<NOSCRIPT><a href="http://www.51.la/?1834712" target="_blank"><img alt="我要啦免费统计" src="http://img.users.51.la/1834712.asp" style="border:none" /></a></NOSCRIPT></B></FONT></SPAN></FONT></DIV></TD></TR></TBODY></TABLE></TD></TR></TBODY></TABLE></BODY></HTML>
我搞不明白为什么根目录是 about:blank。我是通过上面的代码把 html 文件下载下来,然后再把这些内容通过下面的代码写到文档里面,再由文档取得 IHTMLAnchorElement,再通过 IHTMLAnchorElement::get_href 取得链接,结果发现链接的根目录不对。我把下载下来的内容与通过"ie的查看菜单==>源文件菜单"取得的内容对比了一下,结果发现内容有点出入,具体不同点在上面。 //将buf写入document - C/C++ code
// Creates an in-memory HTML document and writes the text held in buf.m_buf
// into it through IHTMLDocument2::write.
//
// Parameters:
//   buf - source of the HTML text (buf.m_buf).
//
// Returns the created document. If CoCreateInstance fails, the pointer is
// returned as CoCreateInstance left it. If building the SAFEARRAY fails,
// the document is returned without any content written to it.
//
// NOTE(review): the document is filled through write() and is never given a
// source URL, which looks like the reason relative hrefs are reported as
// "about:blank/..." by IHTMLAnchorElement::get_href. Prepending a
// <base href="..."> tag to the HTML may fix that - verify against MSHTML.
IHTMLDocument2Ptr CHtmlContent::GetDocument( CStringBuf &buf )
{
    IHTMLDocument2Ptr pDoc2;
    HRESULT hr = CoCreateInstance(CLSID_HTMLDocument, NULL, CLSCTX_INPROC_SERVER,
                                  IID_IHTMLDocument2, (void**)&pDoc2);
    if( FAILED(hr) )
    {
        return pDoc2;
    }

    // IHTMLDocument2::write takes a SAFEARRAY of VARIANTs; one BSTR element
    // carries the whole page.
    SAFEARRAY* psa = SafeArrayCreateVector(VT_VARIANT, 0, 1);
    if( psa == NULL )
    {
        return pDoc2;
    }

    VARIANT* param = NULL;
    hr = SafeArrayAccessData(psa, (LPVOID*)&param);
    if( SUCCEEDED(hr) )
    {
        _bstr_t bsData = (LPCTSTR)buf.m_buf;
        param->vt = VT_BSTR;
        // Give the array its own copy of the string. SafeArrayDestroy frees
        // the element's BSTR, and bsData frees its own in its destructor;
        // sharing one BSTR between them (as before) is a double free.
        param->bstrVal = bsData.copy();

        // Release the lock taken by SafeArrayAccessData before the array is
        // used or destroyed; a locked array cannot be destroyed.
        SafeArrayUnaccessData(psa);

        hr = pDoc2->write(psa);
        hr = pDoc2->close();
    }

    SafeArrayDestroy(psa);
    return pDoc2;
}
|
|
|
|
100
修改
删除
举报
引用
回复
| |