httpclient 4.0 乱码问题 - 急

zdyhlp 2008-05-30 02:31:58
代码如下,web服务器的缺省字符集可能改变了,返回的总是乱码,前两天都还好的:
那位高手请帮忙分析一下原因,给出解决方案。

package com.zdy.stock;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.URISyntaxException;

import org.apache.http.HttpEntity;
import org.apache.http.HttpException;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.utils.URIUtils;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.protocol.HTTP;

public class HttpClientTest {

public static void main(String[] args) {
HttpClient client = new DefaultHttpClient();
//client.getParams().setParameter("http.protocol.content-charset", "UTF-8");
client.getParams().setParameter(HTTP.CONTENT_ENCODING, "UTF-8");
//client.getParams().setParameter(HTTP.CHARSET_PARAM, "UTF-8");
//client.getParams().setParameter(HTTP.DEFAULT_PROTOCOL_CHARSET, "UTF-8");
HttpResponse response = null;
HttpUriRequest uriRequest = null;
//HttpGet httpGet = null;
String webPageContent = null;
try {
uriRequest = new HttpGet(URIUtils.createURI("http","bill.finance.sina.com.cn",80,"bill/detail.php","stock_code=sh601699",null));
//uriRequest.getParams().setParameter(HTTP.CONTENT_ENCODING, "UTF-8");
//uriRequest.getParams().setParameter(HTTP.DEFAULT_PROTOCOL_CHARSET, "UTF-8");
//uriRequest.getParams().setParameter(HTTP.DEFAULT_CONTENT_CHARSET, "UTF-8");
//uriRequest.getParams().setParameter(HTTP.CHARSET_PARAM, "UTF-8");
uriRequest.setHeader("Content-type", "text/xml; charset=GB2312");
uriRequest.setHeader("User-Agent","Mozilla/4.0 (compatible; MSIE 6.0; Windows 2000)");
} catch (URISyntaxException e1) {
e1.printStackTrace();
}

int tryCount = 0;
boolean success = false;
while (tryCount < 10 && !success) {
try {
tryCount++;
//response = client.execute(httpGet);
response = client.execute(uriRequest);
//response.setLocale(Locale.CHINA);
System.out.println("Locale = " + response.getLocale());
//client.getParams().setParameter(HTTP.CONTENT_ENCODING, "UTF-8");
//response.addHeader(HTTP.CONTENT_ENCODING, "UTF-8");
success = true;
} catch (HttpException e) {
if(tryCount==10) e.printStackTrace();
else System.out.print("HttpException,Try again!");
success = false;
} catch (IOException e) {
if(tryCount==10) e.printStackTrace();
else System.out.print("IOException,Try again!");
success = false;
}
}

HttpEntity entity = response.getEntity();

BufferedReader in = null;
if (entity != null) {
try {
in = new BufferedReader(new InputStreamReader(entity.getContent(),"GBK"));
//in = new BufferedReader(new InputStreamReader(entity.getContent()));

StringBuffer buffer = new StringBuffer();
String line = null;
while ((line = in.readLine()) != null) {
buffer.append(line);
buffer.append('\n');
}
webPageContent = buffer.toString();
} catch (IOException e) {
e.printStackTrace();
}
}

byte [] b=null;

try {
b = webPageContent.getBytes("UTF-8");
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
}
try {
String name = new String(b, "UTF-8");
System.out.print(name);
name = new String(b, "GB2312");
System.out.print(name);
name = new String(b, "GBK");
System.out.print(name);
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
}
}

}
...全文
1094 9 打赏 收藏 转发到动态 举报
写回复
用AI写文章
9 条回复
切换为时间正序
请发表友善的回复…
发表回复
zhj92lxs 2008-05-31
  • 打赏
  • 举报
回复
不清楚
zdyhlp 2008-05-31
  • 打赏
  • 举报
回复
Tavor:看编码是GB2312,
改成client.getParams().setParameter(HTTP.CONTENT_ENCODING, "GB2312");
但还是不能解决乱码的问题。
sunyujia 2008-05-31
  • 打赏
  • 举报
回复
我这不行,报java.util.zip.DataFormatException: incomplete dynamic bit lengths tree
如果lz成功的话请指点下。

public class Test {
public static void main(String[] args) throws Exception {
HttpClient client = new HttpClient();
client.getHostConfiguration().setHost("bill.finance.sina.com.cn", 80,
"http");
GetMethod mt = new GetMethod("/bill/detail.php?stock_code=sh601699");
mt.addRequestHeader("Content-type", "text/xml; charset=GB2312");
mt.addRequestHeader("User-Agent","Mozilla/4.0 (compatible; MSIE 6.0; Windows 2000)");
client.executeMethod(mt);

byte[] outBytes = new byte[1024];//只取一个字节做测试
Inflater inflater = new Inflater(true);
inflater.setInput(mt.getResponseBody());
try {
inflater.inflate(outBytes, 0, outBytes.length);
System.out.println(new String(outBytes));
} catch (DataFormatException e) {
e.printStackTrace();
}
}
}
tavor 2008-05-31
  • 打赏
  • 举报
回复
强,原来用Inflater 类就可以,我用了InflaterInputStream类,结果不行,晕呀
tavor 2008-05-31
  • 打赏
  • 举报
回复
我也是这么认为,查了好半天,也没找着httpclient对它的处理,我在4.0的example目录下找到对GZIP的压缩方式的处理,但根本没有找到对DEFLATE压缩方式的处理,也没找到相似的类,
最后在GOOGLE上搜了一下,发现有讨论这个问题,我按照上面的方式写了一下代码,如下

HttpResponse rsp = client.execute(target, req, null);
entity = rsp.getEntity();

InflaterInputStream encoded = new InflaterInputStream(entity.getContent());
final ByteArrayOutputStream decoded = new ByteArrayOutputStream ();
final byte buffer[] = new byte[1024];
int length;
while((length = encoded.read (buffer)) != -1)
{
decoded.write (buffer, 0, length);
}

// closing
encoded.close ();
decoded.close ();
if (entity != null) {
System.out.println(new String (decoded.toByteArray(),"gb2312"));//EntityUtils.toString(rsp.getEntity()));
}

结果还是不行,会报下面解码的错

Exception in thread "main" java.util.zip.ZipException: unknown compression method
at java.util.zip.InflaterInputStream.read(InflaterInputStream.java:140)
at java.io.FilterInputStream.read(FilterInputStream.java:90)
at ClientExecuteDirect.main(ClientExecuteDirect.java:112)


网上的讨论
http://markmail.org/message/kxo7lsmy4kuexrym
zdyhlp 2008-05-31
  • 打赏
  • 举报
回复
终于解决掉了这个问题,果然是body被压缩了,解压即可.
byte[] outBytes = new byte[32768];
Inflater inflater = new Inflater(true);
inflater.setInput(responseBodyBytes);
try {
inflater.inflate(outBytes);
System.out.println(new String(outBytes));
} catch (DataFormatException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
zdyhlp 2008-05-31
  • 打赏
  • 举报
回复
编码类型怎么是deflate,似乎被压缩了
http://bill.finance.sina.com.cn/bill/detail.php?stock_code=sh601699
Content-Length: 6396
Date: Sat, 31 May 2008 10:03:13 GMT
Content-Type: text/html
Content-Encoding: deflate
Server: nginx/0.5.19
tavor 2008-05-30
  • 打赏
  • 举报
回复
直接访问页面,看IE中乱码不,如不乱,可右键看它使用的编码方式,
然后你在代码里用它的这种编码方式解析就OK了

67,514

社区成员

发帖
与我相关
我的任务
社区描述
J2EE只是Java企业应用。我们需要一个跨J2SE/WEB/EJB的微容器,保护我们的业务核心组件(中间件),以延续它的生命力,而不是依赖J2SE/J2EE版本。
社区管理员
  • Java EE
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧