1问题描述

最近有人问我问题,说在Java中以UTF-8编码读UTF-8编码的文件test.htm,存入oracle数据库后再读出,一切正常。在Java中UTF-8编码读UTF-8编码的文件test.htm,存入kingbase数据库后再读出,文件中出现乱码,不知为何。

2具体现象

具体现象如下:

Java中读UTF-8编码的文件test.htm,存入数据库后再读出,文件中出现乱码

原文件图如下:


数据库编程之编码转换实例:乱码分析

3 存入数据库再读出后出现乱码如下:


数据库编程之编码转换实例:乱码分析

注:前面两幅图中“合资公司双方出资比例的问题”前面的那个“・”变成了“数据库编程之编码转换实例:乱码分析”。

4 相关代码

下面是模拟这个过程的读写文件和写读数据库的java代码:

import java.io.ByteArrayOutputStream;

import java.io.FileInputStream;

import java.io.FileOutputStream;

import java.sql.Clob;

import java.sql.Connection;

import java.sql.DriverManager;

import java.sql.PreparedStatement;

import java.sql.ResultSet;

import java.sql.SQLException;

import java.sql.Statement;

public class TestEoding {

public static void main(String[] args) throws Exception {

//create table and insert 1 recode

//oracle,kingbase

// create table t_test (id int,txt clob);

// insert into t_test (id,txt) values(1,'');

//pg

// create table t_test (id int,txt text);

// insert into t_test (id,txt) values(1,'');

//kingbase

String driver = "com.kingbase.Driver";

String url = "jdbc:kingbase://127.0.0.1:54321/BEIGANG?ClientEncoding=UNICODE";

String usr = "BEIGANG";

String passwd = "beigang";

String fn = "E:/test.htm";

/*

//pg

String driver = "org.postgresql.Driver";

String url = "jdbc:postgresql://127.0.0.1:5432/beigang";

String usr = "beigang";

String passwd = "beigang";

String fn = "E:/test.htm";

*/

if (args.length > 0) {

fn = args[0];

}

String value = getValue(fn);

testDB(driver, url, usr, passwd, fn + ".kb", value);

/*

//sqlserver

driver = "net.sourceforge.jtds.jdbc.Driver";

url = "jdbc:jtds:sqlserver://127.0.0.1:1433/beigang";

usr = "beigang";

passwd = "beigang";

testDB2(driver, url, usr, passwd, fn + ".mssql", value);

driver = "oracle.jdbc.driver.OracleDriver";

url = "jdbc:oracle:thin:@127.0.0.1:1521:beigang";

usr = "beigang";

passwd = "beigang";

testDB2(driver, url, usr, passwd, fn + ".oracle", value);

*/

}

protected static void testDB(String driver, String url, String usr,

String passwd, String fn, String value) throws SQLException,

Exception {

Class.forName(driver);

Connection conn = DriverManager.getConnection(url, usr, passwd);

String sql = "update t_test set txt = ? where id=1";

PreparedStatement stmt = conn.prepareStatement(sql);

stmt.setString(1, value);

stmt.executeUpdate();

stmt.close();

sql = "select txt from t_test where id=1";

stmt = conn.prepareStatement(sql);

ResultSet rs = stmt.executeQuery();

if (rs.next()) {

writeFile(fn, rs.getString(1));

}

/*

* pg

String sql = "update t_test set txt = '" +value +"' where id=1";

ResultSet rs;

stmt.executeUpdate(sql);

sql = "select txt from t_test where id=1";

rs = stmt.executeQuery(sql);

if (rs.next()) {

writeFile(fn, rs.getString(1));

}

*/

}

protected static void testDB2(String driver, String url, String usr,

String passwd, String fn, String value) throws SQLException,

Exception {

Class.forName(driver);

Connection conn = DriverManager.getConnection(url, usr, passwd);

String sql = "update t_test set txt = ? where id=1";

PreparedStatement stmt = conn.prepareStatement(sql);

stmt.setString(1, value);

stmt.executeUpdate();

stmt.close();

sql = "select txt from t_test where id=1";

stmt = conn.prepareStatement(sql);

ResultSet rs = stmt.executeQuery();

if (rs.next()) {

writeFile(fn, rs.getString(1));

}

}

private static void writeFile(String fn, String string) throws Exception {

FileOutputStream fos = new FileOutputStream(fn);

byte[] content = string.getBytes("UTF-8");

fos.write(content);

fos.flush();

fos.close();

}

private static String getValue(String fn) throws Exception {

ByteArrayOutputStream baos = new ByteArrayOutputStream(1024);

FileInputStream fins = new FileInputStream(fn);

int b = -1;

while ((b = fins.read()) != -1) {

baos.write(b);

}

return baos.toString("UTF-8");

//return baos.toString("GBK");

}

}

4 结论

结果测试确认,发现oracle、kingbase等数据库中存在同样问题,问题原因是编码"UTF8"的字符0xe383bb(就是“・”)在"GBK"没有相对应物。刚开始说的oracle不出现乱码,而kingbase出现乱码是他搞错了数据库服务器端编码,oracle用来UTF-8的编码,kingbase用了GBK的编码导致。

根本原因是"UTF8"编码的字符0xe383bb(就是"数据库编程之编码转换实例:乱码分析")在"GBK"没有相对应物。

5 数据库字符集和编码转换

参见我的博客http://blog.csdn.net/beiigang/article/details/7051605

6

把test.htm传上来,压缩文件使test.rar,有兴趣可以实践下

看了一下,不知道怎么把附件传上来,不知道这儿有这个功能没


相关文章:

  • 2022-02-07
  • 2022-12-23
  • 2022-01-02
  • 2022-01-08
  • 2022-12-23
  • 2022-01-08
  • 2021-06-13
猜你喜欢
  • 2021-09-21
  • 2021-04-09
  • 2022-12-23
  • 2022-12-23
  • 2021-10-18
  • 2021-05-06
  • 2021-09-21
相关资源
相似解决方案