【发布时间】:2016-11-08 08:44:10
【问题描述】:
我把 CLOB 中的 XML 从 Oracle 传给 Java 存储过程,在 Java 中对 XML 进行签名并返回结果,但返回的结果中非 ASCII 字符被转成了数字字符引用。例如:
输入xml:
<a>žė</a>
输出xml:
<a>&#382;&#279;</a>
如果我在 Java 中打印结果,看不到这些字符引用,但在 Oracle 中会出现。如果我直接把第一个 CLOB 的字符流写入结果 CLOB,也不会出现;只有在把 Document 转换回 CLOB 时才会出现。
要重现的代码:
create or replace and compile java source named test_encoding as
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.BufferedReader;
import java.io.Writer;
import java.io.StringWriter;
import java.security.*;
import java.security.cert.X509Certificate;
import java.sql.Clob;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TimeZone;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import sun.misc.BASE64Encoder;
import sun.misc.BASE64Decoder;
/**
 * Helpers for a Java stored procedure that parses an XML CLOB into a DOM
 * {@link Document}, serializes it back to text and writes that text into a
 * second CLOB.
 *
 * <p>Every byte/char conversion in this class uses UTF-8.
 */
public class test_encoding {

    /** Charset name used for all byte/char conversions in this class. */
    private static final String UTF8 = "UTF-8";

    /**
     * Parses {@code inputClob} as XML, serializes the resulting document and
     * writes the serialized text into {@code outputClob} starting at position 1.
     *
     * <p>Content already present in {@code outputClob} beyond the written
     * length is not truncated.
     *
     * @param inputClob  CLOB holding the XML text to parse
     * @param outputClob CLOB that receives the serialized document
     * @return the same {@code outputClob} instance that was passed in
     * @throws Exception if reading, parsing, serializing or writing fails
     */
    public static Clob getxml(Clob inputClob, Clob outputClob) throws Exception {
        Document document = getDocument(inputClob);
        String serialized = getStringFromIS(getInputStreamFromDocument(document));
        Writer writer = outputClob.setCharacterStream(1);
        try {
            writer.write(serialized);
        } finally {
            // Close even when the write fails so the CLOB stream is not leaked.
            writer.close();
        }
        return outputClob;
    }

    /**
     * Reads {@code reader} to the end and exposes the characters as a UTF-8
     * encoded byte stream. The reader is not closed by this method.
     *
     * @param reader character source to drain
     * @return an in-memory stream over the UTF-8 bytes of the text read
     * @throws Exception if reading fails
     */
    public static InputStream readerToInputStream(Reader reader) throws Exception {
        char[] charBuffer = new char[8 * 1024];
        StringBuilder builder = new StringBuilder();
        int numCharsRead;
        while ((numCharsRead = reader.read(charBuffer, 0, charBuffer.length)) != -1) {
            builder.append(charBuffer, 0, numCharsRead);
        }
        return new ByteArrayInputStream(builder.toString().getBytes(UTF8));
    }

    /**
     * Decodes the whole stream as UTF-8 text and closes it.
     *
     * <p>The text is copied character for character, so line terminators are
     * preserved. Reading line by line would drop them and alter the XML.
     *
     * @param is byte stream containing UTF-8 text; closed before returning
     * @return the decoded text, or an empty string for an empty stream
     * @throws Exception if reading fails
     */
    public static String getStringFromIS(InputStream is) throws Exception {
        Reader in = new BufferedReader(new InputStreamReader(is, UTF8));
        try {
            StringBuilder text = new StringBuilder();
            char[] buffer = new char[8 * 1024];
            int numCharsRead;
            while ((numCharsRead = in.read(buffer, 0, buffer.length)) != -1) {
                text.append(buffer, 0, numCharsRead);
            }
            return text.toString();
        } finally {
            in.close();
        }
    }

    /**
     * Parses the XML text stored in {@code xmlClob} with a namespace-aware
     * parser.
     *
     * <p>NOTE(review): DTD and external-entity handling are left at the parser
     * defaults; harden the factory if the CLOB can hold untrusted input.
     *
     * @param xmlClob CLOB holding the XML text
     * @return the parsed DOM document
     * @throws Exception if the CLOB cannot be read or the XML cannot be parsed
     */
    public static Document getDocument(Clob xmlClob) throws Exception {
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        dbf.setNamespaceAware(true);
        Reader clobReader = xmlClob.getCharacterStream();
        try {
            return dbf.newDocumentBuilder().parse(readerToInputStream(clobReader));
        } finally {
            clobReader.close();
        }
    }

    /**
     * Serializes {@code element} and its subtree to UTF-8 bytes.
     *
     * @param element DOM element to serialize
     * @return an in-memory stream over the serialized bytes
     * @throws Exception if the transformation fails
     */
    public static InputStream getInputStreamFromElement(Element element) throws Exception {
        return serialize(element);
    }

    /**
     * Serializes the whole {@code document} to UTF-8 bytes.
     *
     * @param document DOM document to serialize
     * @return an in-memory stream over the serialized bytes
     * @throws Exception if the transformation fails
     */
    public static InputStream getInputStreamFromDocument(Document document) throws Exception {
        return serialize(document);
    }

    /** Runs an identity transform of {@code node} into an in-memory UTF-8 byte stream. */
    private static InputStream serialize(Node node) throws Exception {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        Transformer transformer = TransformerFactory.newInstance().newTransformer();
        // Pin the output encoding so the bytes agree with the UTF-8 decoding in
        // getStringFromIS instead of depending on the transformer's default.
        transformer.setOutputProperty(OutputKeys.ENCODING, UTF8);
        transformer.transform(new DOMSource(node), new StreamResult(out));
        return new ByteArrayInputStream(out.toByteArray());
    }
}
plsql函数:
create or replace function test_encoding(
    p_input_clob  in clob,
    p_output_clop in clob
) return clob
is
    -- Call specification: publishes the static Java method
    -- test_encoding.getxml(Clob, Clob) as a PL/SQL function returning a CLOB.
    language java name 'test_encoding.getxml(java.sql.Clob, java.sql.Clob) return java.sql.Clob';
脚本:
declare
    -- Sample document containing two non-ASCII characters.
    l_source_xml clob := xmlType('<a>žė</a>').getClobVal();
    -- Receives the CLOB returned by the Java round trip.
    l_result_xml clob;
begin
    -- The Java side writes into this locator, so it must exist before the call.
    dbms_lob.createtemporary(lob_loc => l_result_xml, cache => false);

    -- Print the XML before and after the round trip for comparison.
    dbms_output.put_line(l_source_xml);
    l_result_xml := test_encoding(l_source_xml, l_result_xml);
    dbms_output.put_line(l_result_xml);
end;
/
更新
我已确定字符“žė”是在哪一步被转换成 HTML 字符引用的:是在对 Document 进行转换(序列化)时。我把 Document 转换为 byte[] 并打印其十六进制表示,其中已经包含 HTML 字符引用;在 Eclipse 中执行同样的操作则没有。例如:
Result in oracle: 3C613E26233338323B26233237393B3C2F613E
Result in eclipse: 3C613EC5BEC4973C2F613E
在 Oracle 和 Eclipse 中,我使用完全相同的函数将文档转换为 byte[] 并打印出字节数组的十六进制表示。
将文档转换为字节[]的函数:
/**
 * Serializes a DOM document to bytes with an identity transform.
 *
 * <p>The transformer is asked for UTF-8 output and to omit the XML
 * declaration.
 *
 * @param doc document to serialize
 * @return the serialized bytes
 * @throws Exception if the transformer cannot be created or the transform fails
 */
public static byte[] getDocumentByteArray(Document doc) throws Exception {
    Transformer serializer = TransformerFactory.newInstance().newTransformer();
    serializer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
    serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");

    ByteArrayOutputStream sink = new ByteArrayOutputStream();
    serializer.transform(new DOMSource(doc), new StreamResult(sink));
    return sink.toByteArray();
}
/**
 * Renders a byte array as an upper-case hexadecimal string, two digits per
 * byte and no separators.
 *
 * @param bytes bytes to render
 * @return the hexadecimal text; empty for an empty array
 */
public static String bytesToHex(byte[] bytes) {
    final String digits = "0123456789ABCDEF";
    StringBuilder hex = new StringBuilder(bytes.length * 2);
    for (byte b : bytes) {
        // High nibble first, then low nibble; masking discards sign extension.
        hex.append(digits.charAt((b >> 4) & 0x0F));
        hex.append(digits.charAt(b & 0x0F));
    }
    return hex.toString();
}
可能是转换器采用了语言环境的编码:在 Eclipse 中它取自我 PC 的 NLS_LANG,而在 Oracle 中取自 Oracle 服务器的 NLS_LANG。如果是这样,我该如何指定要使用的编码?因为看起来
transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
没用。
【问题讨论】:
-
getClobVal()产生的XML编码是什么? -
你是这个意思吗?
-
是的。我预计会看到类似“US-ASCII”的东西,它与所有东西都完美兼容,但需要大量转义。
-
你的数据库
NLS_CHARACTERSET是什么? -
是 AL32UTF8。
标签: java xml oracle domdocument clob