在Java中解析CDATA XML
CDATA(Character Data)是XML中的一种特殊区域,其中的文本不会被解析器当作标记解析,而是原样保留。在Java中解析包含CDATA的XML可以使用多种方法,以下是几种常见的实现方式:

使用DOM解析器
import org.w3c.dom.*;
import javax.xml.parsers.*;
import java.io.*;
/**
 * Demonstrates reading CDATA sections with the DOM API.
 */
public class CDataDOMParser {

    /**
     * Parses {@code input.xml} from the working directory and prints the data
     * of every CDATA section that is a direct child of a {@code content}
     * element. Any exception is reported with a stack trace.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            Document document = builder.parse(new File("input.xml"));

            NodeList contentElements = document.getElementsByTagName("content");
            int total = contentElements.getLength();
            for (int index = 0; index < total; index++) {
                // getElementsByTagName only ever yields element nodes.
                printCDataChildren(contentElements.item(index));
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Prints each direct child of {@code parent} that is a CDATA section. */
    private static void printCDataChildren(Node parent) {
        for (Node child = parent.getFirstChild(); child != null; child = child.getNextSibling()) {
            if (child.getNodeType() != Node.CDATA_SECTION_NODE) {
                continue;
            }
            CDATASection section = (CDATASection) child;
            System.out.println("CDATA内容: " + section.getData());
        }
    }
}
使用SAX解析器
import org.xml.sax.*;
import org.xml.sax.helpers.DefaultHandler;
import javax.xml.parsers.*;
import java.io.*;
/**
 * SAX handler that prints the text of every CDATA section whose parent
 * element is {@code content}.
 *
 * <p>A plain {@code DefaultHandler} cannot tell CDATA from ordinary character
 * data: both arrive through {@link #characters(char[], int, int)}. CDATA
 * boundaries are only reported to a lexical handler, so this class extends
 * {@code DefaultHandler2} (still a {@code DefaultHandler}) and is registered
 * under the lexical-handler property in {@link #main(String[])}.
 */
public class CDataSAXParser extends org.xml.sax.ext.DefaultHandler2 {
    /** Standard SAX2 property id under which a lexical handler is registered. */
    private static final String LEXICAL_HANDLER_PROPERTY =
            "http://xml.org/sax/properties/lexical-handler";
    private static final String TARGET_ELEMENT = "content";

    /** Qualified names of the currently open elements, innermost first. */
    private final java.util.Deque<String> openElements = new java.util.ArrayDeque<>();
    /** True only while inside a CDATA section whose parent is the target element. */
    private boolean inCData = false;
    private final StringBuilder cDataContent = new StringBuilder();

    @Override
    public void startElement(String uri, String localName,
            String qName, Attributes attributes) {
        openElements.push(qName);
    }

    @Override
    public void endElement(String uri, String localName, String qName) {
        openElements.pop();
    }

    /** Starts collecting text if the section sits directly inside the target element. */
    @Override
    public void startCDATA() {
        inCData = TARGET_ELEMENT.equals(openElements.peek());
        cDataContent.setLength(0);
    }

    /** Accumulates text; a single CDATA section may be delivered in several chunks. */
    @Override
    public void characters(char[] ch, int start, int length) {
        if (inCData) {
            cDataContent.append(ch, start, length);
        }
    }

    /** Prints the collected text of the section that just ended. */
    @Override
    public void endCDATA() {
        if (inCData) {
            inCData = false;
            System.out.println("CDATA内容: " + cDataContent.toString());
        }
    }

    /**
     * Parses {@code input.xml} from the working directory and prints each
     * CDATA section found directly inside a {@code content} element.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            SAXParserFactory factory = SAXParserFactory.newInstance();
            SAXParser parser = factory.newSAXParser();
            CDataSAXParser handler = new CDataSAXParser();
            // Without this registration startCDATA()/endCDATA() are never invoked.
            parser.setProperty(LEXICAL_HANDLER_PROPERTY, handler);
            parser.parse(new File("input.xml"), handler);
        } catch (ParserConfigurationException | SAXException | IOException e) {
            e.printStackTrace();
        }
    }
}
使用JAXB (Java Architecture for XML Binding)
import javax.xml.bind.*;
import javax.xml.bind.annotation.*;
import java.io.*;
/**
 * JAXB binding for a root element whose text content is exposed as a string.
 *
 * <p>No CDATA-specific annotation is needed for reading: the XML parser hands
 * CDATA sections to JAXB as ordinary character data, so {@code @XmlValue}
 * receives their text. The former {@code @XmlCDATA} annotation was removed
 * because it is not part of {@code javax.xml.bind.annotation} (it is an
 * EclipseLink MOXy extension that this file does not import) and it only
 * influences marshalling.
 */
@XmlRootElement
@XmlAccessorType(XmlAccessType.FIELD)
class CDataWrapper {
    /** Text content of the root element, including any CDATA text. */
    @XmlValue
    private String content;

    /** Returns the text content read from the root element. */
    public String getContent() {
        return content;
    }

    /** Sets the text content of the root element. */
    public void setContent(String content) {
        this.content = content;
    }
}
/**
 * Demonstrates reading the text of an XML document through JAXB.
 */
public class CDataJAXBParser {

    /**
     * Unmarshals {@code input.xml} from the working directory into a
     * {@code CDataWrapper} and prints its content. Any exception is reported
     * with a stack trace.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            Unmarshaller reader = JAXBContext.newInstance(CDataWrapper.class).createUnmarshaller();
            File source = new File("input.xml");
            Object root = reader.unmarshal(source);
            CDataWrapper wrapper = (CDataWrapper) root;
            System.out.println("CDATA内容: " + wrapper.getContent());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
使用StAX (Streaming API for XML)
import javax.xml.stream.*;
import javax.xml.stream.events.*;
import java.io.*;
public class CDataStAXParser {
public static void main(String[] args) {
try {
XMLInputFactory factory = XMLInputFactory.newInstance();
XMLEventReader eventReader = factory.createXMLEventReader(new FileInputStream("input.xml"));
while (eventReader.hasNext()) {
XMLEvent event = eventReader.nextEvent();
if (event.isStartElement()) {
StartElement startElement = event.asStartElement();
if (startElement.getName().getLocalPart().equals("content")) {
event = eventReader.nextEvent();
if (event.isCharacters()) {
Characters characters = event.asCharacters();
if (characters.isCData()) {
System.out.println("CDATA内容: " + characters.getData());
}
}
}
}
}
} catch (Exception e) {
e.printStackTrace();
}
}
}
注意事项
- CDATA识别:在DOM中,CDATA节点是Node.CDATA_SECTION_NODE类型;在SAX中,characters()方法本身无法区分CDATA与普通文本,需要注册LexicalHandler,通过startCDATA()/endCDATA()回调来判断;在StAX中,Characters对象有isCData()方法。
- 性能考虑:
- DOM:适合小型XML文档,将整个文档加载到内存
- SAX:适合大型XML文档,事件驱动,内存占用小
- StAX:介于DOM和SAX之间,提供更灵活的解析控制
- XML声明:确保你的XML文件有正确的声明,如 <?xml version="1.0" encoding="UTF-8"?>
- 依赖:DOM、SAX和StAX的API及其实现(JAXP)都包含在JDK中,无需额外依赖;JAXB需要JAXB 2.0+,且自JDK 11起已从JDK中移除,需要单独添加依赖。
(图片来源网络,侵删)
选择哪种方法取决于你的具体需求,如XML大小、性能要求和代码复杂度等因素。

