使用Java DOM解析XML字符串
Java DOM解析器会将整个XML文档加载到内存中并构建为树形结构,便于遍历和操作文档。下面介绍如何使用Java DOM解析XML字符串。

基本步骤
- 创建DocumentBuilderFactory和DocumentBuilder实例
- 将XML字符串转换为输入源
- 解析XML字符串并获取Document对象
- 遍历和操作XML文档
- 可选:将修改后的文档转换回字符串
示例代码
import org.w3c.dom.*;
import javax.xml.parsers.*;
import java.io.*;
/**
 * Demonstrates parsing an XML string with the JAXP DOM API and reading
 * element text and attributes from the resulting tree.
 */
public class DomParserExample {

    /**
     * Parses a hard-coded bookstore document and prints, for every
     * {@code <book>} element, its category, title, title language and author.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        String xmlString = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
                + "<bookstore>"
                + " <book category=\"cooking\">"
                + " <title lang=\"en\">Everyday Italian</title>"
                + " <author>Giada De Laurentiis</author>"
                + " <year>2005</year>"
                + " <price>30.00</price>"
                + " </book>"
                + " <book category=\"children\">"
                + " <title lang=\"en\">Harry Potter</title>"
                + " <author>J.K. Rowling</author>"
                + " <year>2005</year>"
                + " <price>29.99</price>"
                + " </book>"
                + "</bookstore>";
        try {
            // 1. Create the DocumentBuilderFactory and DocumentBuilder.
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            DocumentBuilder builder = factory.newDocumentBuilder();

            // 2. Wrap the XML string in an input source.
            //    InputSource lives in org.xml.sax; it is fully qualified here
            //    so the example compiles without an additional import.
            org.xml.sax.InputSource is =
                    new org.xml.sax.InputSource(new StringReader(xmlString));

            // 3. Parse the XML string and obtain the Document object.
            Document document = builder.parse(is);

            // 4. Traverse the document, starting at the root element.
            Element rootElement = document.getDocumentElement();
            System.out.println("Root element: " + rootElement.getNodeName());

            // Collect every <book> element below the root and print its details.
            NodeList bookList = rootElement.getElementsByTagName("book");
            for (int i = 0; i < bookList.getLength(); i++) {
                Node bookNode = bookList.item(i);
                if (bookNode.getNodeType() == Node.ELEMENT_NODE) {
                    printBook((Element) bookNode);
                }
            }

            // 5. Optional: convert the (possibly modified) document back to a string.
            //    Nothing is modified in this example, so only the heading is printed.
            System.out.println("\nModified XML (if any changes were made):");
        } catch (ParserConfigurationException | org.xml.sax.SAXException | IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints the category attribute, title text, title language and author
     * text of one {@code <book>} element. A missing title or author is
     * printed as an empty string instead of causing a NullPointerException.
     *
     * @param bookElement the {@code <book>} element to describe
     */
    private static void printBook(Element bookElement) {
        System.out.println("\nBook category: " + bookElement.getAttribute("category"));

        Element titleElement = firstDescendant(bookElement, "title");
        System.out.println("Title: " + textOf(titleElement));
        // getAttribute returns "" for an absent attribute; mirror that when
        // the element itself is absent.
        String lang = (titleElement == null) ? "" : titleElement.getAttribute("lang");
        System.out.println("Language: " + lang);

        System.out.println("Author: " + textOf(firstDescendant(bookElement, "author")));
    }

    /**
     * Returns the first descendant element of {@code parent} with the given
     * tag name, or {@code null} when there is none.
     *
     * @param parent  element to search below
     * @param tagName tag name to look for
     * @return the first matching element, or {@code null}
     */
    private static Element firstDescendant(Element parent, String tagName) {
        // NodeList.item returns null when the index is out of range.
        Node first = parent.getElementsByTagName(tagName).item(0);
        return (first instanceof Element) ? (Element) first : null;
    }

    /**
     * Returns the text content of {@code element}, or an empty string when
     * {@code element} is {@code null}.
     *
     * @param element element to read, may be {@code null}
     * @return the element's text content, never {@code null} for a null input
     */
    private static String textOf(Element element) {
        return (element == null) ? "" : element.getTextContent();
    }

    /**
     * Optional helper: placeholder for converting a Document to a string.
     * This example does not implement the conversion; a real implementation
     * would use a Transformer.
     *
     * @param doc the document to serialize (unused by this placeholder)
     * @return a fixed placeholder message
     * @throws Exception declared for a future implementation; never thrown here
     */
    private static String documentToString(Document doc) throws Exception {
        return "Document to string conversion not implemented in this example";
    }
}
更完整的示例(包含修改和输出)
import org.w3c.dom.*;
import javax.xml.parsers.*;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
/**
 * Demonstrates the full DOM round trip: parse an XML string, add a new
 * element subtree to the document, and serialize the result back to a string.
 */
public class CompleteDomExample {

    /**
     * Parses a hard-coded bookstore document, appends a new {@code <book>}
     * element to the root, and prints the serialized document.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        String xmlString = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
                + "<bookstore>"
                + " <book category=\"cooking\">"
                + " <title lang=\"en\">Everyday Italian</title>"
                + " <author>Giada De Laurentiis</author>"
                + " <year>2005</year>"
                + " <price>30.00</price>"
                + " </book>"
                + "</bookstore>";
        try {
            // 1. Create the DocumentBuilderFactory and DocumentBuilder.
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            DocumentBuilder builder = factory.newDocumentBuilder();

            // 2. Parse the XML string.
            //    InputSource lives in org.xml.sax; it is fully qualified here
            //    so the example compiles without an additional import.
            Document document = builder.parse(
                    new org.xml.sax.InputSource(new StringReader(xmlString)));

            // 3. Modify the document: build a new <book> and attach it to the root.
            Element newBook = document.createElement("book");
            newBook.setAttribute("category", "web");
            Element title = appendTextElement(document, newBook, "title", "Learning XML");
            title.setAttribute("lang", "en");
            appendTextElement(document, newBook, "author", "Erik T. Ray");
            appendTextElement(document, newBook, "year", "2003");
            appendTextElement(document, newBook, "price", "39.95");
            document.getDocumentElement().appendChild(newBook);

            // 4. Convert the modified document back to a string and print it.
            String modifiedXml = documentToString(document);
            System.out.println("Modified XML:");
            System.out.println(modifiedXml);
        } catch (ParserConfigurationException | org.xml.sax.SAXException
                | IOException | TransformerException e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates an element with the given tag name and text content and appends
     * it as the last child of {@code parent}.
     *
     * @param doc     document that owns the new element
     * @param parent  element that receives the new child
     * @param tagName tag name of the new element
     * @param text    text content of the new element
     * @return the newly created and appended element
     */
    private static Element appendTextElement(
            Document doc, Element parent, String tagName, String text) {
        Element child = doc.createElement(tagName);
        child.setTextContent(text);
        parent.appendChild(child);
        return child;
    }

    /**
     * Serializes a Document to a string using a default (identity) Transformer.
     * The output keeps the XML declaration, uses the XML method, UTF-8 as the
     * declared encoding, and requests indented output.
     *
     * @param doc the document to serialize
     * @return the serialized XML
     * @throws TransformerException if the transformer cannot be created or the
     *         transformation fails
     */
    private static String documentToString(Document doc) throws TransformerException {
        Transformer transformer = TransformerFactory.newInstance().newTransformer();
        transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no");
        transformer.setOutputProperty(OutputKeys.METHOD, "xml");
        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
        transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        // Implementation-specific property in the Apache XSLT namespace asking for
        // a 4-space indent; NOTE(review): support depends on the transformer in use.
        transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");

        StringWriter writer = new StringWriter();
        transformer.transform(new DOMSource(doc), new StreamResult(writer));
        return writer.toString();
    }
}
注意事项
- 内存消耗:DOM解析器会将整个XML文档加载到内存中,对于大型XML文件可能会消耗较多内存。
- 线程安全:DocumentBuilderFactory和DocumentBuilder不是线程安全的,每个线程应该创建自己的实例。
- 异常处理:解析XML时可能会抛出各种异常,如SAXException、IOException等,需要适当处理。
- 命名空间:如果XML文档使用命名空间,需要在创建DocumentBuilder之前调用DocumentBuilderFactory的setNamespaceAware(true)来支持命名空间解析。
(图片来源网络,侵删)
- 性能考虑:对于大型XML文件或需要频繁解析的场景,考虑使用SAX或StAX解析器,它们是事件驱动的,内存效率更高。
希望这些示例能帮助你理解如何使用Java DOM解析XML字符串!

