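Of course! Working with XML in Python is a common task, especially when dealing with configuration files, data interchange, or ensuring document structure. The primary tool for general XML handling is the built-in xml.etree.ElementTree module; validating a document against a schema (XSD) additionally requires a third-party library such as lxml, as shown in the validation section.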

Here's a comprehensive guide covering the main aspects of XML schema handling in Python.
Parsing an XML File (Reading)
This is the most common task: loading an XML file from disk or a string and navigating its structure.
The XML File (library.xml)
Let's use this sample XML for our examples.
<?xml version="1.0"?>
<library>
<book category="FICTION">
<title lang="en">The Great Gatsby</title>
<author>F. Scott Fitzgerald</author>
<year>1925</year>
<price>10.99</price>
</book>
<book category="SCIENCE">
<title lang="en">A Brief History of Time</title>
<author>Stephen Hawking</author>
<year>1988</year>
<price>15.99</price>
</book>
</library>
Python Code to Parse and Navigate
import xml.etree.ElementTree as ET
# --- Method 1: Parsing from a file ---
try:
    tree = ET.parse('library.xml')
except FileNotFoundError:
    print("Error: library.xml not found. Please create it.")
    # Stop with a non-zero status: everything below needs the parsed tree.
    raise SystemExit(1)
root = tree.getroot()

# --- Method 2: Parsing from a string (useful for API responses) ---
xml_string = """
<library>
<book category="FICTION">
<title lang="en">The Great Gatsby</title>
<author>F. Scott Fitzgerald</author>
<year>1925</year>
<price>10.99</price>
</book>
</library>
"""
# fromstring() returns the root Element directly (there is no ElementTree wrapper).
root_from_string = ET.fromstring(xml_string)

# --- Navigating the XML Tree ---
print(f"Root tag: {root.tag}\n")

# --- Iterating ---
# A bare tag name matches only the direct children of 'root';
# use './/book' to match 'book' elements at any depth.
all_books = root.findall('book')
print(f"Found {len(all_books)} book(s).")
for book in all_books:
    # Get the 'category' attribute
    category = book.get('category')
    print(f"\nBook Category: {category}")
    # Find a specific child element by tag
    # find() searches only the immediate children
    title_element = book.find('title')
    author_element = book.find('author')
    # Get the text content of the element
    print(f" Title: {title_element.text}")
    print(f" Author: {author_element.text}")
    # Get an attribute of an element (e.g., 'lang' from 'title')
    lang = title_element.get('lang')
    print(f" Language: {lang}")
    # --- Iterating children of a specific element ---
    print(" All child tags:")
    for child in book:
        print(f" - {child.tag}: {child.text}")

# --- Using XPath-like expressions ---
# find() and findall() accept a limited subset of XPath.
# '.' means current node, '//' means search anywhere in descendants
first_book_title = root.find('.//title').text
print(f"\nFirst book title found with XPath: {first_book_title}")
# Find all 'year' elements
all_years = root.findall('.//year')
print(f"\nAll years found: {[year.text for year in all_years]}")
Generating XML (Writing)
You can create XML documents from scratch in Python.

import xml.etree.ElementTree as ET
# Create the root element
root = ET.Element("library")

# Add child elements
book1 = ET.SubElement(root, "book", attrib={"category": "FICTION"})
book2 = ET.SubElement(root, "book", attrib={"category": "SCIENCE"})

# Add sub-elements to the first book
title1 = ET.SubElement(book1, "title", attrib={"lang": "en"})
title1.text = "To Kill a Mockingbird"
author1 = ET.SubElement(book1, "author")
author1.text = "Harper Lee"
year1 = ET.SubElement(book1, "year")
year1.text = "1960"
price1 = ET.SubElement(book1, "price")
price1.text = "12.50"

# Add sub-elements to the second book
title2 = ET.SubElement(book2, "title", attrib={"lang": "en"})
title2.text = "Cosmos"
author2 = ET.SubElement(book2, "author")
author2.text = "Carl Sagan"
year2 = ET.SubElement(book2, "year")
year2.text = "1980"
price2 = ET.SubElement(book2, "price")
price2.text = "14.00"

# Create an ElementTree object (write() is a method of the tree, not the element)
tree = ET.ElementTree(root)

# Write to a file
# 'encoding' and 'xml_declaration' make the file start with an explicit
# <?xml ...?> header that names the encoding
tree.write("new_library.xml", encoding="utf-8", xml_declaration=True)
print("Successfully created new_library.xml")
Output (new_library.xml):
<?xml version='1.0' encoding='utf-8'?> <library><book category="FICTION"><title lang="en">To Kill a Mockingbird</title><author>Harper Lee</author><year>1960</year><price>12.50</price></book><book category="SCIENCE"><title lang="en">Cosmos</title><author>Carl Sagan</author><year>1980</year><price>14.00</price></book></library>
Note: The output is not pretty-printed by default. See the "Pretty-Printing" section below.
Modifying an Existing XML File
You can parse a file, change its elements, and save it back.
import xml.etree.ElementTree as ET
# Load the document that is going to be edited
tree = ET.parse('library.xml')
root = tree.getroot()

# --- Modify an element ---
# find() returns the first matching child, i.e. the first book
first_book = root.find('book')
first_book.find('price').text = "11.99"  # was 10.99

# --- Add a new attribute ---
# Index 1 of the findall() result is the second book
second_book = root.findall('book')[1]
second_book.set('status', 'in-stock')

# --- Add a new element ---
# SubElement() appends the new child and returns it, so its text can be set inline
ET.SubElement(first_book, 'pages').text = "180"

# --- Remove an element ---
# remove() takes the child Element itself, so look up 'year' on the second book first
second_book.remove(second_book.find('year'))

# Persist the edited tree under a new name, leaving library.xml untouched
tree.write('modified_library.xml', encoding="utf-8", xml_declaration=True)
print("Successfully modified library.xml and saved as modified_library.xml")
Pretty-Printing XML
The default output from tree.write() is not formatted. To make it human-readable, you can use a helper function or the lxml library.

Method A: Using a Helper Function with xml.dom.minidom
This is a common trick with the standard library.
import xml.etree.ElementTree as ET
from xml.dom import minidom
# ... (use the 'root' element from the generation example) ...
# Helper function to pretty-print
def prettify(elem, indent=" "):
    """Return a pretty-printed XML string for the Element.

    Args:
        elem: The ``xml.etree.ElementTree.Element`` to serialize.
        indent: Whitespace emitted once per nesting level.

    Returns:
        The document as a ``str``, one element per line, preceded by an
        XML declaration.
    """
    # minidom can only re-indent a document it parsed itself, so serialize
    # the ElementTree element to bytes and parse those bytes again.
    rough_string = ET.tostring(elem, 'utf-8')
    reparsed = minidom.parseString(rough_string)
    return reparsed.toprettyxml(indent=indent)
# Get the pretty-printed string
pretty_xml_as_string = prettify(root)
# Write the pretty string to a file; prettify() returns str, so open in text mode
with open("pretty_library.xml", "w", encoding="utf-8") as f:
    f.write(pretty_xml_as_string)
print("Successfully created pretty_library.xml")
Output (pretty_library.xml):
<?xml version="1.0" ?>
<library>
 <book category="FICTION">
  <title lang="en">To Kill a Mockingbird</title>
  <author>Harper Lee</author>
  <year>1960</year>
  <price>12.50</price>
 </book>
 <book category="SCIENCE">
  <title lang="en">Cosmos</title>
  <author>Carl Sagan</author>
  <year>1980</year>
  <price>14.00</price>
 </book>
</library>
Method B: Using the lxml Library (Recommended)
The lxml library is a more powerful and feature-rich alternative to the standard ElementTree. It's often faster and has better support for XML standards, including pretty-printing out of the box.
First, install it:
pip install lxml
Then, use it:
from lxml import etree as ET # Note the import alias
# ... (rebuild 'root' by running the generation example code with this lxml
# import; an Element created by xml.etree.ElementTree is a different type
# and is rejected by lxml's ElementTree()) ...
# Create an ElementTree object
tree = ET.ElementTree(root)
# Write to a file with pretty-printing
# The 'pretty_print=True' argument does all the work!
tree.write(
    "lxml_pretty_library.xml",
    encoding="utf-8",
    xml_declaration=True,
    pretty_print=True,
)
print("Successfully created lxml_pretty_library.xml")
Validating Against an XML Schema (XSD)
Important: The standard xml.etree.ElementTree cannot validate an XML document against a schema (XSD or DTD). For validation, you must use an external library. The most popular choice is lxml.
The Schema File (library.xsd)
This schema defines the rules for our library.xml file.
<?xml version="1.0"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:element name="library">
<xs:complexType>
<xs:sequence>
<xs:element name="book" maxOccurs="unbounded">
<xs:complexType>
<xs:sequence>
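<xs:element name="title">
<xs:complexType>
<xs:simpleContent>
<xs:extension base="xs:string">
<xs:attribute name="lang" type="xs:string"/>
</xs:extension>
</xs:simpleContent>
</xs:complexType>
</xs:element>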
<xs:element name="author" type="xs:string"/>
<xs:element name="year" type="xs:positiveInteger"/>
<xs:element name="price" type="xs:decimal"/>
</xs:sequence>
<xs:attribute name="category" type="xs:string" use="required"/>
</xs:complexType>
</xs:element>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:schema>
Python Code for Validation using lxml
from lxml import etree
def validate_xml(xml_file, xsd_file):
    """Validate an XML file against an XSD schema and print the outcome.

    Args:
        xml_file: Path of the XML document to check.
        xsd_file: Path of the XSD schema to check it against.

    Prints a SUCCESS line when the document conforms, otherwise a FAILURE
    line followed by one line per entry in the schema's error log. A
    missing file, malformed XML, or an unusable schema is reported with a
    message instead of propagating the exception.
    """
    try:
        # Parse the XSD schema
        with open(xsd_file, 'rb') as f:
            xsd_doc = etree.parse(f)
        xsd = etree.XMLSchema(xsd_doc)
        # Parse the XML file to be validated
        with open(xml_file, 'rb') as f:
            xml_doc = etree.parse(f)
        # Validate the XML against the schema
        is_valid = xsd.validate(xml_doc)
        if is_valid:
            print(f"SUCCESS: {xml_file} is valid against {xsd_file}")
        else:
            print(f"FAILURE: {xml_file} is NOT valid against {xsd_file}")
            # Print validation errors collected by the last validate() call
            for error in xsd.error_log:
                print(f" - Line {error.line}: {error.message}")
    except etree.XMLSchemaError as e:
        # The XSD parsed as XML but is not a usable schema
        print(f"Schema Error: {e}")
    except etree.XMLSyntaxError as e:
        # Either file is not well-formed XML
        print(f"XML Syntax Error: {e}")
    except FileNotFoundError as e:
        print(f"File Not Found Error: {e}")
# --- Test with a valid file ---
validate_xml('library.xml', 'library.xsd')
print("-" * 20)

# --- Test with an invalid file ---
# Let's create an invalid XML file
invalid_xml_content = """
<library>
<book category="FICTION">
<title lang="en">The Great Gatsby</title>
<author>F. Scott Fitzgerald</author>
<year>not_a_year</year> <!-- This will cause a validation error -->
<price>10.99</price>
</book>
</library>
"""
# Write with an explicit encoding so the bytes on disk do not depend on the
# platform's default text encoding
with open('invalid_library.xml', 'w', encoding='utf-8') as f:
    f.write(invalid_xml_content)
validate_xml('invalid_library.xml', 'library.xsd')
Summary and Recommendations
| Task | Recommended Tool | Notes |
|---|---|---|
| Simple Parsing/Navigating | xml.etree.ElementTree | Built-in, no installation needed. Great for most tasks. |
| Generating XML | xml.etree.ElementTree | Simple and effective. Use lxml for advanced features. |
| Pretty-Printing | lxml (recommended) or minidom trick | lxml is cleaner and more robust. |
| Validation (XSD/DTD) | lxml | Essential. The standard library does not support this. |
| Advanced Features (full XPath 1.0, XSLT, better performance) | lxml | A powerful, largely API-compatible alternative to ElementTree. |
For any serious XML work, especially involving validation or complex transformations, installing and using lxml is highly recommended. For simple, quick-and-dirty tasks, the built-in xml.etree.ElementTree is perfectly capable.
