我们只需要一个递归的函数即可:
def extract_dict_value(xml_tag):
    """Recursively print the element names and text found under a DOM node.

    For an element node, prints ``Node Name: <tag>`` and then visits each of
    its children in document order. For a text node, prints
    ``node_data: <text>`` with surrounding whitespace removed. Any other
    node type (comments, CDATA sections, ...) is ignored.

    Args:
        xml_tag: A DOM node, e.g. an element obtained from
            ``xml.dom.minidom``.

    Returns:
        None. The result is written to standard output.
    """
    # The node-type constants are read from the node itself, so this
    # function does not need any import of its own.
    if xml_tag.nodeType == xml_tag.TEXT_NODE:
        text = xml_tag.data.strip()
        # Whitespace-only text nodes are merely the layout between tags.
        if text:
            print("node_data:", text)
    elif xml_tag.nodeType == xml_tag.ELEMENT_NODE:
        tag_name = xml_tag.nodeName.strip()
        if tag_name:
            print('Node Name:', tag_name)
        for child_tag in xml_tag.childNodes:
            extract_dict_value(child_tag)
示例代码
新建一个文件 file.xml,内容如下:
<?xml version="1.0" encoding="utf-8"?>
<user>
<name age="12">小明</name>
<account username="xm" passwd='123456'>
<code>python</code>
<code>java</code>
</account>
<name age="21">小红</name>
<account username="xh" passwd='123456'>
<code>python</code>
<code>scala</code>
</account>
</user>
from xml.dom import minidom
def extract_dict_value(xml_tag):
    """Recursively print the element names and text found under a DOM node.

    For an element node, prints ``Node Name: <tag>`` and then visits each of
    its children in document order. For a text node, prints
    ``node_data: <text>`` with surrounding whitespace removed. Any other
    node type (comments, CDATA sections, ...) is ignored.

    Args:
        xml_tag: A DOM node, e.g. an element obtained from
            ``xml.dom.minidom``.

    Returns:
        None. The result is written to standard output.
    """
    # The node-type constants are read from the node itself, so this
    # function works with just the node it is given.
    if xml_tag.nodeType == xml_tag.TEXT_NODE:
        text = xml_tag.data.strip()
        # Whitespace-only text nodes are merely the layout between tags.
        if text:
            print("node_data:", text)
    elif xml_tag.nodeType == xml_tag.ELEMENT_NODE:
        tag_name = xml_tag.nodeName.strip()
        if tag_name:
            print('Node Name:', tag_name)
        for child_tag in xml_tag.childNodes:
            extract_dict_value(child_tag)
def main():
    """Parse ``file.xml`` and print everything under its first <user> element.

    Raises:
        IndexError: If the document contains no ``<user>`` element.
    """
    file_name = "file.xml"
    # Hand the path to minidom instead of a text-mode file object: the
    # parser then reads the raw bytes (honouring the encoding declared in
    # the XML prolog rather than the platform default) and no file handle
    # is left open by this function.
    document = minidom.parse(file_name)
    user_document = document.getElementsByTagName("user")[0]  # descend to the <user> element
    extract_dict_value(user_document)


if __name__ == '__main__':
    main()
得到结果:
Node Name: user
Node Name: name
node_data: 小明
Node Name: account
Node Name: code
node_data: python
Node Name: code
node_data: java
Node Name: name
node_data: 小红
Node Name: account
Node Name: code
node_data: python
Node Name: code
node_data: scala