Are there any potential issues with using XPath to find and update XML tag text?
I wrote a script based on several existing Stack Overflow answers, because none of them fit my problem exactly. The user supplies an XPath expression to locate a tag in a given XML file, and the tag's text is updated from the user's input. Below is my script, written for Python 3 (the most difficult part for me was handling the namespaces):
import xml.etree.ElementTree as ET
import sys
# user inputs and variables
filename = 'actors.xml'
xpath = 'actor/name'
value = 'test name'
# Name of the scratch attribute that temporarily holds an element's
# '{uri}' namespace part while the tags are stripped for the XPath lookup.
temp_namespace = 'temp_namespace'

# Collect every prefix -> URI declaration found in the document.
# With events=['start-ns'], iterparse yields ('start-ns', (prefix, uri)).
# If a prefix is declared more than once, the last declaration wins.
all_namespaces = {
    prefix: uri
    for _, (prefix, uri) in ET.iterparse(filename, events=['start-ns'])
}

# Register the prefixes so that serialisation reuses the document's own
# prefixes instead of auto-generated ones.
for prefix, uri in all_namespaces.items():
    try:
        ET.register_namespace(prefix, uri)
    except ValueError:
        # register_namespace() rejects prefixes of the form "ns<digits>"
        # (reserved for ElementTree's auto-generated prefixes).  Skip them
        # rather than aborting; ElementTree will pick a prefix itself.
        continue
# Parse the document, keeping comments that live inside the root element.
# TreeBuilder(insert_comments=True) requires Python 3.8+.
# ET.parse() is used instead of ET.iterparse(..., parser=...) because the
# ``parser`` argument of iterparse has been deprecated since Python 3.4.
parser = ET.XMLParser(target=ET.TreeBuilder(insert_comments=True))
root = ET.parse(filename, parser=parser).getroot()

# remove all namespace from elements first
# and temp save it to tag attribute
for el in root.iter():
    if not isinstance(el.tag, str):
        # Comment nodes use the ET.Comment factory as their tag, not a
        # string, so there is no namespace to strip.
        continue
    prefix, has_namespace, postfix = el.tag.partition('}')
    if has_namespace:
        # '{uri}local' -> tag becomes 'local', '{uri}' is parked in the
        # scratch attribute so it can be restored before writing.
        el.tag = postfix
        el.set(temp_namespace, prefix + has_namespace)

# find and update
for el in root.findall(xpath):
    el.text = str(value)
# get xml comments before root level
def _read_prolog_lines(path):
    """Return the text between the XML declaration and the root start tag.

    The result is a list of lines (line endings kept) containing the
    comments, processing instructions, DOCTYPE and blank lines that precede
    the root element.  The XML declaration itself is not included.

    The root element is located by scanning the markup rather than by
    comparing against the root tag name, so a prefixed root (``<p:root>``)
    or an indented root start tag is detected correctly.

    An empty list is returned when nothing precedes the root element or
    when no root start tag can be located.
    """
    with open(path, 'r') as f:
        text = f.read()

    pos = 0
    # Only a real declaration ('<?xml' followed by whitespace) is skipped;
    # '<?xml-stylesheet ...?>' is an ordinary processing instruction.
    if text.startswith(('<?xml ', '<?xml\t', '<?xml\n')):
        decl_end = text.find('?>')
        if decl_end == -1:
            return []
        pos = decl_end + 2
    prolog_start = pos

    # Constructs that may legally appear before the root element.
    # '<!--' must be tested before the more general '<!'.
    markup = (('<!--', '-->'), ('<?', '?>'), ('<!', '>'))
    while True:
        lt = text.find('<', pos)
        if lt == -1:
            return []
        for opener, closer in markup:
            if text.startswith(opener, lt):
                closed_at = text.find(closer, lt + len(opener))
                if closed_at == -1:
                    return []
                pos = closed_at + len(closer)
                break
        else:
            # A '<' that opens neither a comment, a PI nor a declaration
            # is the root element's start tag.
            break

    prolog = text[prolog_start:lt]
    # The remainder of the declaration's own line is not prolog content.
    first_line, newline, rest = prolog.partition('\n')
    if newline and not first_line.strip():
        prolog = rest
    return prolog.splitlines(keepends=True)


doc_comments = _read_prolog_lines(filename)
def add_tag_namespace(el, attr_name=None):
    """Restore the namespace of ``el`` and all of its descendants in place.

    Each element that carries the scratch attribute has the attribute's
    value (the saved ``'{uri}'`` part) prepended to its tag, and the
    attribute is then removed.  Elements without the attribute, including
    comment nodes, are left untouched.

    Args:
        el: root of the subtree to process.
        attr_name: name of the scratch attribute.  Defaults to the
            module-level ``temp_namespace``.

    The tree is walked with an explicit stack instead of recursion, so
    deeply nested documents do not hit the interpreter's recursion limit
    and every element is examined exactly once.
    """
    if attr_name is None:
        attr_name = temp_namespace

    pending = [el]
    while pending:
        node = pending.pop()
        saved_namespace = node.attrib.pop(attr_name, None)
        if saved_namespace is not None:
            node.tag = saved_namespace + node.tag
        # Iterating an element yields its direct children.
        pending.extend(node)
# add all namespace back
# and delete the temp namespace attribute
add_tag_namespace(root)

# write back to xml file
tree = ET.ElementTree(root)

# Write the declaration, the preserved prolog lines and the serialised tree
# in a single pass.  The encoding is fixed to UTF-8 so that the bytes on
# disk always match the encoding named in the declaration, independent of
# the machine's locale, and the file no longer has to be re-read and
# re-written just to insert the prolog lines.
with open(filename, 'w', encoding='utf-8') as f:
    f.write("<?xml version='1.0' encoding='utf-8'?>\n")
    # xml comments that appeared before the root element (may be empty)
    f.writelines(doc_comments)
    # encoding='unicode' makes ElementTree emit str, which is what a
    # text-mode file object expects; the declaration was written above.
    tree.write(f, encoding='unicode', xml_declaration=False)
actors.xml:
<?xml version="1.0"?>
<actors xmlns:fictional="http://characters.example.com"
xmlns="http://people.example.com">
<actor>
<name>John Cleese</name>
<fictional:character>Lancelot</fictional:character>
<fictional:character>Archie Leach</fictional:character>
</actor>
<actor>
<name>Eric Idle</name>
<fictional:character>Sir Robin</fictional:character>
<fictional:character>Gunther</fictional:character>
<fictional:character>Commander Clement</fictional:character>
</actor>
</actors>
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
Solution | Source |
---|