XML Processing with Python 3
I'm working on an XML processing program in Python 3, and I have an issue with its output.
I've tried rearranging the for loops in every way I could think of, but the output is still not what it should be. As I'm new to Python, I know I have probably made a mistake somewhere or overlooked some condition.
My code is like this:
import csv
import xml.etree.ElementTree as ET
# Question's version: reads Node / LicenseItem attributes from the XML file
# `inputfile` and appends them as CSV rows to `outputfile`.
# NOTE(review): the paste has lost its indentation; the second for-loop is
# presumably nested inside the first, and everything down to writerow() inside
# the second — confirm against the original post.
def xml_t(inputfile,outputfile):
tree = ET.parse(inputfile)
root = tree.getroot()
for Node in root.iter('Node'):
nodeName = Node.get('nodeName')
# NOTE(review): this iterates from `root`, not from the current `Node`, so
# every LicenseItem in the document is visited again for each Node. This is
# the behaviour the question is asking about.
for LicenseItem in root.iter('LicenseItem'):
LicenseItem_child = LicenseItem.get('LicenseItem')
LicenseDesc = LicenseItem.get('LicenseDesc')
AssignedValue = LicenseItem.get('AssignedValue')
LicenseStatus = LicenseItem.get('LicenseStatus')
LicenseEndDate = LicenseItem.get('LicenseEndDate')
# .get() returns None for a missing attribute; skip items that carry none of
# the five license attributes.
if LicenseItem_child == LicenseDesc == AssignedValue == LicenseStatus == LicenseEndDate == None:
continue
output_csv = (nodeName,LicenseItem_child,LicenseDesc,AssignedValue,LicenseStatus,LicenseEndDate)
# Append mode: rows from earlier runs stay in the file. Each row is
# terminated with a bare carriage return.
with open (outputfile,'a') as files:
write = csv.writer(files, lineterminator ='\r', delimiter = ',')
write.writerow(output_csv)
if __name__ == '__main__':
    # Script entry point: convert the sample XML file into CSV rows.
    inputfile, outputfile = 'filename.xml', 'output.csv'
    xml_t(inputfile, outputfile)
The output I'm expecting looks like this:
nodeName,LicenseItem,LicenseDesc,AssignedValue,LicenseStatus,LicenseEndDate
TH1S9303VAS04,,,,,
ES1S9306RANH2,LLE0xxxxx1,NQA Client & NQA Server,1,Normal,PERMANENT
ES1S9306RANH2,LLE0Ixxxx1,IPV6 GTLV2,1,Normal,PERMANENT
ES1S9306RANH2,LLE0xxxxx1,MPLS,1,Normal,PERMANENT
TH1S9303DCN02,LLE0xxxxx1,NQA Client & NQA Server,1,Normal,PERMANENT
TH1S9303DCN02,LLE0xxxxx1,IPV6 GTLV2,1,Normal,PERMANENT
TH1S9303DCN02,LLE0xxxxx1,MPLS,1,Normal,PERMANENT
TH1S5310CSH2,,,,,
TH1S9303DCN01,LLE0yyyyy1,NQA Client & NQA Server,1,Normal,PERMANENT
TH1S9303DCN01,LLE0yyyyy1,IPV6 GTLV2,1,Normal,PERMANENT
TH1S9303DCN01,LLE0yyyyy1,MPLS,1,Normal,PERMANENT
TH1S5310CSH1,,,,,
ES1S9306RANH1,LLE0ttttt1,NQA Client & NQA Server,1,Normal,PERMANENT
ES1S9306RANH1,LLE0ttttt1,IPV6 GTLV2,1,Normal,PERMANENT
ES1S9306RANH1,LLE0ttttt1,MPLS,1,Normal,PERMANENT
,,,,,
And the XML file I'm working on is:
<?xml version="1.0" encoding="UTF-8"?>
<LicenseInfo>
<Node nodeName="TH1S9303VAS04">
<LicenseItem details="TH1S9303VAS04" />
</Node>
<Node nodeName="ES1S9306RANH2">
<LicenseItem LicenseItem="LLE0xxxxx1" LicenseDesc="NQA Client &amp; NQA Server" AssignedValue="1" LicenseStatus="Normal" LicenseEndDate="PERMANENT" />
<LicenseItem LicenseItem="LLE0xxxxx1" LicenseDesc="IPV6 GTLV2" AssignedValue="1" LicenseStatus="Normal" LicenseEndDate="PERMANENT" />
<LicenseItem LicenseItem="LLE0xxxxx1" LicenseDesc="MPLS" AssignedValue="1" LicenseStatus="Normal" LicenseEndDate="PERMANENT" />
</Node>
<Node nodeName="TH1S9303DCN02">
<LicenseItem LicenseItem="LLE0xxxxx1" LicenseDesc="NQA Client &amp; NQA Server" AssignedValue="1" LicenseStatus="Normal" LicenseEndDate="PERMANENT" />
<LicenseItem LicenseItem="LLE0xxxxx1" LicenseDesc="IPV6 GTLV2" AssignedValue="1" LicenseStatus="Normal" LicenseEndDate="PERMANENT" />
<LicenseItem LicenseItem="LLE0xxxxx1" LicenseDesc="MPLS" AssignedValue="1" LicenseStatus="Normal" LicenseEndDate="PERMANENT" />
</Node>
<Node nodeName="TH1S5310CSH2">
<LicenseItem details="TH1S5310CSH2" />
</Node>
<Node nodeName="TH1S9303DCN01">
<LicenseItem LicenseItem="LLE0yyyyy1" LicenseDesc="NQA Client &amp; NQA Server" AssignedValue="1" LicenseStatus="Normal" LicenseEndDate="PERMANENT" />
<LicenseItem LicenseItem="LLE0yyyyy1" LicenseDesc="IPV6 GTLV2" AssignedValue="1" LicenseStatus="Normal" LicenseEndDate="PERMANENT" />
<LicenseItem LicenseItem="LLE0yyyyy1" LicenseDesc="MPLS" AssignedValue="1" LicenseStatus="Normal" LicenseEndDate="PERMANENT" />
</Node>
The output without the "nodeName" attribute is exactly what I want, but when I add nodeName to the output file, things start to get weird!
I would appreciate any help. Thanks in advance.
Solution 1:[1]
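root.iter('LicenseItem') returns every element with the tag 'LicenseItem' anywhere in the tree, so your program loops over all "LicenseItem" elements once for every "Node". Iterate over the children of the current Node instead; change your code as below and each license row will carry the nodeName of the Node it belongs to: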
import csv
import xml.etree.ElementTree as ET
def xml_t(inputfile, outputfile):
    """Append one CSV row per licensed item found in an XML license report.

    Each row holds the ``nodeName`` of a ``<Node>`` element followed by the
    ``LicenseItem``, ``LicenseDesc``, ``AssignedValue``, ``LicenseStatus`` and
    ``LicenseEndDate`` attributes of one of its child elements. Attributes
    that are missing on a child are written as empty fields; a child that
    carries none of the five attributes is skipped entirely.

    Args:
        inputfile: Path (or file object) of the XML document to parse.
        outputfile: Path of the CSV file to append to. It is opened in
            append mode, so rows from earlier runs are kept. The file is not
            touched when there is nothing to write.

    Raises:
        xml.etree.ElementTree.ParseError: If ``inputfile`` is not
            well-formed XML.
        OSError: If either file cannot be opened.
    """
    fields = ('LicenseItem', 'LicenseDesc', 'AssignedValue',
              'LicenseStatus', 'LicenseEndDate')
    root = ET.parse(inputfile).getroot()

    rows = []
    for node in root.iter('Node'):
        node_name = node.get('nodeName')
        # Walk only this Node's own children. root.iter('LicenseItem') would
        # revisit every LicenseItem in the document for each Node.
        for item in node:
            values = [item.get(field) for field in fields]
            if all(value is None for value in values):
                continue
            rows.append([node_name, *values])

    if not rows:
        return

    # Open the file once instead of once per row. newline='' is what the csv
    # module asks for so that it alone controls line endings; rows are still
    # terminated with a bare '\r' as before.
    with open(outputfile, 'a', newline='') as files:
        writer = csv.writer(files, lineterminator='\r', delimiter=',')
        writer.writerows(rows)
if __name__ == '__main__':
    # Script entry point: convert the sample XML file into CSV rows.
    inputfile, outputfile = 'filename.xml', 'output.csv'
    xml_t(inputfile, outputfile)
Solution 2:[2]
Here (Note that the code does not use any external library).
The code is using 'N/A' where there is an empty value. You can replace it.
import xml.etree.ElementTree as ET

# Sample document. A literal '&' inside an attribute value must be written as
# '&amp;'; a raw '&' makes ET.fromstring() raise ParseError.
xml = '''<LicenseInfo>
  <Node nodeName="TH1S9303VAS04">
    <LicenseItem details="TH1S9303VAS04" />
  </Node>
  <Node nodeName="ES1S9306RANH2">
    <LicenseItem LicenseItem="LLE0xxxxx1" LicenseDesc="NQA Client &amp; NQA Server" AssignedValue="1" LicenseStatus="Normal" LicenseEndDate="PERMANENT" />
    <LicenseItem LicenseItem="LLE0xxxxx1" LicenseDesc="IPV6 GTLV2" AssignedValue="1" LicenseStatus="Normal" LicenseEndDate="PERMANENT" />
    <LicenseItem LicenseItem="LLE0xxxxx1" LicenseDesc="MPLS" AssignedValue="1" LicenseStatus="Normal" LicenseEndDate="PERMANENT" />
  </Node>
  <Node nodeName="TH1S9303DCN02">
    <LicenseItem LicenseItem="LLE0xxxxx1" LicenseDesc="NQA Client &amp; NQA Server" AssignedValue="1" LicenseStatus="Normal" LicenseEndDate="PERMANENT" />
    <LicenseItem LicenseItem="LLE0xxxxx1" LicenseDesc="IPV6 GTLV2" AssignedValue="1" LicenseStatus="Normal" LicenseEndDate="PERMANENT" />
    <LicenseItem LicenseItem="LLE0xxxxx1" LicenseDesc="MPLS" AssignedValue="1" LicenseStatus="Normal" LicenseEndDate="PERMANENT" />
  </Node>
  <Node nodeName="TH1S5310CSH2">
    <LicenseItem details="TH1S5310CSH2" />
  </Node>
  <Node nodeName="TH1S9303DCN01">
    <LicenseItem LicenseItem="LLE0yyyyy1" LicenseDesc="NQA Client &amp; NQA Server" AssignedValue="1" LicenseStatus="Normal" LicenseEndDate="PERMANENT" />
    <LicenseItem LicenseItem="LLE0yyyyy1" LicenseDesc="IPV6 GTLV2" AssignedValue="1" LicenseStatus="Normal" LicenseEndDate="PERMANENT" />
    <LicenseItem LicenseItem="LLE0yyyyy1" LicenseDesc="MPLS" AssignedValue="1" LicenseStatus="Normal" LicenseEndDate="PERMANENT" />
  </Node>
</LicenseInfo>
'''

# Attributes copied from every <LicenseItem>, in output column order.
ATTRS = ['LicenseItem', 'LicenseDesc', 'AssignedValue', 'LicenseStatus', 'LicenseEndDate']

root = ET.fromstring(xml)
nodes = root.findall('.//Node')

# One row per <LicenseItem>: the owning node's name followed by the ATTRS values.
lines = []
for node in nodes:
    node_name = node.attrib['nodeName']
    license_items = node.findall('.//LicenseItem')
    for item in license_items:
        line = [node_name]
        # 'N/A' stands in for any attribute the item does not carry.
        line.extend(item.attrib.get(attr, 'N/A') for attr in ATTRS)
        lines.append(line)

for row in lines:
    print(row)
output
['TH1S9303VAS04', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A']
['ES1S9306RANH2', 'LLE0xxxxx1', 'NQA Client & NQA Server', '1', 'Normal', 'PERMANENT']
['ES1S9306RANH2', 'LLE0xxxxx1', 'IPV6 GTLV2', '1', 'Normal', 'PERMANENT']
['ES1S9306RANH2', 'LLE0xxxxx1', 'MPLS', '1', 'Normal', 'PERMANENT']
['TH1S9303DCN02', 'LLE0xxxxx1', 'NQA Client & NQA Server', '1', 'Normal', 'PERMANENT']
['TH1S9303DCN02', 'LLE0xxxxx1', 'IPV6 GTLV2', '1', 'Normal', 'PERMANENT']
['TH1S9303DCN02', 'LLE0xxxxx1', 'MPLS', '1', 'Normal', 'PERMANENT']
['TH1S5310CSH2', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A']
['TH1S9303DCN01', 'LLE0yyyyy1', 'NQA Client & NQA Server', '1', 'Normal', 'PERMANENT']
['TH1S9303DCN01', 'LLE0yyyyy1', 'IPV6 GTLV2', '1', 'Normal', 'PERMANENT']
['TH1S9303DCN01', 'LLE0yyyyy1', 'MPLS', '1', 'Normal', 'PERMANENT']
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
Solution | Source |
---|---|
Solution 1 | ToughMind |
Solution 2 | balderman |