Get the path to every node of an XML document

I have an example XML as follows:

<message>
  <metadata> 
    <msg_id>1</msg_id>
    <client_type>type1</client_type>
  </metadata>
  <individual>
    <name>John</name>
    <surname>Smith</surname>
      <additional_information>
        <e_mail>[email protected]</e_mail>
        <phone_number>11110000</phone_number>
      </additional_information>
  </individual>
</message>

My goal is to get output that shows the path to every element of the XML, like this:

/message/metadata/msg_id
/message/metadata/client_type
/message/individual/name

and so on. How can I do this in Java?

Thanks a lot in advance for any hints!



Solution 1:[1]

You can use XPath to select nodes by expression and then print the path of each selected node.

Here is the java code :

/**
 * Parses {@code src/main/resources/file.xml}, selects every element that has
 * no child elements, and prints the path of each one on its own line.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if the file cannot be read or parsed, or the XPath
 *                   expression cannot be evaluated
 */
public static void main(String[] args) throws Exception {
    XPath leafSelector = XPathFactory.newInstance().newXPath();

    // Load the XML file into a DOM tree.
    Document document = DocumentBuilderFactory.newInstance()
            .newDocumentBuilder()
            .parse(new File("src/main/resources/file.xml"));
    document.getDocumentElement().normalize();

    // "//*[not(*)]" matches every element without element children, i.e. the leaves.
    NodeList leaves = (NodeList) leafSelector
            .compile("//*[not(*)]")
            .evaluate(document, XPathConstants.NODESET);

    int position = 0;
    while (position < leaves.getLength()) {
        System.out.println(getXPath(leaves.item(position)));
        position++;
    }
}

/**
 * Builds a slash-separated path for a node by joining the node names of all
 * of its ancestors (outermost first) and the node's own name.
 *
 * The walk continues until a node without a parent is reached, so for a node
 * attached to a document the first segment is the document node's own name.
 *
 * @param node the node whose path is wanted
 * @return the ancestor names and the node's name joined with {@code "/"}
 */
private static String getXPath(Node node) {
    StringBuilder path = new StringBuilder().append(node.getNodeName());
    // Climb towards the root, prepending each ancestor's name as we go.
    for (Node ancestor = node.getParentNode(); ancestor != null; ancestor = ancestor.getParentNode()) {
        path.insert(0, "/").insert(0, ancestor.getNodeName());
    }
    return path.toString();
}

The output begins as follows (the first segment is the DOM document node, whose node name is `#document`):

#document/message/metadata/msg_id

#document/message/metadata/client_type

#document/message/individual/name

#document/message/individual/surname

Solution 2:[2]

I can't comment yet (not enough reputation), but the previously supplied answer doesn't address nodes inside an array. The following determines the full path to a node when the node being checked is inside an array element of some sort:

/**
 * Parses {@code src/main/resources/file.xml}, selects every element that has
 * no child elements, and prints the node path of each one on its own line.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if the file cannot be read or parsed, or the XPath
 *                   expression cannot be evaluated
 */
public static void main(String[] args) throws Exception {
    XPath leafSelector = XPathFactory.newInstance().newXPath();

    // Load the XML file into a DOM tree.
    Document document = DocumentBuilderFactory.newInstance()
            .newDocumentBuilder()
            .parse(new File("src/main/resources/file.xml"));
    document.getDocumentElement().normalize();

    // "//*[not(*)]" matches every element without element children, i.e. the leaves.
    NodeList leaves = (NodeList) leafSelector
            .compile("//*[not(*)]")
            .evaluate(document, XPathConstants.NODESET);

    int position = 0;
    while (position < leaves.getLength()) {
        System.out.println(getNodePath(leaves.item(position)));
        position++;
    }
}

/**
 * Builds the path to a node in the XML structure.
 *
 * Every segment is {@code "/" + nodeName}. A segment whose node has at least
 * one sibling of the same type and name (an "array" of repeated elements)
 * additionally gets a zero-based {@code [index]} suffix, e.g.
 * {@code /messages[1]/message/metadata/msg_id}. Note that the index is
 * zero-based, unlike XPath positions, which start at 1.
 *
 * The document node itself is not part of the path, except when the document
 * node is passed in directly, in which case {@code "/#document"} is returned.
 *
 * The method is static so that it can be called from a static {@code main}
 * without an enclosing instance.
 *
 * @param node Child {@link Node}
 * @return {@link String} representation of Path to XML Node.
 * @throws IllegalArgumentException if {@code node} is {@code null}
 */
public static String getNodePath(Node node) {
    if (node == null) {
        throw new IllegalArgumentException("Node cannot be null");
    }
    if (node.getNodeType() == Node.DOCUMENT_NODE) {
        return "/" + node.getNodeName();
    }

    StringBuilder pathBuilder = new StringBuilder();
    // Walk from the node up to (but excluding) the document node, prepending
    // one segment per level. The node itself is indexed too, so that repeated
    // leaf elements do not collapse into the same path.
    for (Node currentNode = node;
            currentNode != null && currentNode.getNodeType() != Node.DOCUMENT_NODE;
            currentNode = currentNode.getParentNode()) {
        String segment = "/" + currentNode.getNodeName();
        Integer index = getIndexOfArrayNode(currentNode);
        if (index != null) {
            segment += "[" + index + "]";
        }
        pathBuilder.insert(0, segment);
    }

    return pathBuilder.toString();
}

/**
 * Tells whether two nodes have the same node type and the same
 * (case-sensitive) node name.
 *
 * @param first  {@link Node}
 * @param second {@link Node}
 * @return True if both nodes share type and name. Otherwise false.
 */
private static boolean isSameKind(Node first, Node second) {
    return first.getNodeType() == second.getNodeType()
            && first.getNodeName().equals(second.getNodeName());
}

/**
 * Light node test to see if Node is part of an Array of Elements.
 *
 * A node counts as part of an array when any of its siblings, before or
 * after it, has the same node type and name. Siblings of another kind, such
 * as the whitespace text nodes produced by formatted XML, are skipped.
 *
 * @param node {@link Node}
 * @return True if part of an array. Otherwise false.
 */
private static boolean isArrayNode(Node node) {
    for (Node sibling = node.getPreviousSibling(); sibling != null; sibling = sibling.getPreviousSibling()) {
        if (isSameKind(node, sibling)) {
            return true;
        }
    }
    for (Node sibling = node.getNextSibling(); sibling != null; sibling = sibling.getNextSibling()) {
        if (isSameKind(node, sibling)) {
            return true;
        }
    }
    return false;
}

/**
 *  Figures out the Index of the Array Node.
 *
 *  The index is the number of preceding siblings that have the same node
 *  type and name as the node, so it is zero-based and ignores formatting
 *  text nodes and differently named siblings.
 *
 *  @param node {@link Node}
 *  @return Index of element in array. Returns null if not inside an array.
 */
private static Integer getIndexOfArrayNode(Node node) {
    if (!isArrayNode(node)) {
        return null;
    }
    int index = 0;
    for (Node sibling = node.getPreviousSibling(); sibling != null; sibling = sibling.getPreviousSibling()) {
        if (isSameKind(node, sibling)) {
            index++;
        }
    }
    return index;
}

The output would look something like this:

/messages[0]/message/metadata/msg_id
/messages[0]/message/metadata/client_type
/messages[0]/message/individual/name
/messages[1]/message/metadata/msg_id
/messages[1]/message/metadata/client_type
/messages[1]/message/individual/name
etc.

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1
Solution 2 Jason Smiley