'XmlNodeReader:
Option Public
Option Explicit
%REM
The XmlNodeReader class, an easy way to parse XML in LotusScript.
Please see the lsdoc_description sub for usage details and examples,
or better yet, download LotusScript.doc from http://www.lsdoc.org and
generate some documentation!
version 1.0
Oct. 25, 2008
Copyright (c) 2008 Julian Robichaux (http://www.nsftools.com)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
%END REM
'/**
' * NodePathParser is a helper class used by XmlNodeReader that
' * converts a "nodeName.nodeName.nodeName" type string to an
' * array of node names, and pulls out the attribute name and last
' * node name for reference. The special rules for a "node path" are:
' *
' * - node names and attribute names are separated by a period (".")
' * - attribute names must begin with "@" (like "@href" for the "href" attribute)
' * - attribute names must be the last part of the node path
' * - if a node name or attribute name contains a period, replace the period
' * with two periods (so the node name "author.name" becomes "author..name"
' * in the node path)
' *
' * Normally this class will not need to be used directly.
' */
Class NodePathParser
Private nodePath As String
Private nparr As Variant
Private attName As String
Private nodeName As String
'/**
' * Creates a new NodePathParser from the given node path string.
' */
Public Sub New (nodePath As String)
Call ParseNodePath(nodePath)
End Sub
'/**
' * Returns the node path string that was passed when this object was
' * created, or when parseNodePath() was called, with all spaces stripped
' * and any trailing attribute name removed.
' */
Public Function getPath () As String
getPath = nodePath
End Function
'/**
' * Returns a string array with one element for each node name in the
' * node path. If the node path is empty (or only contains an attribute name)
' * this will return an array with a single empty string element.
' */
Public Function getArray () As Variant
getArray = nparr
End Function
'/**
' * Returns the last (right-most) node name in the node path, if any.
' */
Public Function getNodeName () As String
getNodeName = nodeName
End Function
'/**
' * Returns the attribute name in the node path, if any.
' */
Public Function getAttName () As String
getAttName = attName
End Function
'/**
' * Returns a string array with one element for each node name in the
' * node path, and also sets all the internal references in this object to
' * reflect this node path. This method is called by the New() sub when
' * the object is instantiated.
' */
Public Function parseNodePath (np As String) As Variant
nodePath = Replace(np, " ", "")
If (Instr(nodePath, ".@") > 0) Then
attName = Strright(nodePath, ".@")
nodePath = Strleft(nodePath, ".@")
Elseif (Left(nodePath, 1) = "@") Then
attName = Mid(nodePath, 2)
nodePath = ""
End If
nparr = SplitNodePath(nodePath)
nodeName = nparr( Ubound(nparr) )
attName = Replace(attName, "..", ".")
End Function
'/**
' * Internal function used to convert a node path to an array, also changing
' * ".." references in node names to ".".
' */
Private Function SplitNodePath (Byval npath As String) As Variant
Dim arr As Variant
Dim i As Integer
npath = Replace(npath, "..", Chr(1))
arr = Split(npath, ".")
For i = 0 To Ubound(arr)
arr(i) = Fulltrim( Replace(arr(i), Chr(1), ".") )
Next
SplitNodePath = arr
End Function
End Class
'/**
' * The XmlNodeReader class is meant to be an easy interface for getting
' * data out of XML. Internally it uses a NotesDomParser to process the XML
' * and steps through the DOM tree to read nodes and attributes.
' *
' * Please see the lsdoc_description sub for usage details and examples,
' * or better yet, download LotusScript.doc from http://www.lsdoc.org and
' * generate some documentation!
' */
Class XmlNodeReader
Private mynode As NotesDOMNode
Private lastError As String
Private readError As String
'/**
' * Creates a new, empty XmlNodeReader. Note that you can still call
' * get(), getNode(), etc. on an empty XmlNodeReader and not generate
' * LotusScript runtime errors -- method calls will simply return empty strings
' * or empty references.
' */
Public Sub New ()
'** nothing to initialize here
End Sub
'/**
' * Returns the NotesDomNode that this object uses as its base node.
' */
Property Get thisNode () As NotesDOMNode
Set thisNode = mynode
End Property
'/**
' * Returns the name of the node used as this object's base node.
' */
Property Get thisNodeName () As String
On Error Resume Next
thisNodeName = mynode.NodeName
End Property
'/**
' * Returns True if this object does not contain a base node reference
' * or if the base node's isNull property is True, or False otherwise.
' * This is useful in determining if a Read operation was successful.
' */
Property Get isEmpty () As Boolean
If (mynode Is Nothing) Then
Me.isEmpty = True
Elseif mynode.IsNull Then
Me.isEmpty = True
End If
End Property
'/**
' * Returns the last error string that was generated internal to this class.
' * If a parsing error occurred when the initial Read operation was performed,
' * this will always return the parsing error. Note that error strings other than
' * the parse error are cleared at the start of each Get-type method call,
' * so if an error happens at one Get and then a second Get is successful
' * then the last error string will be empty.
' */
Public Function getLastError () As String
If (Len(readError) > 0) Then
getLastError = readError
Else
getLastError = lastError
End If
End Function
'/**
' * Internal method used to set the private lastError string. By default, errors
' * that occur inside of this class are handled and discarded silently. If you
' * want to add error logging or notification, or if you'd rather throw runtime
' * errors instead of ignoring them, you should modify this method (or,
' * a better way is to create a subclass of this class and override the method).
' */
Private Function setLastError (msg As String, error_num As Integer) As String
'** subclasses can override this method to add logging, notification, etc.
lastError = msg
End Function
'/**
' * Reads a NotesDomNode in to this object for parsing.
' */
Public Function ReadNode (n As NotesDOMNode) As XmlNodeReader
readError = ""
Call setLastError("", 0)
Set mynode = n
Set ReadNode = Me
End Function
'/**
' * Reads a NotesStream of XML data in to this object for parsing.
' * Note that if there is ANY leading whitespace at the beginning of
' * the stream, parsing will probably fail.
' */
Public Function ReadStream (s As NotesStream) As XmlNodeReader
On Error Goto processError
Dim session As New NotesSession
Dim outputStream As NotesStream
Dim domParser As NotesDOMParser
Dim docNode As NotesDOMDocumentNode
Set mynode = Nothing
Set outputStream = session.CreateStream
Set domParser = session.CreateDOMParser(s, outputStream)
domParser.InputValidationOption = VALIDATE_NEVER
domParser.Process
Set docNode = domParser.Document
Set ReadStream = ReadNode(docNode)
Exit Function
processError:
If (domParser Is Nothing) Then
readError = "ReadStream error on line " & Erl & ": " & Error
Call setLastError(readError, Err)
Elseif (domParser.Log = "") Then
readError = "ReadStream error on line " & Erl & ": " & Error
Call setLastError(readError, Err)
Else
readError = "ReadStream parsing error : " & domParser.Log
Call setLastError(readError, Err)
End If
Set ReadStream = Me
Exit Function
End Function
'/**
' * Reads a text file of XML data in to this object for parsing.
' * Note that if there is ANY leading whitespace at the beginning of
' * the file, parsing will probably fail.
' */
Public Function ReadFile (fileName As String) As XmlNodeReader
Dim session As New NotesSession
Dim stream As NotesStream
Set stream = session.CreateStream
If stream.Open(fileName) Then
Call ReadStream(stream)
Call stream.Close
Else
Set mynode = Nothing
readError = "Could not open file: " & fileName
Call setLastError(readError, 9999)
End If
Set ReadFile = Me
End Function
'/**
' * Reads a string of XML data in to this object for parsing.
' */
Public Function ReadText (txt As String) As XmlNodeReader
Dim session As New NotesSession
Dim stream As NotesStream
Set stream = session.CreateStream
'** ANY leading whitespace in the text causes parsing to fail
If (Instr(txt, "<") > 1) Then
Call stream.WriteText(Mid$(txt, Instr(txt, "<")))
Else
Call stream.WriteText(txt)
End If
stream.Position = 0
Set ReadText = ReadStream(stream)
Call stream.Close
End Function
'/**
' * Internal function to get the node described by a node path array,
' * based on the given parent node. Normally the parent node will be
' * the internal mynode reference, but it might be useful later if we add
' * a method like getSubnodeArray("feed.entry", "author") to get all
' * subnodes of a set of all nodes that match a path.
' */
Private Function GetNodeFromPathArray (parent As NotesDOMNode, nparr As Variant) As NotesDOMNode
On Error Goto processError
Dim tempNode As NotesDOMNode
Dim i As Integer
Call setLastError("", 0)
If Not Isarray(nparr) Then
Exit Function
End If
Set tempNode = parent
For i = 0 To Ubound(nparr)-1
Set tempNode = findFirstChildNode(tempNode, nparr(i))
Next
If (tempNode Is Nothing) Then
Call setLastError("Node path not found: " & Join(nparr, "."), 9990)
End If
Set GetNodeFromPathArray = tempNode
Exit Function
processError:
Call setLastError("GetNodeFromPathArray error on line " & Erl & ": " & Error, Err)
Exit Function
End Function
'/**
' * Returns the text of the FIRST node or attribute that matches this
' * node path, if any. An empty string is returned if the node or
' * attribute is not found.
' */
Public Function get (nodePath As String) As String
Dim npp As New NodePathParser(nodePath)
Dim n As NotesDOMNode
Set n = getNode(npp.getPath())
If (Len(npp.getAttName()) > 0) Then
Me.get = getAttributeText(n, npp.getAttName())
Else
Me.get = getNodeText(n)
End If
End Function
'/**
' * Returns a string array containing the text of ALL nodes or attributes
' * that match this node path, if any. An array containing a single empty
' * string is returned if the node or attribute is not found.
' */
Public Function getAll (nodePath As String) As Variant
Dim npp As New NodePathParser(nodePath)
Dim attName As String
Dim narr As Variant
Dim arr() As String
Dim i As Integer
attName = npp.getAttName()
narr = getNodes(npp.getPath())
Redim arr( Ubound(narr) )
For i = 0 To Ubound(narr)
If (Len(attName) > 0) Then
arr(i) = getAttributeText(narr(i), attName)
Else
arr(i) = getNodeText(narr(i))
End If
Next
getAll = arr
End Function
'/**
' * Returns a string array containing the name of all child nodes (only direct
' * children, not grandchildren or great-grandchildren or below) of the node
' * that matches this node path, if any. An array containing a single empty
' * string is returned if the node is not found, or there are no child nodes.
' */
Public Function getSubNodeNames (nodePath As String) As Variant
Dim n As NotesDOMNode
Dim child As NotesDOMNode
Dim arr() As String
Dim i As Integer
Set n = getNode(nodePath)
Redim arr(0)
If (n Is Nothing) Then
getSubNodeNames = arr
Exit Function
End If
Set child = n.FirstChild
Do Until (child.IsNull)
If (child.NodeType = DOMNODETYPE_ELEMENT_NODE) Then
Redim Preserve arr(i)
arr(i) = child.NodeName
i = i + 1
End If
Set child = child.NextSibling
Loop
getSubNodeNames = arr
End Function
'/**
' * Returns a string array containing the name of all attributes for the node
' * that matches this node path, if any. An array containing a single empty
' * string is returned if the node is not found, or if there are no attributes.
' */
Public Function getAttributeNames (nodePath As String) As Variant
Dim n As NotesDOMNode
Dim attrList As NotesDOMNamedNodeMap
Dim attr As NotesDOMNode
Dim arr() As String
Dim i As Integer
Set n = getNode(nodePath)
Redim arr(0)
If (n Is Nothing) Then
getAttributeNames = arr
Exit Function
End If
Set attrList = n.Attributes
Redim arr(attrList.NumberOfEntries - 1)
For i = 1 To attrList.NumberOfEntries
Set attr = attrList.GetItem(i)
arr(i-1) = attr.NodeName
Next
getAttributeNames = arr
End Function
'/**
' * Returns the FIRST node that matches this node path as a
' * NotesDomNode. An uninitialized node is returned if the node
' * is not found, so check for "node Is Nothing" on the return value.
' */
Public Function getNode (nodePath As String) As NotesDOMNode
Dim npp As New NodePathParser(nodePath)
Dim n As NotesDOMNode
Set n = GetNodeFromPathArray(mynode, npp.getArray())
If (Len(npp.getNodeName()) > 0) Then
Set n = findFirstChildNode(n, npp.getNodeName())
End If
Set getNode = n
End Function
'/**
' * Returns ALL nodes that match this node path as an array of
' * NotesDomNode. An array containing a single uninitialized node
' * is returned if the node is not found.
' */
Public Function getNodes (nodePath As String) As Variant
Dim npp As New NodePathParser(nodePath)
Dim n As NotesDOMNode
Dim arr() As NotesDOMNode
Dim i As Integer
Set n = getNode(nodePath)
Redim arr(0)
If (n Is Nothing) Then
getNodes = arr
Exit Function
End If
Do Until (n.IsNull)
If (n.NodeType = DOMNODETYPE_ELEMENT_NODE) And _
(n.NodeName = npp.getNodeName()) Then
Redim Preserve arr(i)
Set arr(i) = n
i = i + 1
End If
Set n = n.NextSibling
Loop
getNodes = arr
End Function
'/**
' * Returns the FIRST node that matches this node path as an
' * XmlNodeReader. An empty XmlNodeReader is returned if the node
' * is not found, so check for "reader.IsEmpty" on the return value.
' */
Function getNodeReader (nodePath As String) As XmlNodeReader
Dim xnr As New XmlNodeReader
Call xnr.ReadNode( getNode(nodePath) )
Set getNodeReader = xnr
End Function
'/**
' * Returns ALL nodes that match this node path as an array of
' * XmlNodeReader. An array containing a single empty XmlNodeReader
' * is returned if the node is not found.
' */
Function getNodeReaders (nodePath As String) As Variant
Dim xarr() As XmlNodeReader
Dim narr As Variant
Dim i As Integer
narr = getNodes(nodePath)
Redim xarr( Ubound(narr) )
For i = 0 To Ubound(narr)
Set xarr(i) = New XmlNodeReader
Call xarr(i).ReadNode( narr(i) )
Next
getNodeReaders = xarr
End Function
'====================================================
'== XML Helper Functions -- used internally, but Public in case
'== they're useful to you.
'====================================================
'/**
' * Returns the text of the given node. If the node is not valid,
' * an empty string is returned. If there are subnodes and
' * whitespace in the formatting of the XML between subnodes,
' * the concatenated whitespace will be returned as well. However,
' * only the text directly beneath the node will be returned, not the
' * text beneath any subnodes.
' */
Function getNodeText (node As NotesDOMNode) As String
Dim child As NotesDOMNode
Dim childText As String
If (node Is Nothing) Then
Exit Function
Elseif (node.IsNull) Then
Exit Function
End If
Set child = node.FirstChild
Do Until (child.IsNull)
If (child.NodeType = DOMNODETYPE_TEXT_NODE) Then
childText = childText + child.NodeValue
Elseif (child.NodeType = DOMNODETYPE_CDATASECTION_NODE) Then
childText = childText + child.NodeValue
End If
Set child = child.NextSibling
Loop
getNodeText = childText
End Function
'/**
' * Returns the text of the given attribute. If the node is not valid or the
' * attribute is not found, an empty string is returned. The attribute name
' * is NOT case-sensitive, to prevent silly errors.
' */
Function getAttributeText (node As NotesDOMNode, attrName As String) As String
Dim attrList As NotesDOMNamedNodeMap
Dim attr As NotesDOMNode
Dim attrValue As String
Dim i As Integer
If (node Is Nothing) Then
Exit Function
Elseif (node.IsNull) Then
Exit Function
End If
Set attrList = node.Attributes
For i = 1 To attrList.NumberOfEntries
Set attr = attrList.GetItem(i)
' If (attr.NodeName = attrName) Then
If (Lcase(attr.NodeName) = Lcase(attrName)) Then
attrValue = attr.NodeValue
End If
Next
getAttributeText = attrValue
End Function
'/**
' * Returns the first child node with a specific name, that is directly
' * beneath the given node. This will NOT find child nodes that are more
' * than one level beneath the given node. Also, the node name being
' * searched for is NOT case-sensitive, to prevent silly errors.
' */
Function findFirstChildNode (node As NotesDOMNode, childName As String) As NotesDOMNode
Set findFirstChildNode = findChildNode(node, childName, 1)
End Function
'/**
' * Returns the nth child node with a specific name, that is directly
' * beneath the given node. For example, if a "bookshelf" node has
' * 5 "book" nodes beneath it, you would use a count parameter of 1
' * to get the first book, 2 to get the second book, etc. This will NOT find
' * child nodes that are more than one level beneath the given node.
' * Also, the node name being searched for is NOT case-sensitive,
' * to prevent silly errors.
' */
Function findChildNode (node As NotesDOMNode, childName As String, count As Integer) As NotesDOMNode
Dim child As NotesDOMNode
Dim i As Integer
If (node Is Nothing) Then
Exit Function
Elseif (node.IsNull) Then
Exit Function
End If
Set child = node.FirstChild
Do Until (child.IsNull)
' If (child.NodeName = childName) Then
If (child.NodeType = DOMNODETYPE_ELEMENT_NODE) And _
(Lcase(child.NodeName) = Lcase(childName)) Then
i = i + 1
If (i >= count) Then
Exit Do
End If
End If
Set child = child.NextSibling
Loop
Set findChildNode = child
End Function
End Class
Private Sub lsdoc_description
%REM
LotusScript.doc-enabled documentation. See http://www.lsdoc.org
The XmlNodeReader class is meant to be an easy interface for getting data
out of XML. For example, starting with the following XML in a file:
<bookshelf>
<book type="paperback">
<title>The Cathedral and the Bazaar</title>
<author.name>Eric S. Raymond</author.name>
</book>
<book type="hardback">
<title>Hackers and Painters</title>
<author.name>Paul Graham</author.name>
</book>
</bookshelf>
If you want to get the title of the first book in that list, you can do this:
Dim reader As New XmlNodeReader
Call reader.ReadFile( "c:\booklist.xml" )
Print reader.get( "bookshelf.book.title" )
For an attribute, you can do:
Print reader.get( "bookshelf.book.@type" )
If a node name or attribute has a period in it, just replace it with a double-period:
Print reader.get( "bookshelf.book.author..name" )
I'm using what I'll refer to as "node paths" to specify the specific node or
attribute you want to get the value for. It's just the format:
"nodeName.nodeName.nodeName" or "nodeName.nodeName.@attributeName".
It's sort of a poor-man's XPath. It doesn't provide nearly the power or
flexibility that XPath has, but it's good for most basic XML parsing.
If you specify a node/attribute that doesn't exist, you just get an empty
string as a result -- no errors are thrown, so you don't have to get bogged
down in error handling. If you DO want to see whether there was an
error after trying to get a node, check the result of getLastError().
You can read XML in to an XmlNodeReader using ReadText(), ReadFile(),
ReadStream(), or ReadNode(). You can get the result of a "node path"
expression as a string, a NotesDomNode, or another XmlNodeReader.
Here are some more examples of use, this time parsing an Atom feed:
Dim reader As New XmlNodeReader
Dim v As Variant
'** read some XML text
Call reader.ReadText( GetXmlText() )
v = reader.get( "feed.entry.id" ) '** text of the "id" node under the first "entry" node
v = reader.getAll( "feed.entry.category" ) '** text array of all "category" nodes under the first "entry" node
v = reader.get( "feed.collection.atom:title" ) '** text of a namespaced node
v = reader.getAll( "feed.entry.link.@href" ) '** text array of "href" attribute for all "link" nodes under the first "entry" node
v = reader.getAttributeNames( "feed.generator" ) '** text array of all attribute names of the first "generator" node
v = reader.getSubNodeNames( "feed.collection" ) '** text array of all child node names of the first "collection" node
'** get the title of all the "entry" child nodes
Dim arr As Variant
arr = reader.getNodeReaders("feed.entry")
Forall nr In arr
v = nr.get("title")
End Forall
'** read one of the child nodes from above
Dim reader2 As New XmlNodeReader
'** you could also use reader2 = reader.getNodeReader( "feed.entry" ) here
Call reader2.ReadNode( reader.getNode("feed.entry") ) '** read a NotesDomNode
v = reader2.get( "id" ) '** text of the first "id" node beneath the base node
v = reader2.get( "@xml:lang" ) '** attribute of the base node itself
v = reader2.get( "" ) '** text of the base node (may be a lot of whitespace)
v = reader2.get( "author.email" ) '** text of a child node
'** inline function chaining examples
v = reader.getNodeReader( "feed.entry" ).get( "author.email" )
v = reader.getNodeReaders( "feed.entry" )(1).get( "summary" )
v = reader2.readText( GetXmlText2() ).get( "feed.generator" )
'** check for parse errors
If reader2.isEmpty Then
Messagebox reader2.getLastError()
End If
'** read a 5 MB DXL file (takes a few seconds to read in)
Call reader2.ReadFile( "C:\dbexport.xml" )
v = reader2.get( "database.@title" )
As with any DOM parsing venture, be careful with large files. I did some tests
with a 5 MB XML file and that was a little slow but it worked.
Also, if you use ReadStream() to parse an XML NotesStream that you just
created, be aware of this caveat from the Domino Designer Help:
"You cannot explicitly read or write a NotesStream object associated with a
file prior to using it for XML input or output. For example, if you write to a file
then use it for XML input, you must close and reopen the NotesStream
object."
XmlNodeReader uses NotesDomParser to parse XML internally, so if you've
just written some XML to a file be sure to close and reopen it before passing
it to ReadStream() -- or just close it and use ReadFile().
version 1.0
Oct. 25, 2008
Copyright (c) 2008 Julian Robichaux, http://www.nsftools.com
This code is licensed under the terms of the MIT License, available at
http://www.opensource.org/licenses/mit-license.php
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
@version 1.0
@author Julian Robichaux, http://www.nsftools.com
%END REM
End Sub