function [ s ] = xml2struct( file )
%Convert xml file into a MATLAB structure
% [ s ] = xml2struct( file )
%
% FILE is either the name of an XML file (the '.xml' extension may be
% omitted) or an already parsed Java XML DOM node, such as the document
% returned by xmlread.
%
% A file containing:
% <XMLname attrib1="Some value">
%   <Element>Some text</Element>
%   <DifferentElement attrib2="2">Some more text</DifferentElement>
%   <DifferentElement attrib3="2" attrib4="1">Even more text</DifferentElement>
% </XMLname>
%
% Will produce:
% s.XMLname.Attributes.attrib1 = "Some value";
% s.XMLname.Element.Text = "Some text";
% s.XMLname.DifferentElement{1}.Attributes.attrib2 = "2";
% s.XMLname.DifferentElement{1}.Text = "Some more text";
% s.XMLname.DifferentElement{2}.Attributes.attrib3 = "2";
% s.XMLname.DifferentElement{2}.Attributes.attrib4 = "1";
% s.XMLname.DifferentElement{2}.Text = "Even more text";
%
% Please note that the following characters are substituted
% '-' by '_dash_', ':' by '_colon_' and '.' by '_dot_'
%
% Called without arguments, the function clears the command window,
% prints this help text and returns without assigning s.
%
% An error is raised when FILE is a name that cannot be found, with or
% without the '.xml' extension appended.
%
% Written by W. Falkena, ASTI, TUDelft, 21-08-2010
% Attribute parsing speed increased by 40% by A. Wanner, 14-6-2011
% Added CDATA support by I. Smirnov, 20-3-2012
%
% Modified by X. Mo, University of Wisconsin, 12-5-2012

    if (nargin < 1)
        clc;
        help xml2struct
        return
    end

    % A string scalar has to become a char vector before the extension
    % handling below: [file '.xml'] on a string would build a 1x2 string
    % array instead of appending to the name. isa(...,'string') is simply
    % false on releases that have no string class.
    if (isa(file, 'string') && isscalar(file))
        file = char(file);
    end

    isDomNode = isa(file, 'org.apache.xerces.dom.DeferredDocumentImpl') ...
        || isa(file, 'org.apache.xerces.dom.DeferredElementImpl') ...
        || isa(file, 'org.w3c.dom.Node');

    if isDomNode
        % input is already a java xml object, use it as is
        xDoc = file;
    else
        % check for existence
        if (exist(file,'file') == 0)
            % Perhaps the xml extension was omitted from the file name.
            % Add the extension (unless '.xml' already occurs somewhere in
            % the name) and try again.
            if (isempty(strfind(file,'.xml')))
                file = [file '.xml'];
            end

            if (exist(file,'file') == 0)
                error(['The file ' file ' could not be found']);
            end
        end
        % read the xml file
        xDoc = xmlread(file);
    end

    % parse xDoc into a MATLAB structure
    s = parseChildNodes(xDoc);

end
% ----- Subfunction parseChildNodes -----
function [children,ptext,textflag] = parseChildNodes(theNode)
% Recurse over node children.
%
% children - struct with one field per child element name. A name that
%            occurs more than once holds a cell array with one cell per
%            occurrence, in document order.
% ptext    - struct collecting the non-blank character data found directly
%            under theNode, in the fields 'Text', 'CDATA' and/or 'Comment'.
% textflag - fifth output of getNodeData for the last child that was
%            parsed; 'Text' when no child was parsed.

    DOCUMENT_TYPE_NODE = 10; % value of org.w3c.dom.Node.DOCUMENT_TYPE_NODE

    children = struct;
    ptext = struct;
    textflag = 'Text';

    if ~hasChildNodes(theNode)
        return
    end

    childNodes = getChildNodes(theNode);
    numChildNodes = getLength(childNodes);

    for count = 1:numChildNodes
        theChild = item(childNodes,count-1);

        % A <!DOCTYPE ...> declaration is not an element. Its node name is
        % the doctype name, so it would be stored as if it were an element
        % of that name, and the DOM defines its text content as null, which
        % getNodeData cannot turn into a char array. Leave it out.
        if (getNodeType(theChild) == DOCUMENT_TYPE_NODE)
            continue
        end

        [text,name,attr,childs,textflag] = getNodeData(theChild);

        isCharacterData = strcmp(name,'#text') || strcmp(name,'#comment') ...
            || strcmp(name,'#cdata_dash_section');

        if ~isCharacterData
            % Assemble the struct for this element once: its own children,
            % then its text fields, then its attributes (if any).
            element = childs;
            textFieldNames = fieldnames(text);
            for t = 1:length(textFieldNames)
                textFieldName = textFieldNames{t};
                element.(textFieldName) = text.(textFieldName);
            end
            if (~isempty(attr))
                element.('Attributes') = attr;
            end

            if (~isfield(children,name))
                % add previously unknown (new) element to the structure
                children.(name) = element;
            else
                % XML allows the same element to be defined multiple
                % times, put each occurrence in a different cell
                if (~iscell(children.(name)))
                    % put existing element into cell format
                    children.(name) = {children.(name)};
                end
                index = length(children.(name))+1;
                children.(name){index} = element;
            end
        else
            % this is character data of the element being parsed (i.e.,
            % the parentNode); pick the field it is collected under
            ptextflag = 'Text';
            if (strcmp(name, '#cdata_dash_section'))
                ptextflag = 'CDATA';
            elseif (strcmp(name, '#comment'))
                ptextflag = 'Comment';
            end

            % ignore data that consists of whitespace only
            if (~isempty(regexprep(text.(textflag),'[\s]*','')))
                if (~isfield(ptext,ptextflag) || isempty(ptext.(ptextflag)))
                    ptext.(ptextflag) = text.(textflag);
                else
                    % Element data can be split into several pieces:
                    %   <element>Text <!--Comment--> More text</element>
                    % Pieces of the same kind are appended to each other
                    % rather than kept in separate cells.
                    ptext.(ptextflag) = [ptext.(ptextflag) text.(textflag)];
                end
            end
        end
    end
end
% ----- Subfunction getNodeData -----
function [text,name,attr,childs,textflag] = getNodeData(theNode)
% Create structure of node info.
%
% text     - struct with the character data of theNode as returned by
%            parseChildNodes; for a node without child elements and
%            without collected text it gets the single field named by
%            textflag, holding the node's text content
% name     - node name with '-', ':' and '.' replaced so that it can be
%            used as a structure field name
% attr     - struct of attributes from parseAttributes, or [] when the
%            node has none
% childs   - struct of child elements as returned by parseChildNodes
% textflag - third output of parseChildNodes for theNode

    % make sure name is allowed as structure name
    name = toCharArray(getNodeName(theNode))';
    name = strrep(name, '-', '_dash_');
    name = strrep(name, ':', '_colon_');
    name = strrep(name, '.', '_dot_');

    attr = parseAttributes(theNode);
    if (isempty(fieldnames(attr)))
        attr = [];
    end

    % parse child nodes
    [childs,text,textflag] = parseChildNodes(theNode);

    if (isempty(fieldnames(childs)) && isempty(fieldnames(text)))
        % get the data of any childless nodes
        content = getTextContent(theNode);
        if isjava(content)
            % toCharArray is faster than char(getData(theNode))
            text.(textflag) = toCharArray(content)';
        else
            % The DOM defines the text content of some node types (for
            % example a document type declaration) as null. A Java null
            % is not a Java object in MATLAB, so toCharArray cannot be
            % called on it; store empty text instead of raising an error.
            text.(textflag) = '';
        end
    end

end
% ----- Subfunction parseAttributes -----
function attributes = parseAttributes(theNode)
% Create attributes structure.
%
% Returns a struct with one field per attribute of theNode. Field names
% are the attribute names with '-', ':' and '.' replaced by '_dash_',
% '_colon_' and '_dot_'; field values are the attribute values as char.
% A node without attributes yields a struct without fields.

    attributes = struct;
    if ~hasAttributes(theNode)
        return
    end

    attrMap = getAttributes(theNode);
    lastIndex = getLength(attrMap) - 1;

    % The attribute map is indexed from zero.
    for idx = 0:lastIndex
        % Suggestion of Adrian Wanner: take the printed form of the
        % attribute, name="value", and cut it apart here instead of asking
        % the attribute for its name and value separately.
        printed = toCharArray(toString(item(attrMap,idx)))';
        eqPos = strfind(printed,'=');
        splitAt = eqPos(1); % first '=' ends the name; the value may hold more

        fieldName = printed(1:(splitAt-1));
        fieldName = strrep(fieldName, '-', '_dash_');
        fieldName = strrep(fieldName, ':', '_colon_');
        fieldName = strrep(fieldName, '.', '_dot_');

        % skip the '=' and the opening quote, drop the closing quote
        attributes.(fieldName) = printed((splitAt+2):(end-1));
    end
end