Mercurial > hg > camir-aes2014
diff core/tools/xml2struct.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
line wrap: on
line diff
function [ s ] = xml2struct( file )
%XML2STRUCT Convert an XML file into a MATLAB structure
% [ s ] = xml2struct( file )
%
% Input:
%   file - name of an XML file (if the file is not found and the name does
%          not end in '.xml', the extension is appended and the lookup is
%          retried), or an already parsed Java XML object of class
%          org.apache.xerces.dom.DeferredDocumentImpl or
%          org.apache.xerces.dom.DeferredElementImpl.
%
% Output:
%   s    - nested structure mirroring the XML tree (see example below).
%
% Called without arguments, the function clears the command window and
% prints this help text.
%
% A file containing:
% <XMLname attrib1="Some value">
%   <Element>Some text</Element>
%   <DifferentElement attrib2="2">Some more text</DifferentElement>
%   <DifferentElement attrib3="2" attrib4="1">Even more text</DifferentElement>
% </XMLname>
%
% Will produce:
% s.XMLname.Attributes.attrib1 = 'Some value';
% s.XMLname.Element.Text = 'Some text';
% s.XMLname.DifferentElement{1}.Attributes.attrib2 = '2';
% s.XMLname.DifferentElement{1}.Text = 'Some more text';
% s.XMLname.DifferentElement{2}.Attributes.attrib3 = '2';
% s.XMLname.DifferentElement{2}.Attributes.attrib4 = '1';
% s.XMLname.DifferentElement{2}.Text = 'Even more text';
%
% Please note that the following characters are substituted
% '-' by '_dash_', ':' by '_colon_' and '.' by '_dot_'
%
% Written by W. Falkena, ASTI, TUDelft, 21-08-2010
% Attribute parsing speed increased by 40% by A. Wanner, 14-6-2011
% Added CDATA support by I. Smirnov, 20-3-2012
%
% Modified by X. Mo, University of Wisconsin, 12-5-2012

    if (nargin < 1)
        clc;
        help xml2struct
        return
    end

    if isa(file, 'org.apache.xerces.dom.DeferredDocumentImpl') || isa(file, 'org.apache.xerces.dom.DeferredElementImpl')
        % input is a java xml object
        xDoc = file;
    else
        % check for existence
        if (exist(file,'file') == 0)
            % Perhaps the xml extension was omitted from the file name.
            % Test the actual suffix rather than searching for '.xml'
            % anywhere in the name, so that names such as 'data.xml_v2'
            % (which merely contain '.xml') still get the extension added.
            hasXmlSuffix = numel(file) >= 4 && strcmp(file(end-3:end), '.xml');
            if (~hasXmlSuffix)
                file = [file '.xml'];
            end

            if (exist(file,'file') == 0)
                % the file name is passed as a %s argument, so characters
                % such as '\' or '%' in a path are reported verbatim
                error('xml2struct:fileNotFound', ...
                      'The file %s could not be found', file);
            end
        end
        % read the xml file
        xDoc = xmlread(file);
    end

    % parse xDoc into a MATLAB structure
    s = parseChildNodes(xDoc);

end

% ----- Subfunction parseChildNodes -----
function [children,ptext,textflag] = parseChildNodes(theNode)
    % Recurse over the children of theNode.
    %
    % Returns:
    %   children - struct with one field per child element name; a name
    %              that occurs more than once holds a cell array with one
    %              entry per occurrence
    %   ptext    - struct collecting the non-whitespace content of the
    %              direct '#text', '#cdata-section' and '#comment' children
    %              under the fields 'Text', 'CDATA' and 'Comment'
    %   textflag - name of the field under which getNodeData stores the
    %              content of a childless node ('Text')
    children = struct;
    ptext = struct;
    textflag = 'Text';

    if hasChildNodes(theNode)
        childNodes = getChildNodes(theNode);
        numChildNodes = getLength(childNodes);

        for count = 1:numChildNodes
            % the DOM node list is zero-based
            theChild = item(childNodes,count-1);
            [text,name,attr,childs,textflag] = getNodeData(theChild);

            isElement = ~strcmp(name,'#text') && ~strcmp(name,'#comment') ...
                        && ~strcmp(name,'#cdata_dash_section');

            if (isElement)
                % build the entry for this element: its own children, then
                % its text fields, then its attributes
                entry = childs;
                textFieldNames = fieldnames(text);
                for k = 1:length(textFieldNames)
                    thisFieldName = textFieldNames{k};
                    entry.(thisFieldName) = text.(thisFieldName);
                end
                if (~isempty(attr))
                    entry.('Attributes') = attr;
                end

                % XML allows the same element to be defined multiple times,
                % put each occurrence in a different cell
                if (isfield(children,name))
                    if (~iscell(children.(name)))
                        % put the existing element into cell format
                        children.(name) = {children.(name)};
                    end
                    % add new element
                    children.(name){length(children.(name))+1} = entry;
                else
                    % add previously unknown (new) element to the structure
                    children.(name) = entry;
                end
            else
                % choose the field of ptext this node contributes to
                ptextflag = 'Text';
                if (strcmp(name, '#cdata_dash_section'))
                    ptextflag = 'CDATA';
                elseif (strcmp(name, '#comment'))
                    ptextflag = 'Comment';
                end

                % this is the text in an element (i.e., the parentNode);
                % whitespace-only content is ignored
                if (~isempty(regexprep(text.(textflag),'[\s]*','')))
                    if (~isfield(ptext,ptextflag) || isempty(ptext.(ptextflag)))
                        ptext.(ptextflag) = text.(textflag);
                    else
                        % element data such as
                        %   <element>Text <!--Comment--> More text</element>
                        % yields several nodes of the same kind: just
                        % append the text
                        ptext.(ptextflag) = [ptext.(ptextflag) text.(textflag)];
                    end
                end
            end

        end
    end
end

% ----- Subfunction getNodeData -----
function [text,name,attr,childs,textflag] = getNodeData(theNode)
    % Create structure of node info.
    %
    % Returns:
    %   text     - struct with the text-like content of the node
    %   name     - node name with '-', ':' and '.' substituted
    %   attr     - struct of attributes, or [] if the node has none
    %   childs   - struct of child elements (see parseChildNodes)
    %   textflag - field name used for the content of a childless node

    % make sure name is allowed as structure name
    name = toCharArray(getNodeName(theNode))';
    name = strrep(name, '-', '_dash_');
    name = strrep(name, ':', '_colon_');
    name = strrep(name, '.', '_dot_');

    attr = parseAttributes(theNode);
    if (isempty(fieldnames(attr)))
        attr = [];
    end

    % parse child nodes
    [childs,text,textflag] = parseChildNodes(theNode);

    if (isempty(fieldnames(childs)) && isempty(fieldnames(text)))
        % get the data of any childless nodes
        % faster than if any(strcmp(methods(theNode), 'getData'))
        % no need to try-catch (?)
        % faster than text = char(getData(theNode));
        text.(textflag) = toCharArray(getTextContent(theNode))';
    end

end

% ----- Subfunction parseAttributes -----
function attributes = parseAttributes(theNode)
    % Create attributes structure: one field per attribute of theNode,
    % holding the attribute value as text. Attribute names get the same
    % '-', ':' and '.' substitutions as element names.

    attributes = struct;
    if hasAttributes(theNode)
        theAttributes = getAttributes(theNode);
        numAttributes = getLength(theAttributes);

        for count = 1:numAttributes
            %attrib = item(theAttributes,count-1);
            %attr_name = regexprep(char(getName(attrib)),'[-:.]','_');
            %attributes.(attr_name) = char(getValue(attrib));

            % Suggestion of Adrian Wanner: split the string form of the
            % attribute at the first '='; this assumes the string looks
            % like name="value", so the value starts two characters after
            % the '=' and ends one character before the end
            str = toCharArray(toString(item(theAttributes,count-1)))';
            k = strfind(str,'=');
            attr_name = str(1:(k(1)-1));
            attr_name = strrep(attr_name, '-', '_dash_');
            attr_name = strrep(attr_name, ':', '_colon_');
            attr_name = strrep(attr_name, '.', '_dot_');
            attributes.(attr_name) = str((k(1)+2):(end-1));
        end
    end
end