annotate core/tools/xml2struct.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
rev   line source
function [ s ] = xml2struct( file )
%XML2STRUCT Convert an XML file (or an XML DOM node) into a MATLAB structure.
% [ s ] = xml2struct( file )
%
% Input:
%   file - name of an XML file (the '.xml' extension may be omitted), or an
%          already parsed Java DOM node, e.g. the document returned by
%          xmlread.
%
% Output:
%   s    - nested structure built from the child nodes of the document.
%
% Errors:
%   xml2struct:fileNotFound - the file does not exist, with or without the
%                             '.xml' extension appended.
%
% A file containing:
% <XMLname attrib1="Some value">
%   <Element>Some text</Element>
%   <DifferentElement attrib2="2">Some more text</DifferentElement>
%   <DifferentElement attrib3="2" attrib4="1">Even more text</DifferentElement>
% </XMLname>
%
% Will produce:
% s.XMLname.Attributes.attrib1 = "Some value";
% s.XMLname.Element.Text = "Some text";
% s.XMLname.DifferentElement{1}.Attributes.attrib2 = "2";
% s.XMLname.DifferentElement{1}.Text = "Some more text";
% s.XMLname.DifferentElement{2}.Attributes.attrib3 = "2";
% s.XMLname.DifferentElement{2}.Attributes.attrib4 = "1";
% s.XMLname.DifferentElement{2}.Text = "Even more text";
%
% Please note that the following characters are substituted
% '-' by '_dash_', ':' by '_colon_' and '.' by '_dot_'
%
% Written by W. Falkena, ASTI, TUDelft, 21-08-2010
% Attribute parsing speed increased by 40% by A. Wanner, 14-6-2011
% Added CDATA support by I. Smirnov, 20-3-2012
%
% Modified by X. Mo, University of Wisconsin, 12-5-2012

    % Called without arguments: show the help text and stop.
    if (nargin < 1)
        clc;
        help xml2struct
        return
    end

    % Accept an already parsed DOM object. The two Xerces "Deferred" classes
    % are what xmlread produces; the org.w3c.dom.Node check additionally lets
    % through DOM nodes created by other means (e.g. non-deferred documents),
    % which would otherwise be mistaken for a file name.
    isDomNode = isa(file, 'org.apache.xerces.dom.DeferredDocumentImpl') || ...
                isa(file, 'org.apache.xerces.dom.DeferredElementImpl') || ...
                isa(file, 'org.w3c.dom.Node');

    if isDomNode
        % input is a java xml object
        xDoc = file;
    else
        % check for existence
        if (exist(file,'file') == 0)
            % Perhaps the xml extension was omitted from the file name. Add
            % the extension and try again.
            if (isempty(strfind(file,'.xml')))
                file = [file '.xml'];
            end

            if (exist(file,'file') == 0)
                % The file name is passed as a %s argument so that characters
                % such as '\' or '%' in a path are printed literally.
                error('xml2struct:fileNotFound', ...
                      'The file %s could not be found', file);
            end
        end
        % read the xml file
        xDoc = xmlread(file);
    end

    % parse xDoc into a MATLAB structure
    s = parseChildNodes(xDoc);

end
wolffd@0 60
% ----- Subfunction parseChildNodes -----
function [children,ptext,textflag] = parseChildNodes(theNode)
% Collect the direct children of theNode.
%
%   children - struct with one field per child element name; when a name
%              occurs more than once its entries are kept in a cell array
%   ptext    - struct with the textual content found directly under theNode,
%              stored under 'Text', 'CDATA' or 'Comment'
%   textflag - field name under which a child's text content is read; it
%              starts as 'Text' and is refreshed by every getNodeData call

    children = struct;
    ptext = struct;
    textflag = 'Text';

    % Nothing to do for leaf nodes.
    if ~hasChildNodes(theNode)
        return
    end

    nodeList = getChildNodes(theNode);
    nodeCount = getLength(nodeList);

    % DOM node lists are zero-based.
    for idx = 0:(nodeCount-1)
        [text,name,attr,childs,textflag] = getNodeData(item(nodeList,idx));

        isContentNode = strcmp(name,'#text') || strcmp(name,'#comment') || strcmp(name,'#cdata_dash_section');

        if ~isContentNode
            % Element node: start from its own children, then add its text
            % fields and (if any) its attributes.
            entry = childs;
            textKeys = fieldnames(text);
            for k = 1:length(textKeys)
                entry.(textKeys{k}) = text.(textKeys{k});
            end
            if ~isempty(attr)
                entry.('Attributes') = attr;
            end

            if ~isfield(children,name)
                % first occurrence of this element name
                children.(name) = entry;
            else
                % XML allows the same element to appear several times; keep
                % every occurrence in its own cell.
                if ~iscell(children.(name))
                    % convert the existing single element into cell format
                    children.(name) = {children.(name)};
                end
                children.(name){length(children.(name))+1} = entry;
            end
            continue
        end

        % Text-like node: decide under which field of the parent it is kept.
        switch name
            case '#cdata_dash_section'
                ptextflag = 'CDATA';
            case '#comment'
                ptextflag = 'Comment';
            otherwise
                ptextflag = 'Text';
        end

        % this is the text in an element (i.e., the parentNode)
        piece = text.(textflag);
        if isempty(regexprep(piece,'[\s]*',''))
            % whitespace-only content is ignored
            continue
        end

        if isfield(ptext,ptextflag) && ~isempty(ptext.(ptextflag))
            % Element data such as
            %   <element>Text <!--Comment--> More text</element>
            % yields several pieces of the same kind; they are simply
            % concatenated rather than stored in separate cells.
            ptext.(ptextflag) = [ptext.(ptextflag) piece];
        else
            ptext.(ptextflag) = piece;
        end
    end
end
wolffd@0 137
% ----- Subfunction getNodeData -----
function [text,name,attr,childs,textflag] = getNodeData(theNode)
% Gather everything known about a single node.
%
%   text     - struct with the node's textual content
%   name     - node name, rewritten so it can be used as a struct field name
%   attr     - struct of attributes, or [] when the node has none
%   childs   - struct of child elements (see parseChildNodes)
%   textflag - field name under which plain text is stored in 'text'

    % Characters that cannot appear in a field name and their replacements.
    substitutions = {'-', '_dash_'; ...
                     ':', '_colon_'; ...
                     '.', '_dot_'};

    name = toCharArray(getNodeName(theNode))';
    for r = 1:size(substitutions,1)
        name = strrep(name, substitutions{r,1}, substitutions{r,2});
    end

    % An attribute struct without fields is reported as [].
    attr = parseAttributes(theNode);
    if isempty(fieldnames(attr))
        attr = [];
    end

    % Descend into the node's children.
    [childs,text,textflag] = parseChildNodes(theNode);

    % A node with neither child elements nor collected text is a leaf: take
    % its text content directly. toCharArray(...)' is used instead of
    % char(getData(...)) for speed.
    isLeaf = isempty(fieldnames(childs)) && isempty(fieldnames(text));
    if isLeaf
        text.(textflag) = toCharArray(getTextContent(theNode))';
    end

end
wolffd@0 165
% ----- Subfunction parseAttributes -----
function attributes = parseAttributes(theNode)
% Build a struct with one field per attribute of theNode.
%
%   attributes - struct mapping each (sanitised) attribute name to its value
%                as a char array; it has no fields when the node carries no
%                attributes

    attributes = struct;

    if ~hasAttributes(theNode)
        return
    end

    % Characters that cannot appear in a field name and their replacements.
    substitutions = {'-', '_dash_'; ...
                     ':', '_colon_'; ...
                     '.', '_dot_'};

    attrMap = getAttributes(theNode);
    attrCount = getLength(attrMap);

    % The attribute map is zero-based.
    for pos = 0:(attrCount-1)
        % Suggestion of Adrian Wanner: take the string form of the attribute,
        % name="value", and cut it apart instead of calling getName/getValue.
        pair = toCharArray(toString(item(attrMap,pos)))';
        eqIdx = strfind(pair,'=');
        firstEq = eqIdx(1);

        key = pair(1:(firstEq-1));
        for r = 1:size(substitutions,1)
            key = strrep(key, substitutions{r,1}, substitutions{r,2});
        end

        % Skip the '=' and the opening quote, drop the closing quote.
        attributes.(key) = pair((firstEq+2):(end-1));
    end
end