comparison core/tools/xml2struct.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e9a9cd732c1e
function [ s ] = xml2struct( file )
%Convert xml file into a MATLAB structure
% [ s ] = xml2struct( file )
%
% file is either the name of an xml file (the '.xml' extension may be
% omitted) or an already parsed Xerces DOM document/element object.
%
% A file containing:
% <XMLname attrib1="Some value">
%   <Element>Some text</Element>
%   <DifferentElement attrib2="2">Some more text</DifferentElement>
%   <DifferentElement attrib3="2" attrib4="1">Even more text</DifferentElement>
% </XMLname>
%
% Will produce:
% s.XMLname.Attributes.attrib1 = "Some value";
% s.XMLname.Element.Text = "Some text";
% s.XMLname.DifferentElement{1}.Attributes.attrib2 = "2";
% s.XMLname.DifferentElement{1}.Text = "Some more text";
% s.XMLname.DifferentElement{2}.Attributes.attrib3 = "2";
% s.XMLname.DifferentElement{2}.Attributes.attrib4 = "1";
% s.XMLname.DifferentElement{2}.Text = "Even more text";
%
% Please note that the following characters are substituted
% '-' by '_dash_', ':' by '_colon_' and '.' by '_dot_'
%
% An error with identifier 'xml2struct:fileNotFound' is raised when the
% file cannot be located, with or without the '.xml' extension.
%
% Written by W. Falkena, ASTI, TUDelft, 21-08-2010
% Attribute parsing speed increased by 40% by A. Wanner, 14-6-2011
% Added CDATA support by I. Smirnov, 20-3-2012
%
% Modified by X. Mo, University of Wisconsin, 12-5-2012

    if (nargin < 1)
        % called without input: show the help text instead of failing
        clc;
        help xml2struct
        return
    end

    if isa(file, 'org.apache.xerces.dom.DeferredDocumentImpl') || isa(file, 'org.apache.xerces.dom.DeferredElementImpl')
        % input is a java xml object
        xDoc = file;
    else
        %check for existence
        if (exist(file,'file') == 0)
            %Perhaps the xml extension was omitted from the file name. Add
            %the extension and try again. Only the real extension is
            %inspected, so a '.xml' occurring elsewhere in the path (for
            %instance in a directory name) does not suppress the retry.
            [unusedPath, unusedBase, ext] = fileparts(file); %#ok<ASGLU>
            if (~strcmpi(ext, '.xml'))
                file = [file '.xml'];
            end

            if (exist(file,'file') == 0)
                error('xml2struct:fileNotFound', ...
                      'The file %s could not be found', file);
            end
        end
        %read the xml file
        xDoc = xmlread(file);
    end

    %parse xDoc into a MATLAB structure
    s = parseChildNodes(xDoc);

end
60
61 % ----- Subfunction parseChildNodes -----
function [children,ptext,textflag] = parseChildNodes(theNode)
% Recurse over node children.
%
%   children - struct with one field per distinct child element name. A
%              name that occurs once holds a struct; a name that occurs
%              several times holds a cell array with one struct per
%              occurrence.
%   ptext    - struct collecting the non-whitespace character data found
%              directly inside theNode, under the fields 'Text', 'CDATA'
%              and 'Comment'.
%   textflag - name of the field in which getNodeData stored node text;
%              starts as 'Text' and is overwritten by the value returned
%              for each processed child.

    % Value of org.w3c.dom.Node.DOCUMENT_TYPE_NODE.
    DOCUMENT_TYPE_NODE = 10;

    children = struct;
    ptext = struct; textflag = 'Text';
    if hasChildNodes(theNode)
        childNodes = getChildNodes(theNode);
        numChildNodes = getLength(childNodes);

        for count = 1:numChildNodes
            theChild = item(childNodes,count-1);

            % A <!DOCTYPE ...> declaration is not document content. The DOM
            % specification defines its text content as null, which
            % getNodeData cannot convert, and it would otherwise show up as
            % an element named after the DTD. Skip it.
            if (getNodeType(theChild) == DOCUMENT_TYPE_NODE)
                continue
            end

            [text,name,attr,childs,textflag] = getNodeData(theChild);

            if (~strcmp(name,'#text') && ~strcmp(name,'#comment') && ~strcmp(name,'#cdata_dash_section'))
                %assemble the struct for this element: its own children,
                %then its text fields, then its attributes
                newElement = childs;
                textFieldNames = fieldnames(text);
                for t = 1:numel(textFieldNames)
                    textFieldName = textFieldNames{t};
                    newElement.(textFieldName) = text.(textFieldName);
                end
                if (~isempty(attr))
                    newElement.('Attributes') = attr;
                end

                %XML allows the same elements to be defined multiple times,
                %put each in a different cell
                if (isfield(children,name))
                    if (~iscell(children.(name)))
                        %put existing element into cell format
                        children.(name) = {children.(name)};
                    end
                    index = length(children.(name))+1;
                    %add new element
                    children.(name){index} = newElement;
                else
                    %add previously unknown (new) element to the structure
                    children.(name) = newElement;
                end
            else
                %pick the field of ptext that matches the kind of node
                ptextflag = 'Text';
                if (strcmp(name, '#cdata_dash_section'))
                    ptextflag = 'CDATA';
                elseif (strcmp(name, '#comment'))
                    ptextflag = 'Comment';
                end

                %this is the text in an element (i.e., the parentNode);
                %whitespace-only text is ignored
                if (~isempty(regexprep(text.(textflag),'[\s]*','')))
                    if (~isfield(ptext,ptextflag) || isempty(ptext.(ptextflag)))
                        ptext.(ptextflag) = text.(textflag);
                    else
                        %element data such as
                        %<element>Text <!--Comment--> More text</element>
                        %yields several pieces of the same kind: just
                        %append them to each other
                        ptext.(ptextflag) = [ptext.(ptextflag) text.(textflag)];
                    end
                end
            end

        end
    end
end
137
138 % ----- Subfunction getNodeData -----
function [text,name,attr,childs,textflag] = getNodeData(theNode)
% Create structure of node info.
%
%   text     - struct holding the character data of theNode (as returned
%              by parseChildNodes, or the node's own text content when it
%              has neither child elements nor collected text)
%   name     - node name made usable as a structure field name
%   attr     - struct of attributes, or [] when the node has none
%   childs   - struct of child elements returned by parseChildNodes
%   textflag - field name under which text is stored, as returned by
%              parseChildNodes

    %make sure name is allowed as structure name
    name = toCharArray(getNodeName(theNode))';
    name = strrep(name, '-', '_dash_');
    name = strrep(name, ':', '_colon_');
    name = strrep(name, '.', '_dot_');

    attr = parseAttributes(theNode);
    if (isempty(fieldnames(attr)))
        attr = [];
    end

    %parse child nodes
    [childs,text,textflag] = parseChildNodes(theNode);

    if (isempty(fieldnames(childs)) && isempty(fieldnames(text)))
        %get the data of any childless nodes
        % faster than if any(strcmp(methods(theNode), 'getData'))
        % faster than text = char(getData(theNode));
        content = getTextContent(theNode);
        if (isempty(content))
            % getTextContent is specified to return null for some node
            % types (e.g. document type nodes); a Java null arrives here as
            % [] and cannot be passed to toCharArray, so store empty text.
            text.(textflag) = '';
        else
            text.(textflag) = toCharArray(content)';
        end
    end

end
165
166 % ----- Subfunction parseAttributes -----
function attributes = parseAttributes(theNode)
% Create attributes structure.
%
% Returns a struct with one field per attribute of theNode; the struct has
% no fields when the node carries no attributes. Field names are the
% attribute names with '-', ':' and '.' replaced by '_dash_', '_colon_'
% and '_dot_'. Field values are the attribute values as char arrays.

    % characters that are replaced in attribute names, with their stand-ins
    substitutions = {'-', '_dash_'; ...
                     ':', '_colon_'; ...
                     '.', '_dot_'};

    attributes = struct;
    if ~hasAttributes(theNode)
        return
    end

    attrMap = getAttributes(theNode);
    lastIdx = getLength(attrMap) - 1;

    % the DOM attribute map is indexed from zero
    for zeroIdx = 0:lastIdx
        % Suggestion of Adrian Wanner: take the attribute's string form and
        % slice it, rather than calling getName/getValue separately. The
        % slicing assumes the string looks like name="value".
        pairStr = toCharArray(toString(item(attrMap, zeroIdx)))';
        eqPositions = strfind(pairStr, '=');
        firstEq = eqPositions(1);

        fieldName = pairStr(1:(firstEq-1));
        for row = 1:size(substitutions, 1)
            fieldName = strrep(fieldName, substitutions{row,1}, substitutions{row,2});
        end

        % drop the '="' after the name and the closing quote
        attributes.(fieldName) = pairStr((firstEq+2):(end-1));
    end
end