wolffd@0
|
function [ s ] = xml2struct( file )
%XML2STRUCT Convert xml file into a MATLAB structure
% [ s ] = xml2struct( file )
%
% FILE is either the name of an xml file (a missing '.xml' extension is
% appended when the name as given does not exist), or an already parsed
% Java DOM object of class org.apache.xerces.dom.DeferredDocumentImpl or
% org.apache.xerces.dom.DeferredElementImpl.
%
% A file containing:
% <XMLname attrib1="Some value">
%   <Element>Some text</Element>
%   <DifferentElement attrib2="2">Some more text</DifferentElement>
%   <DifferentElement attrib3="2" attrib4="1">Even more text</DifferentElement>
% </XMLname>
%
% Will produce:
% s.XMLname.Attributes.attrib1 = "Some value";
% s.XMLname.Element.Text = "Some text";
% s.XMLname.DifferentElement{1}.Attributes.attrib2 = "2";
% s.XMLname.DifferentElement{1}.Text = "Some more text";
% s.XMLname.DifferentElement{2}.Attributes.attrib3 = "2";
% s.XMLname.DifferentElement{2}.Attributes.attrib4 = "1";
% s.XMLname.DifferentElement{2}.Text = "Even more text";
%
% Please note that the following characters are substituted
% '-' by '_dash_', ':' by '_colon_' and '.' by '_dot_'
%
% Raises an error when FILE is a name and neither FILE nor FILE with
% '.xml' appended can be found.
%
% Written by W. Falkena, ASTI, TUDelft, 21-08-2010
% Attribute parsing speed increased by 40% by A. Wanner, 14-6-2011
% Added CDATA support by I. Smirnov, 20-3-2012
%
% Modified by X. Mo, University of Wisconsin, 12-5-2012

    % Called without arguments: show the help text and stop.
    if (nargin < 1)
        clc;
        help xml2struct
        return
    end

    if isa(file, 'org.apache.xerces.dom.DeferredDocumentImpl') || isa(file, 'org.apache.xerces.dom.DeferredElementImpl')
        % input is a java xml object
        xDoc = file;
    else
        %check for existence
        if (exist(file,'file') == 0)
            %Perhaps the xml extension was omitted from the file name. Add
            %the extension and try again. The test is anchored to the end
            %of the name so that '.xml' occurring elsewhere in the path
            %(e.g. in a folder name) does not suppress the retry.
            if (isempty(regexpi(file,'\.xml$','once')))
                file = [file '.xml'];
            end

            if (exist(file,'file') == 0)
                error(['The file ' file ' could not be found']);
            end
        end
        %read the xml file
        xDoc = xmlread(file);
    end

    %parse xDoc into a MATLAB structure
    s = parseChildNodes(xDoc);

end
|
wolffd@0
|
60
|
wolffd@0
|
61 % ----- Subfunction parseChildNodes -----
|
wolffd@0
|
function [children,ptext,textflag] = parseChildNodes(theNode)
% Recurse over node children.
%
% Returns:
%   children - struct with one field per distinct child element name. A
%              name that occurs once holds that child's struct; a name
%              that occurs several times holds a cell array with one
%              struct per occurrence. Each child struct carries the
%              child's own children, its text fields and, when present,
%              an 'Attributes' field.
%   ptext    - struct collecting the non-whitespace character data found
%              directly inside theNode under the fields 'Text', 'CDATA'
%              and 'Comment'; several pieces of the same kind are
%              concatenated.
%   textflag - initialised to 'Text' and overwritten with the value
%              getNodeData reports for each processed child.

    % Value of org.w3c.dom.Node.DOCUMENT_TYPE_NODE.
    DOCUMENT_TYPE_NODE = 10;

    children = struct;
    ptext = struct; textflag = 'Text';
    if hasChildNodes(theNode)
        childNodes = getChildNodes(theNode);
        numChildNodes = getLength(childNodes);

        for count = 1:numChildNodes
            % DOM node lists are zero-based
            theChild = item(childNodes,count-1);

            % A <!DOCTYPE ...> declaration is a child of the document and
            % is named after the root element. It carries no element data
            % and has no text content, so it is skipped instead of being
            % handed to getNodeData.
            if (getNodeType(theChild) == DOCUMENT_TYPE_NODE)
                continue
            end

            [text,name,attr,childs,textflag] = getNodeData(theChild);

            % getNodeData replaces '-' in node names, hence
            % '#cdata_dash_section' rather than '#cdata-section'.
            if (~strcmp(name,'#text') && ~strcmp(name,'#comment') && ~strcmp(name,'#cdata_dash_section'))
                %XML allows the same elements to be defined multiple times,
                %put each in a different cell
                if (isfield(children,name))
                    if (~iscell(children.(name)))
                        %put existing element into cell format
                        children.(name) = {children.(name)};
                    end
                    index = length(children.(name))+1;
                    %add new element
                    children.(name){index} = childs;
                    textFieldNames = fieldnames(text);
                    for t = 1:length(textFieldNames)
                        textFieldName = textFieldNames{t};
                        children.(name){index}.(textFieldName) = text.(textFieldName);
                    end
                    if(~isempty(attr))
                        children.(name){index}.('Attributes') = attr;
                    end
                else
                    %add previously unknown (new) element to the structure
                    children.(name) = childs;
                    if(~isempty(text) && ~isempty(fieldnames(text)))
                        textFieldNames = fieldnames(text);
                        numTextFieldNames = length( textFieldNames );
                        for i = 1:numTextFieldNames
                            thisFieldName = textFieldNames{i};
                            children.(name).(thisFieldName) = text.(thisFieldName);
                        end
                    end
                    if(~isempty(attr))
                        children.(name).('Attributes') = attr;
                    end
                end
            else
                % Character data of theNode itself: pick the field of
                % ptext it belongs to from the kind of node.
                ptextflag = 'Text';
                if (strcmp(name, '#cdata_dash_section'))
                    ptextflag = 'CDATA';
                elseif (strcmp(name, '#comment'))
                    ptextflag = 'Comment';
                end

                %this is the text in an element (i.e., the parentNode)
                %whitespace-only content is ignored
                if (~isempty(regexprep(text.(textflag),'[\s]*','')))
                    if (~isfield(ptext,ptextflag) || isempty(ptext.(ptextflag)))
                        ptext.(ptextflag) = text.(textflag);
                    else
                        %what to do when element data is as follows:
                        %<element>Text <!--Comment--> More text</element>

                        %put the text in different cells:
                        % if (~iscell(ptext)) ptext = {ptext}; end
                        % ptext{length(ptext)+1} = text;

                        %just append the text
                        ptext.(ptextflag) = [ptext.(ptextflag) text.(textflag)];
                    end
                end
            end

        end
    end
end
|
wolffd@0
|
137
|
wolffd@0
|
138 % ----- Subfunction getNodeData -----
|
wolffd@0
|
function [text,name,attr,childs,textflag] = getNodeData(theNode)
% Create structure of node info.
%
% Returns:
%   text     - struct with the character data of theNode as produced by
%              parseChildNodes; for a node without child elements and
%              without collected text it holds the node's own text
%              content under the field named by textflag
%   name     - node name with '-', ':' and '.' replaced by '_dash_',
%              '_colon_' and '_dot_'
%   attr     - struct of attributes from parseAttributes, or [] when the
%              node has none
%   childs   - struct of child elements from parseChildNodes
%   textflag - field name returned by parseChildNodes

    %make sure name is allowed as structure name
    name = toCharArray(getNodeName(theNode))';
    name = strrep(name, '-', '_dash_');
    name = strrep(name, ':', '_colon_');
    name = strrep(name, '.', '_dot_');

    attr = parseAttributes(theNode);
    if (isempty(fieldnames(attr)))
        attr = [];
    end

    %parse child nodes
    [childs,text,textflag] = parseChildNodes(theNode);

    if (isempty(fieldnames(childs)) && isempty(fieldnames(text)))
        %get the data of any childless nodes
        content = getTextContent(theNode);
        if (isempty(content))
            % getTextContent returns null for node types that have no
            % text content (e.g. document type nodes). MATLAB maps null
            % to [], on which toCharArray is undefined, so store an empty
            % string instead of failing.
            text.(textflag) = '';
        else
            % faster than if any(strcmp(methods(theNode), 'getData'))
            % faster than text = char(getData(theNode));
            text.(textflag) = toCharArray(content)';
        end
    end

end
|
wolffd@0
|
165
|
wolffd@0
|
166 % ----- Subfunction parseAttributes -----
|
wolffd@0
|
function attributes = parseAttributes(theNode)
% Build a struct holding the attributes of theNode.
%
% Every attribute becomes one field whose value is the attribute value as
% a character row vector. In the field name '-', ':' and '.' are replaced
% by '_dash_', '_colon_' and '_dot_'. A node without attributes yields a
% struct without fields.

    attributes = struct;
    if ~hasAttributes(theNode)
        return
    end

    % Characters replaced in attribute names, applied in this order.
    substitutions = {'-', '_dash_'; ':', '_colon_'; '.', '_dot_'};

    attrMap = getAttributes(theNode);
    lastIndex = getLength(attrMap) - 1;

    % The attribute map is zero-based.
    for idx = 0:lastIndex
        % Suggestion of Adrian Wanner: take the textual form of the
        % attribute, name="value", and cut it at the first '=' instead of
        % querying name and value separately.
        pair = toCharArray(toString(item(attrMap,idx)))';
        eqPos = strfind(pair,'=');
        cut = eqPos(1);

        fieldName = pair(1:(cut-1));
        for r = 1:size(substitutions,1)
            fieldName = strrep(fieldName, substitutions{r,1}, substitutions{r,2});
        end

        % Skip '=' and the opening quote, drop the closing quote.
        attributes.(fieldName) = pair((cut+2):(end-1));
    end
end