function [Adj, labels, x, y] = dot_to_graph(filename)
% DOT_TO_GRAPH  Read a graph (and optional layout) from a GraphViz DOT file.
%
% [Adj, labels, x, y] = dot_to_graph(filename)
% Extract a matrix representation, node labels, and node position coordinates
% from a file in GraphViz format http://www.research.att.com/sw/tools/graphviz
%
% INPUTS:
%  'filename' - the file in DOT format containing the graph layout.
% OUTPUT:
%  'Adj'    - a square adjacency matrix; Adj(i,j) holds the 1-based id of the
%             edge from node i to node j (0 if there is none). Undirected
%             edges ("--") are stored symmetrically;
%  'labels' - a cell array with the names of the nodes, in order of first
%             appearance in an edge statement;
%  'x'      - a row vector with the x-coordinates of the nodes, rescaled to
%             the interval [0.05, 0.95];
%  'y'      - a row vector with the y-coordinates of the nodes, rescaled to
%             the interval [0.05, 0.95].
%             An axis along which all nodes coincide (or for which no
%             coordinates were found) is placed at 0.5.
%
% ERRORS:
%  Raises an error if the file cannot be found or does not mention 'graph'.
%
% WARNINGS: not guaranteed to parse ANY GraphViz file. Debugged on undirected
%  sample graphs from GraphViz (Heawood, Petersen, ER, ngk10_4, process).
%  Complains about RecursionLimit set only to 500 on huge graphs.
%  Ignores singletons (disjoint nodes).
%  Node names are matched by substring when looking for positions, so no
%  label should be a substring of another label.
%  Quoted node names containing white space are not supported.
% Sample DOT code "ABC.dot", read by [Adj, labels, x, y] = dot_to_graph('ABC.dot')
%  graph G {
%      A [pos="28,31"];
%      B [pos="74,87"];
%      A -- B [pos="e,61,71 41,47 46,53 50,58 55,64"];
%  }
% last modified: Jan 2004
% by Alexi Savov:  asavov @wustl.edu |  http://artsci.wustl.edu/~azsavov
%    Leon Peshkin: pesha @ai.mit.edu |  http://www.ai.mit.edu/~pesha

% Restrict the lookup to files: a bare exist() also matches variables/functions.
if exist(filename, 'file') ~= 2
    error('* * * File does not exist or could not be found. * * *');
end

% Read the file into a cell array of lines, ignoring C-style comments.
raw_lines = textread(filename, '%s', 'delimiter', '\n', 'commentstyle', 'c');
dot_lines = strvcat(raw_lines);      % padded char matrix, one row per line

% Is this a DOT file?  The keyword may be preceded by '//' or '#' comment
% lines, so every line is inspected rather than only the first one.
if isempty(dot_lines) || all(cellfun('isempty', strfind(raw_lines, 'graph')))
    error('* * * File does not appear to be in valid DOT format. * * *');
end

Nlns    = size(dot_lines, 1);        % the number of lines
labels  = {};
Adj     = [];                        % stays empty if the file has no edges
unread  = 1:Nlns;                    % lines which have not been consumed as edges
edge_id = 1;

% ---- Pass 1: edge statements.  Sets Adj(Lnode, Rnode) = edge_id. ----
for line_ndx = 1:Nlns
    txt = dot_lines(line_ndx, :);
    Ddash_pos = strfind(txt, ' -- ') + 1;     % positions of undirected edge ops
    arrow_pos = strfind(txt, ' -> ') + 1;     % positions of directed edge ops
    left_bound = 1;
    % Walk the operators left to right so chains "A -- B -- C" are handled.
    for dash_pos = sort([Ddash_pos arrow_pos])
        Lnode = sscanf(txt(left_bound:dash_pos-2), '%s');
        Rnode = first_node_token(txt(dash_pos+3:end));
        left_bound = dash_pos + 3;
        unread = setdiff(unread, line_ndx);   % this is an edge line, not a node line
        if isempty(Lnode) || isempty(Rnode)
            continue;                         % malformed edge: nothing to record
        end
        Lndx = find(strcmp(Lnode, labels));
        if isempty(Lndx)                      % extend our list of labels
            labels{end+1} = Lnode;
            Lndx = length(labels);
        end
        % Looked up only after Lnode was added, so that a self-loop on a new
        % node does not register the same label twice.
        Rndx = find(strcmp(Rnode, labels));
        if isempty(Rndx)
            labels{end+1} = Rnode;
            Rndx = length(labels);
        end
        Adj(Lndx, Rndx) = edge_id;
        if ismember(dash_pos, Ddash_pos)      % undirected: mirror the entry
            Adj(Rndx, Lndx) = edge_id;
        end
        edge_id = edge_id + 1;
    end
end

Nvrt = length(labels);               % number of vertices found via edges
% Pad Adj with zeros so that it is Nvrt-by-Nvrt.  The element (Nvrt,Nvrt)
% does not exist yet whenever padding is needed, so nothing is overwritten.
if size(Adj, 1) < Nvrt || size(Adj, 2) < Nvrt
    Adj(Nvrt, Nvrt) = 0;
end

% ---- Pass 2: node statements.  Look for coordinates among 'unread' lines. ----
x = zeros(1, Nvrt);
y = zeros(1, Nvrt);
has_pos  = false(1, Nvrt);           % which nodes already received a position
lst_node = 0;                        % node whose attribute list is being read
for line_ndx = unread
    txt = dot_lines(line_ndx, :);
    bra_pos = strfind(txt, '[');     % a node statement has to have a "["
    pos_pos = strfind(txt, 'pos');   % candidate locations of the "pos" attribute
    if ~isempty(bra_pos)
        for node = 1:Nvrt
            % NOTE: assumes no label is a substring of any other label.
            lbl_pos = strfind(txt, labels{node});
            % The label has to be to the left of the bracket, and the node
            % must not have been positioned already.
            if ~isempty(lbl_pos) && ~has_pos(node) && lbl_pos(1) < bra_pos(1)
                lst_node = node;
            end
        end
    end
    if lst_node > 0
        % Try each occurrence of "pos": the first one may be part of another
        % attribute value (e.g. label="position").
        for k = 1:length(pos_pos)
            node_pos = sscanf(txt(pos_pos(k):end), ' pos = "%f,%f"');
            if numel(node_pos) >= 2
                x(lst_node) = node_pos(1);
                y(lst_node) = node_pos(2);
                has_pos(lst_node) = true;
                lst_node = 0;        % not to assign position several times
                break;
            end
        end
    end
end

% Coordinates were requested, but not found in 'filename'.
if ~any(has_pos) && nargout > 2
    warning('File does not contain node coordinates.');
end

x = normalise_coords(x);             % normalise and push off margins
y = normalise_coords(y);


function name = first_node_token(txt)
% First white-space delimited token of txt, trimmed to a bare node name.
name = sscanf(txt, '%s', 1);
if isempty(name)
    name = '';
    return;
end
if name(1) == '"'
    % Quoted name: keep everything up to and including the closing quote.
    quotes = find(name == '"');
    if numel(quotes) >= 2
        name = name(1:quotes(2));
        return;
    end
end
% Unquoted name: drop a glued-on attribute list or statement terminator.
cut = find(name == '[' | name == ';', 1);
if ~isempty(cut)
    name = name(1:cut-1);
end


function v = normalise_coords(v)
% Rescale v linearly to [0.05, 0.95]; a degenerate (zero-span) axis maps to 0.5.
if isempty(v)
    return;
end
lo   = min(v);
span = max(v) - lo;
if span > 0
    v = .9*(v - lo)/span + .05;
else
    v(:) = 0.5;                      % avoids the 0/0 = NaN of a naive rescale
end