User:Zeracles/Starmap/Minimal Spanning Tree

From Ultronomicon
< User:Zeracles‎ | Starmap
Revision as of 04:33, 23 February 2010 by Zeracles (talk | contribs) (→‎Boruvka's Algorithm: minor bugfix and added Minimally Connected Opaque Friend Graph option)
Jump to navigation Jump to search

I originally coded up Jarnik/Prim's algorithm. In the process of optimising it, I unwittingly reinvented Boruvka's algorithm, which handles large numbers of input points better, at least the way I've done it. Demonstration here.

Boruvka's Algorithm

Also downloadable here

%	minimal spannng tree in matlab
%		Boruvka's algorithm (independently invented by yours truly)
%	see http://www.star-control.com/forum/index.php/topic,1660
%	by Anthony Smith (Zeracles)
%	astroant@hotmail.com

%	if only after an MST, leave separation=pruning=MCOFG=0

load('inputpositions')

localcount=8;

quantify=0;

separation=0;
separationlength=0.03;

pruning=0;
prunesteps=1; % how many times to have a go at the tree
prunelevel=10; % length of branch to cut each time

MCOFG=0; % to create Minimally Connected Opaque Friend Graph (superset of MST)
friendliness=1; % default value is one
localityfactor=2; % default value is one

%	detect input properties and set up index
sizeinputpositions=size(inputpositions);
lengthinputpositions=sizeinputpositions(1);
dimensions=sizeinputpositions(2);

index=transpose(linspace(1,lengthinputpositions,lengthinputpositions));

inputpositionxs=inputpositions(:,1);
inputpositionys=inputpositions(:,2);
if dimensions>2
	inputpositionzs=inputpositions(:,3);
end

%	initialise search parameters
minx=min(inputpositionxs);
maxx=max(inputpositionxs);
xextent=minx*maxx;
if xextent==0
	xextent=eps;
end

miny=min(inputpositionys);
maxy=max(inputpositionys);
yextent=miny*maxy;
if yextent==0
	yextent=eps;
end

if dimensions>2
	minz=min(inputpositionzs);
	maxz=max(inputpositionzs);
	zextent=minz*maxz;
	if zextent==0
		zextent=eps;
	end
end

if dimensions==2
	measure=xextent*yextent;
	localmeasure=measure/lengthinputpositions;
	searchincrement=((localcount*localmeasure)/pi)^(1/2);
end
if dimensions==3
	measure=xextent*yextent*zextent;
	localmeasure=measure/lengthinputpositions;
	searchincrement=((localcount*localmeasure)/(4/3)/pi)^(1/3);
end

treenumber=index;
numbertrees=length(unique(treenumber));
branches=[];
searchfactor=1; % setting this to zero can lead to infinite loop if this is scaled multiplicatively
searchfactors=zeros(lengthinputpositions,1);

while numbertrees>1 % each input point is treated as a separate tree, and we join them together until we only have one tree
	searchfactor=2*searchfactor; % calculate interpoint distances for ``close" points, with ``close" defined by this variable
	while min(searchfactors)<searchfactor % searchfactors stores the searchfactors searched for each existing tree
		tree=treenumber(min(find(searchfactors<(searchfactor)))); % choose a tree to search which has been searched with the smallest distance so far
		treesearched=0;
		while treesearched==0 % while we're not yet done looking around the nodes of this tree for links
			maskvector=treenumber==tree;
			treeindices=find(maskvector);
			lengthtreepositions=sum(maskvector);
			
			remainingpositionxs=inputpositionxs(find(maskvector==0));
			remainingpositionys=inputpositionys(find(maskvector==0));
			if dimensions>2
				remainingpositionzs=inputpositionzs(find(maskvector==0));
			end
			
			remainingpositionindices=index(find(maskvector==0));
			
			mindistances=[];
			for i=linspace(1,lengthtreepositions,lengthtreepositions) % search around each of the tree positions for remainingpositions
				%	retrieve coordinates of a tree position
				treepositionx=inputpositions(treeindices(i),1);
				treepositiony=inputpositions(treeindices(i),2);
				if dimensions>2
					treepositionz=inputpositions(treeindices(i),3);
				end
				treeindex=treeindices(i);
				
				%	search for remainingpositions
				boxradius=searchfactor*searchincrement;
				
				boxminx=treepositionx-boxradius;
				boxmaxx=treepositionx+boxradius;
				boxminy=treepositiony-boxradius;
				boxmaxy=treepositiony+boxradius;
				if dimensions>2
					boxminz=treepositionz-boxradius;
					boxmaxz=treepositionz+boxradius;
				end
				
				if dimensions==2
					closerremainingmask=(boxminx<=remainingpositionxs).*(remainingpositionxs<=boxmaxx).*(boxminy<=remainingpositionys).*(remainingpositionys<=boxmaxy);
				end
				if dimensions==3
					closerremainingmask=(boxminx<=remainingpositionxs).*(remainingpositionxs<=boxmaxx).*(boxminy<=remainingpositionys).*(remainingpositionys<=boxmaxy).*(boxminz<=remainingpositionzs).*(remainingpositionzs<=boxmaxz);
				end
				
				mindistancecandidates=[];
				mindistancecandidateindices=[];
				if sum(closerremainingmask)>0 % if there are remainingpositions inside the box, calculate the actual distances to them
					for k=transpose(find(closerremainingmask==1));
						branchcandidatex=remainingpositionxs(k);
						branchcandidatey=remainingpositionys(k);
						if dimensions>2
							branchcandidatez=remainingpositionzs(k);
						end
						branchcandidateindex=remainingpositionindices(k); % set up to remember the index of each prospective link destination
						
						if dimensions==2
							mindistancecandidate=((branchcandidatex-treepositionx).^2+(branchcandidatey-treepositiony).^2).^(1/2);
						end
						if dimensions==3
							mindistancecandidate=((branchcandidatex-treepositionx).^2+(branchcandidatey-treepositiony).^2+(branchcandidatez-treepositionz).^2).^(1/2);
						end
						if mindistancecandidate<=boxradius
							mindistancecandidates=[mindistancecandidates;mindistancecandidate];
							mindistancecandidateindices=[mindistancecandidateindices;branchcandidateindex];
						end
					end
				end
				if numel(mindistancecandidates)>0 % minimum distance from this position on the tree
					mindistance=[min(mindistancecandidates),treeindex,mindistancecandidateindices(find(mindistancecandidates==min(mindistancecandidates)))]; % minimum distances carry the indices of the prospective link destination
					mindistances=[mindistances;mindistance];
				end
			end
			if numel(mindistances)==0
				searchfactors(maskvector)=searchfactor;
				treesearched=1;
			end
			if numel(mindistances)>0
				branchdistance=min(mindistances(:,1));
				branchindex=find(branchdistance==mindistances(:,1));
				numberbranchcandidates=numel(branchindex);
				if numberbranchcandidates>1 % if there's a tie for minimum length
					branchindex=branchindex(ceil(numberbranchcandidates*rand(1)));
				end
				
				%	add this branch to the tree
				branch=mindistances(branchindex,:);
				branches=[branches;branch];
				
				%	join the two trees together
				targettree=treenumber(branch(3)); % need to update the treenumber variable, which remembers which points belong to which trees
				treenumber(find(treenumber==targettree))=tree;
				numbertrees=numbertrees-1; % so that we are one step closer to finishing
			end
		end
	end
end

%	do custom stuff with this tree
%		separation - remove all branches above a certain length
if separation==1
	separationmask=(branches(:,1))<=separationlength;
	branches=branches(find(separationmask),:);
end

%		pruning - remove wandering branches
if pruning==1
	degrees=[];
	for i=linspace(1,lengthinputpositions,lengthinputpositions)
		degree=(sum((branches(:,2))==i))+(sum((branches(:,3))==i));
		degrees=[degrees;degree];
	end
	
	while prunesteps>0
		protectednodes=index(find(degrees>2));
		
		tempprunelevel=prunelevel;
		while tempprunelevel>0
			ends=index(find(degrees==1)); % find branch end nodes
			for i=transpose(ends)
				if sum(i==protectednodes)==0
					if (degrees(i))>0
						deletemask=((branches(:,2))==i)+((branches(:,3))==i);
						cutbranch=branches(find(deletemask),:);
						
						branches=branches(find(deletemask==0),:);
						degrees(cutbranch(2))=(degrees(cutbranch(2)))-1;
						degrees(cutbranch(3))=(degrees(cutbranch(3)))-1;
					end
				end
			end
			tempprunelevel=tempprunelevel-1;
		end
		prunesteps=prunesteps-1;
	end
end

if MCOFG==1
	sizebranches=size(branches);
	lengthbranches=sizebranches(1);
	branches=[branches,repmat(1,[lengthbranches 1])]; % designate existing links as MST links (type 1)
	
	localmaxs=[];
	for i=linspace(1,lengthinputpositions,lengthinputpositions)
		MSTlengths=branches(:,1);
		branchlist=((branches(:,2))==i)+((branches(:,3))==i);
		localMSTlengths=MSTlengths(find(branchlist==1));
		localmax=max(localMSTlengths); % get length of longest adjacent MST link
		localmaxs=[localmaxs;localmax];
	end
	localmaxs=localityfactor*localmaxs;
	for i=linspace(1,lengthinputpositions,lengthinputpositions)
		localmax=localmaxs(i);
		
		neighbours=[branches(:,2),branches(:,3)];
		branchlist=((branches(:,2))==i)+((branches(:,3))==i);
		
		neighbours=neighbours(find(branchlist==1),:);
		neighbours=[neighbours(:,1);neighbours(:,2)];
		
		localcentrex=inputpositionxs(i);
		localcentrey=inputpositionys(i);
		if dimensions>2
			localcentrez=inputpositionzs(i);
		end
		
		%	retrieve all points within radius localmax
		boxminx=localcentrex-localmax;
		boxmaxx=localcentrex+localmax;
		boxminy=localcentrey-localmax;
		boxmaxy=localcentrey+localmax;
		if dimensions>2
			boxminz=localcentrez-localmax;
			boxmaxz=localcentrez+localmax;
		end
		
		if dimensions==2
			closemask=(boxminx<=inputpositionxs).*(inputpositionxs<=boxmaxx).*(boxminy<=inputpositionys).*(inputpositionys<=boxmaxy);
		end
		if dimensions==3
			closemask=(boxminx<=inputpositionxs).*(inputpositionxs<=boxmaxx).*(boxminy<=inputpositionys).*(inputpositionys<=boxmaxy).*(boxminz<=inputpositionzs).*(inputpositionzs<=boxmaxz);
		end
		
		closedistances=[];
		localindices=[];
		
		xcomponents=[];
		ycomponents=[];
		if dimensions>2
			zcomponents=[];
		end
		
		for j=transpose(find(closemask==1))
			otherpointx=inputpositionxs(j);
			otherpointy=inputpositionys(j);
			if dimensions>2
				otherpointz=inputpositionzs(j);
			end
			
			xcomponent=otherpointx-localcentrex;
			ycomponent=otherpointy-localcentrey;
			if dimensions>2
				zcomponent=otherpointz-localcentrez;
			end
			
			if dimensions==2
				closedistance=((xcomponent).^2+(ycomponent).^2).^(1/2);
			end
			if dimensions==3
				closedistance=((xcomponent).^2+(ycomponent).^2+(zcomponent).^2).^(1/2);
			end
			
			if closedistance<=localmax
				closedistances=[closedistances;closedistance];
				localindices=[localindices;j];
				
				xcomponents=[xcomponents;xcomponent];
				ycomponents=[ycomponents;ycomponent];
				if dimensions>2
					zcomponents=[zcomponents;zcomponent];
				end
			end
		end
		
		%	remove local centre point from prospective linkees
		notcentre=find(closedistances>(min(closedistances)));
		
		closedistances=closedistances(notcentre);
		localindices=localindices(notcentre);
		xcomponents=xcomponents(notcentre);
		ycomponents=ycomponents(notcentre);
		if dimensions>2
			zcomponents=zcomponents(notcentre);
		end
		
		sizeclosedistances=size(closedistances);
		lengthclosedistances=sizeclosedistances(1);
		for j=linspace(1,lengthclosedistances,lengthclosedistances)
			closedistance=closedistances(j);
			localindex=localindices(j);
			
			xcomponent=xcomponents(j);
			ycomponent=ycomponents(j);
			if dimensions>2
				zcomponent=zcomponents(j);
			end
			
			if sum(localindex==neighbours)==0 % if there's already a link to this other point, or it's the same as our local centre point, leave it alone
				if sum(closedistances<closedistance)>0 % if there are actually points that are closer, apart from the local centre point
					inclusionfactor=1; % ``inclusion" within a modified radius . . . sort of
					for k=transpose(find(closedistances<closedistance));
						if dimensions==2
							dotproduct=(xcomponent*xcomponents(k))+(ycomponent*ycomponents(k));
						end
						if dimensions==3
							dotproduct=(xcomponent*xcomponents(k))+(ycomponent*ycomponents(k))+(zcomponent*zcomponents(k));
						end
						
						angle=acos(dotproduct/(closedistance*closedistances(k)));
						inclusionfactor=inclusionfactor*((closedistance/2)/(closedistances(k)))*((pi/2)/angle)/friendliness; % I think multiplying the inclusion factor is better than some additive scheme, since this way one point can block a connection even if there are many other points working against this effect; also, dividing by the friendliness at each step might be best to make this work the same way at all scales. That the presence of neighbours can ``create" connections on the opposite side of the centre point is also intuitively acceptable, in my view
					end
					if inclusionfactor<=1
						branches=[branches;[closedistance,i,localindex,2]];
					end
				end
			end
		end
	end
end

save('branches','branches')

%	for plotting
sizebranches=size(branches);
numberofbranches=sizebranches(1);

figure
hold on
box on
if dimensions==2
	plot(inputpositions(:,1),inputpositions(:,2),'k.')
end
if dimensions==3
	plot3(inputpositions(:,1),inputpositions(:,2),inputpositions(:,3),'k.')
end

for i=linspace(1,numberofbranches,numberofbranches)
	if dimensions==2
		startindex=branches(i,2);
		finishindex=branches(i,3);
		
		startx=inputpositions(startindex,1);
		starty=inputpositions(startindex,2);
		
		finishx=inputpositions(finishindex,1);
		finishy=inputpositions(finishindex,2);
		
		line([startx;finishx],[starty;finishy],'Color','k')
	end
	if dimensions==3
		startindex=branches(i,2);
		finishindex=branches(i,3);
		
		startx=inputpositions(startindex,1);
		starty=inputpositions(startindex,2);
		startz=inputpositions(startindex,3);
		
		finishx=inputpositions(finishindex,1);
		finishy=inputpositions(finishindex,2);
		finishz=inputpositions(finishindex,3);
		
		line([startx;finishx],[starty;finishy],[startz;finishz],'Color','k')
	end
end

Jarnik/Prim's Algorithm

Also downloadable here

%	minimal spannng tree in matlab
%		Jarnik/Prim's algorithm - runs pretty slowly for any more than 50 input points
%	see http://www.star-control.com/forum/index.php/topic,1660.msg33938.html#msg33938
%	by Anthony Smith (Zeracles)
%	astroant@hotmail.com
load('inputpositions')

remainingpositions=inputpositions;

%	detect input properties and set up index
sizeremainingpositions=size(remainingpositions);
lengthremainingpositions=sizeremainingpositions(1);
dimensions=sizeremainingpositions(2);

index=transpose(linspace(1,lengthremainingpositions,lengthremainingpositions));

remainingpositionxs=remainingpositions(:,1);
remainingpositionys=remainingpositions(:,2);
if dimensions>2
	remainingpositionzs=remainingpositions(:,3);
end

%	initialise search parameters
minx=min(remainingpositionxs);
maxx=max(remainingpositionxs);
xextent=minx*maxx;

miny=min(remainingpositionys);
maxy=max(remainingpositionys);
yextent=miny*maxy;

if dimensions>2
	minz=min(remainingpositionzs);
	maxz=max(remainingpositionzs);
	zextent=minz*maxz;
end

if dimensions==2
	measure=xextent*yextent;
	localmeasure=measure/lengthremainingpositions;
	searchincrement=((10*localmeasure)/pi)^(1/2);
end
if dimensions==3
	measure=xextent*yextent*zextent;
	localmeasure=measure/lengthremainingpositions;
	searchincrement=((10*localmeasure)/(4/3)/pi)^(1/3);
end

%	initialise with a seed point
seedindex=1;
maskvector=zeros(lengthremainingpositions,1);
maskvector(seedindex)=1;

remainingpositionxs=remainingpositionxs(find(maskvector==0));
remainingpositionys=remainingpositionys(find(maskvector==0));
if dimensions>2
	remainingpositionzs=remainingpositionzs(find(maskvector==0));
end
remainingpositionindices=index(find(maskvector==0));

lengthremainingpositions=lengthremainingpositions-1;

treeindices=index(seedindex,:);
lengthtreepositions=1;

branches=[];

while lengthremainingpositions>0 % keep going until nothing remains unlinked to the tree (the ``remainingpositions")
	mindistances=[];
	for i=linspace(1,lengthtreepositions,lengthtreepositions) % search around each of the tree positions for remainingpositions
		%	retrieve coordinates of a tree position
		treepositionx=inputpositions(treeindices(i),1);
		treepositiony=inputpositions(treeindices(i),2);
		if dimensions>2
			treepositionz=inputpositions(treeindices(i),3);
		end
		treeindex=treeindices(i);
		
		occupiedbox=0;
		searchfactor=1;
		while occupiedbox==0 % search for remainingpositions inside a box that grows until some are found
			boxradius=searchfactor*searchincrement;
			
			boxminx=treepositionx-boxradius;
			boxmaxx=treepositionx+boxradius;
			boxminy=treepositiony-boxradius;
			boxmaxy=treepositiony+boxradius;
			if dimensions>2
				boxminz=treepositionz-boxradius;
				boxmaxz=treepositionz+boxradius;
			end
			
			if dimensions==2
				closerremainingmask=(boxminx<=remainingpositionxs).*(remainingpositionxs<=boxmaxx).*(boxminy<=remainingpositionys).*(remainingpositionys<=boxmaxy);
			end
			if dimensions==3
				closerremainingmask=(boxminx<=remainingpositionxs).*(remainingpositionxs<=boxmaxx).*(boxminy<=remainingpositionys).*(remainingpositionys<=boxmaxy).*(boxminz<=remainingpositionzs).*(remainingpositionzs<=boxmaxz);
			end
			
			mindistancecandidates=[];
			mindistancecandidateindices=[];
			if sum(closerremainingmask)>0 % if there are remainingpositions inside the box, calculate the actual distances to them
				for k=find(closerremainingmask==1)
					branchcandidatex=remainingpositionxs(k);
					branchcandidatey=remainingpositionys(k);
					if dimensions>2
						branchcandidatez=remainingpositionzs(k);
					end
					branchcandidateindex=remainingpositionindices(k); % set up to remember the index of each prospective link destination
					
					if dimensions==2
						mindistancecandidate=((branchcandidatex-treepositionx).^2+(branchcandidatey-treepositiony).^2).^(1/2);
					end
					if dimensions==3
						mindistancecandidate=((branchcandidatex-treepositionx).^2+(branchcandidatey-treepositiony).^2+(branchcandidatez-treepositionz).^2).^(1/2);
					end
					if mindistancecandidate<=boxradius
						mindistancecandidates=[mindistancecandidates;mindistancecandidate];
						mindistancecandidateindices=[mindistancecandidateindices;branchcandidateindex];
					end
				end
			end
			if numel(mindistancecandidates)>0
				mindistance=[min(mindistancecandidates),treeindex,mindistancecandidateindices(find(mindistancecandidates==min(mindistancecandidates)))]; % minimum distances carry the indices of the prospective link destination
				mindistances=[mindistances;mindistance];
				
				occupiedbox=1;
			end
			searchfactor=searchfactor+1; % make the search box bigger
		end
	end
	branchdistance=min(mindistances(:,1));
	branchindex=find(branchdistance==mindistances(:,1));
	numberbranchcandidates=numel(branchindex);
	if numberbranchcandidates>1 % if there's a tie for minimum length
		branchindex=branchindex(ceil(numberbranchcandidates*rand(1)));
	end
	
	%	add this branch to the tree
	branch=mindistances(branchindex,:);
	branches=[branches;branch];
	
	%	add that location to the tree
	treeindices=[treeindices;branch(3)];
	lengthtreepositions=lengthtreepositions+1;
	
	%	remove that location from the remainingpositions
	remainingmask=ones(lengthremainingpositions,1);
	removed=find(remainingpositionindices==branch(3));
	remainingmask(removed)=0;
	
	remainingpositionindices=remainingpositionindices(find(remainingmask));
	remainingpositionxs=remainingpositionxs(find(remainingmask));
	remainingpositionys=remainingpositionys(find(remainingmask));
	if dimensions>2
		remainingpositionzs=remainingpositionzs(find(remainingmask));
	end

	lengthremainingpositions=lengthremainingpositions-1;
end

save('branches','branches')

%	for plotting
sizebranches=size(branches);
numberofbranches=sizebranches(1);

figure
hold on
box on
plot(inputpositions(:,1),inputpositions(:,2),'k.')

for i=linspace(1,numberofbranches,numberofbranches)
	startindex=branches(i,2);
	finishindex=branches(i,3);
	
	startx=inputpositions(startindex,1);
	starty=inputpositions(startindex,2);
	
	finishx=inputpositions(finishindex,1);
	finishy=inputpositions(finishindex,2);
	
	line([startx;finishx],[starty;finishy],'Color','k')
end