Source code for pyopus.optimizer.sanm

# -*- coding: UTF-8 -*-
"""
.. inheritance-diagram:: pyopus.optimizer.sanm
    :parts: 1

**Unconstrained successive approximation Nelder-Mead simplex optimizer 
(PyOPUS subsystem name: SANMOPT)**

A provably convergent version of the Nelder-Mead simplex algorithm. The 
algorithm performs unconstrained optimization. Convergence is achieved by 
performing optimization on gradually refined approximations of the cost 
function and keeping the simplex internal angles away from 0. Function 
approximations are constructed with the help of a regular grid of points. 

The algorithm was published in 

.. [sanm] Bűrmen Á., Tuma T.: Unconstrained derivative-free optimization by 
          successive approximation. Journal of Computational and Applied 
          Mathematics, vol. 223, pp. 62-74, 2009. 
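
A minimal usage sketch is given below. It assumes that, as in the 
:class:`~pyopus.optimizer.base.Optimizer` base class, the best-yet point and its 
cost are available in the :attr:`x` and :attr:`f` members after a run; the test 
function and starting point are purely illustrative:: 

    from pyopus.optimizer.sanm import SANelderMead
    from numpy import array
    
    def rosenbrock(x):
        # Classic banana-shaped test function with the minimum at (1, 1)
        return 100.0*(x[1]-x[0]**2)**2+(1.0-x[0])**2
    
    opt=SANelderMead(rosenbrock, maxiter=100000)
    opt.reset(array([-1.2, 1.0]))
    opt.run()
    
    print("x="+str(opt.x)+" f="+str(opt.f))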
"""

from ..misc.debug import DbgMsgOut, DbgMsg
from .base import Optimizer
from .nm import NelderMead

from numpy import abs, argsort, where, round, sign, diag, sqrt, log, exp, array, zeros, dot
from numpy.linalg import qr, det

# Needed by the _plotSimplex() debugging helper; uncomment to enable plotting. 
# import matplotlib.pyplot as pl

__all__ = [ 'SANelderMead' ]

class SANelderMead(NelderMead):
    """
    Unconstrained successive approximation Nelder-Mead optimizer class
    
    Default values of the expansion (1.2) and shrink (0.25) coefficients 
    differ from the original Nelder-Mead values. The same holds for the 
    relative tolerance (1e-15) and the absolute function (1e-15) and side 
    length (1e-9) tolerances. 
    
    *lam* and *Lam* are the lower and upper bound on the simplex side length 
    with respect to the grid. The simplex shape (side length determinant) is 
    bounded with respect to the product of the simplex side lengths with *c*. 
    
    The grid density has a continuity bound due to the finite precision of 
    floating point numbers. The grid therefore begins to behave as continuous 
    when its density falls below the relative (*tau_r*) and absolute (*tau_a*) 
    bound with respect to the grid origin. 
    
    See the :class:`~pyopus.optimizer.nm.NelderMead` class for more 
    information. 
    """
    def __init__(self, function, debug=0, fstop=None, maxiter=None, 
                 reflect=1.0, expand=1.2, outerContract=0.5, innerContract=-0.5, shrink=0.25, 
                 reltol=1e-15, ftol=1e-15, xtol=1e-9, simplex=None, 
                 lam=2.0, Lam=2.0**52, c=1e-6, tau_r=2.0**(-52), tau_a=1e-100):
        NelderMead.__init__(self, function, debug, fstop, maxiter, 
                            reflect, expand, outerContract, innerContract, shrink, 
                            reltol, ftol, xtol, simplex)
        
        # Simplex
        self.simplex=None
        
        # Logarithm of the side vector determinant
        self.logDet=None
        
        # Grid origin and density
        self.z=None
        self.delta=None
        
        # Side length bounds wrt. grid
        self.lam=lam
        self.Lam=Lam
        
        # Simplex shape lower bound
        self.c=c
        
        # Grid continuity bound (relative and absolute)
        self.tau_r=tau_r
        self.tau_a=tau_a

    def check(self):
        """
        Checks the optimization algorithm's settings and raises an exception 
        if something is wrong. 
        """
        NelderMead.check(self)
        
        if self.lam<=0:
            raise Exception(DbgMsg("SANMOPT", "lambda should be positive."))
        
        if self.Lam<=0:
            raise Exception(DbgMsg("SANMOPT", "Lambda should be positive."))
        
        if self.lam>self.Lam:
            raise Exception(DbgMsg("SANMOPT", "Lambda should be greater than or equal to lambda."))
        
        if self.c<0:
            raise Exception(DbgMsg("SANMOPT", "c should be greater than or equal to zero."))
        
        if self.tau_r<0 or self.tau_a<0:
            raise Exception(DbgMsg("SANMOPT", "Relative and absolute grid continuity bounds should not be negative."))

    def buildGrid(self, density=10.0):
        """
        Generates the initial grid density for the algorithm. The grid is 
        determined relative to the bounding box of the initial simplex sides. 
        *density* specifies the number of grid points along every direction 
        that covers the corresponding side of the bounding box. 
        
        If any side of the bounding box has zero length, the mean of all side 
        lengths divided by *density* is used as the grid density in the 
        corresponding direction. 
        
        Returns a 1-dimensional array of length *ndim* holding the grid 
        densities. 
        """
        if self.debug:
            DbgMsgOut("SANMOPT", "Building initial grid for initial simplex.")
        
        # Side vectors (with respect to the first point)
        v=self.simplex[1:,:]-self.simplex[0,:]
        
        # Maximal absolute components
        vmax=abs(v).max(0)
        
        # Mean maximal absolute component
        vmax_mean=vmax.mean()
        
        # If any component maximum is 0, set it to the mean vmax value
        ndx=where(vmax==0.0)[0]
        vmax[ndx]=vmax_mean
        
        return vmax/density

    def continuityBound(self):
        """
        Finds the components of the grid density vector that are below the 
        continuity bound. 
        
        Returns a tuple (*delta_prime*, *ndx_cont*). *delta_prime* is the 
        vector of grid densities where the components that are below the 
        continuity bound are replaced with the bound. *ndx_cont* is a vector 
        of grid component indices for which the grid density is below the 
        continuity bound. 
        """
        # Find continuous components (delta below the grid continuity bound or 0.0)
        rtol=abs(self.z*self.tau_r)
        conttol=where(rtol>self.tau_a, rtol, self.tau_a)
        ndx_cont=where((self.delta<conttol) | (self.delta==0.0))[0]
        
        # If below the bound, set delta to the bound
        delta_prime=where(self.delta>conttol, self.delta, conttol)
        
        return (delta_prime, ndx_cont)

    def gridRestrain(self, x):
        """
        Returns the point on the grid that is closest to *x*. 
        
        The components of *x* that correspond to grid directions for which 
        the density is below the continuity bound are left unchanged. The 
        remaining components are rounded to the nearest value on the grid. 
        """
        # Get bound delta and the indices where delta was bound
        (delta_work, ndx_cont)=self.continuityBound()
        
        # Grid-restrain
        xgr=round((x-self.z)/delta_work)*delta_work+self.z
        
        # Copy continuous components
        xgr[ndx_cont]=x[ndx_cont]
        
        return xgr

    def grfun(self, x, count=True):
        """
        Returns the value of the cost function approximation at *x* 
        corresponding to the current grid. 
        
        If *count* is ``False`` the evaluation is performed for debugging 
        purposes only and is neither counted nor registered in any way. 
        """
        return self.fun(self.gridRestrain(x), count)

    def sortedSideVectors(self):
        """
        Returns a tuple (*vsorted*, *lsorted*) where *vsorted* is an array 
        holding the simplex side vectors sorted by length, longest first. 
        The first index of the 2-dimensional array is the side vector index 
        while the second one is the component index. *lsorted* is a 
        1-dimensional array of the corresponding simplex side lengths. 
        """
        # Side vectors
        v=self.simplex[1:,:]-self.simplex[0,:]
        
        # Get squared lengths
        l2=(v*v).sum(1)
        
        # Order by length (longest first)
        i=argsort(l2, 0, 'mergesort')   # shortest first
        i=i[-1::-1]                     # longest first
        
        vsorted=v[i,:]
        lsorted=sqrt(l2[i])
        
        return (vsorted, lsorted)

    def reshape(self, v):
        """
        Reshapes the simplex side vectors given by *v* into orthogonal sides 
        whose bounding box is bounded in length by *lam* and *Lam* with 
        respect to the grid density. 
        
        Returns a tuple (*vnew*, *logDet*, *l*) where *vnew* holds the 
        reshaped simplex sides, *logDet* is the natural logarithm of the 
        reshaped simplex's side vector determinant, and *l* is the 
        1-dimensional array of reshaped side lengths. Because the reshaped 
        sides are orthogonal, *logDet* is simply the natural logarithm of the 
        product of the side lengths. 
        """
        # Rows of v are side vectors. 
        # QR decomposition of a matrix with side vectors as columns. 
        (Q, R)=qr(v.T)
        
        # Get scaling factors and their signs
        Rdiag=R.diagonal()
        Rsign=sign(Rdiag)
        Rsign=where(Rsign!=0, Rsign, 1.0)
        
        # Get side lengths
        l=abs(Rdiag)
        
        # Calculate side length bounds
        norm_delta=sqrt((self.delta**2).sum())
        lower=self.lam*sqrt(self.ndim)*norm_delta
        upper=self.Lam*sqrt(self.ndim)*norm_delta
        
        # Bound side lengths
        l=where(l<=upper, l, upper)
        l=where(l>=lower, l, lower)
        
        # Scale vectors. 
        # Vectors are in the columns of Q, therefore transpose Q. 
        vnew=dot(diag(l*Rsign), Q.T)
        
        # Calculate the log of the side vector determinant
        logDet=log(l).sum()
        
        return (vnew, logDet, l)

    def reset(self, x0):
        """
        Puts the optimizer in its initial state and sets the initial point to 
        be the 1-dimensional array or list *x0*. The length of the array 
        becomes the dimension of the optimization problem (:attr:`ndim` 
        member). The initial simplex is built around *x0* by calling the 
        :meth:`buildSimplex` method with default values for the *rel* and 
        *abs* arguments. 
        
        If *x0* is a 2-dimensional array or list of size (*ndim*+1)-by-*ndim* 
        it specifies the initial simplex. 
        
        A corresponding grid is created by calling the :meth:`buildGrid` 
        method. 
        
        The initial value of the natural logarithm of the simplex side vector 
        determinant is calculated and stored. This value is updated at every 
        step of the simplex algorithm. It only needs to be recomputed at a 
        reshape, which is cheap because the reshaped simplex is orthogonal. 
        The only place where a full determinant must be evaluated is here. 
        """
        # Debug message
        if self.debug:
            DbgMsgOut("SANMOPT", "Resetting.")
        
        # Make it an array
        x0=array(x0)
        
        # Is x0 a point or a simplex?
        if x0.ndim==1:
            # Point
            # Set x now
            NelderMead.reset(self, x0)
            
            if self.debug:
                DbgMsgOut("SANMOPT", "Generating initial simplex from initial point.")
            
            sim=self.buildSimplex(x0)
            self._setSimplex(sim)
            self.delta=self.buildGrid()
            self.z=x0
        else:
            # Simplex or error (handled in _setSimplex())
            self._setSimplex(x0)
            self.delta=self.buildGrid()
            self.z=x0[0,:]
            
            if self.debug:
                DbgMsgOut("SANMOPT", "Using specified initial simplex.")
            
            # Set x to the first point in the simplex after it was checked in _setSimplex()
            Optimizer.reset(self, x0[0,:])
        
        # Reset point moves counter
        self.simplexmoves=zeros(self.ndim+1)
        
        # Calculate the log of the side vector determinant
        (v, l)=self.sortedSideVectors()
        self.logDet=log(abs(det(v)))
        
        # Make x tolerance an array
        self.xtol=array(self.xtol)

    def run(self):
        """
        Runs the optimization algorithm. 
        """
        # Debug message
        if self.debug:
            DbgMsgOut("SANMOPT", "Starting a run at i="+str(self.niter))
        
        # Checks
        self.check()
        
        # Reset stop flag
        self.stop=False
        
        # Evaluate initial simplex if needed
        if self.simplexf is None:
            self.simplexf=zeros(self.npts)
            for i in range(0, self.ndim+1):
                self.simplexf[i]=self.grfun(self.simplex[i,:])
                
                if self.debug:
                    DbgMsgOut("SANMOPT", "Initial simplex point i="+str(self.niter)+": f="+str(self.simplexf[i]))
        
        # Loop
        while not self.stop:
            # Order simplex (best point first)
            self.orderSimplex()
            
            # Centroid of all points except the worst one
            xc=self.simplex[:-1,:].sum(0)/self.ndim
            
            # Worst point
            xw=self.simplex[-1,:]
            fw=self.simplexf[-1]
            
            # Second worst point
            xsw=self.simplex[-2,:]
            fsw=self.simplexf[-2]
            
            # Best point
            xb=self.simplex[0,:]
            fb=self.simplexf[0]
            
            # No shrink yet
            shrink=False
            
            # Reflect
            xr=xc+(xc-xw)*self.reflect
            fr=self.grfun(xr)
            if self.debug:
                DbgMsgOut("SANMOPT", "Iteration i="+str(self.niter)+": reflect   : f="+str(fr))
            
            if fr<fb:
                # Try expansion
                xe=xc+(xc-xw)*self.expand
                fe=self.grfun(xe)
                if self.debug:
                    DbgMsgOut("SANMOPT", "Iteration i="+str(self.niter)+": expand    : f="+str(fe))
                
                if fe<fr:
                    # Accept expansion
                    self.simplex[-1,:]=xe
                    self.simplexf[-1]=fe
                    self.simplexmoves[-1]+=1
                    self.logDet+=log(abs(self.expand))
                    if self.debug:
                        DbgMsgOut("SANMOPT", "Iteration i="+str(self.niter)+": accepted expansion")
                else:
                    # Accept reflection
                    self.simplex[-1,:]=xr
                    self.simplexf[-1]=fr
                    self.simplexmoves[-1]+=1
                    self.logDet+=log(abs(self.reflect))
                    if self.debug:
                        DbgMsgOut("SANMOPT", "Iteration i="+str(self.niter)+": accepted reflection after expansion")
            elif fb<=fr and fr<fsw:
                # Accept reflection
                self.simplex[-1,:]=xr
                self.simplexf[-1]=fr
                self.simplexmoves[-1]+=1
                self.logDet+=log(abs(self.reflect))
                if self.debug:
                    DbgMsgOut("SANMOPT", "Iteration i="+str(self.niter)+": accepted reflection")
            elif fsw<=fr and fr<fw:
                # Try outer contraction
                xo=xc+(xc-xw)*self.outerContract
                fo=self.grfun(xo)
                if self.debug:
                    DbgMsgOut("SANMOPT", "Iteration i="+str(self.niter)+": outer con : f="+str(fo))
                
                if fo<fw:
                    # Accept outer contraction
                    self.simplex[-1,:]=xo
                    self.simplexf[-1]=fo
                    self.simplexmoves[-1]+=1
                    self.logDet+=log(abs(self.outerContract))
                    if self.debug:
                        DbgMsgOut("SANMOPT", "Iteration i="+str(self.niter)+": accepted outer contraction")
                else:
                    # Shrink
                    shrink=True
            elif fw<=fr:
                # Try inner contraction
                xi=xc+(xc-xw)*self.innerContract
                fi=self.grfun(xi)
                if self.debug:
                    DbgMsgOut("SANMOPT", "Iteration i="+str(self.niter)+": inner con : f="+str(fi))
                
                if fi<fw:
                    # Accept inner contraction
                    self.simplex[-1,:]=xi
                    self.simplexf[-1]=fi
                    self.simplexmoves[-1]+=1
                    self.logDet+=log(abs(self.innerContract))
                    if self.debug:
                        DbgMsgOut("SANMOPT", "Iteration i="+str(self.niter)+": accepted inner contraction")
                else:
                    # Shrink
                    shrink=True
            
            # self._checkSimplex()
            # self._checkLogDet()
            
            # self._plotSimplex()
            
            # Reshape, pseudo-expand, and shrink loop
            if shrink:
                # Normal NM steps failed
                
                # No reshape happened yet
                reshaped=False
                
                # Create origin vector and function value
                x0=zeros(self.ndim)
                f0=0.0
                
                # Check simplex shape (simplex is already sorted)
                (v, l)=self.sortedSideVectors()
                if self.logDet-log(l).sum()<log(self.c)*self.ndim:
                    # Shape not good, reshape
                    (v, self.logDet, l)=self.reshape(v)
                    reshaped=True
                    
                    # Origin for building the new simplex
                    x0[:]=self.simplex[0,:]
                    f0=self.simplexf[0]
                    
                    # Build new simplex
                    self.simplex[1:,:]=v+x0
                    
                    # Evaluate new points
                    for i in range(1, self.ndim+1):
                        f=self.grfun(self.simplex[i,:])
                        self.simplexf[i]=f
                        
                        if self.debug:
                            DbgMsgOut("SANMOPT", "Iteration i="+str(self.niter)+": reshape   : f="+str(f))
                    
                    self.simplexmoves[:]=0
                    
                    # Do not order the simplex here, even if the reshape results in a 
                    # point that improves on x0. The algorithm in the paper orders the 
                    # simplex at this point, but that is not in the spirit of the 
                    # Price-Coope-Byatt paper which introduced pseudo-expand. 
                    # Therefore do not sort. 
                
                # Centroid of the n worst points
                xcw=self.simplex[1:,:].sum(0)/self.ndim
                
                # Pseudo-expand point
                xpe=xb+(self.expand/self.reflect-1.0)*(xb-xcw)
                fpe=self.grfun(xpe)
                if self.debug:
                    DbgMsgOut("SANMOPT", "Iteration i="+str(self.niter)+": pseudo exp: f="+str(fpe))
                
                # Check if there is any improvement
                if fpe<fb:
                    # Pseudo-expand point is better than the old best point
                    self.simplex[0,:]=xpe
                    self.simplexf[0]=fpe
                    self.simplexmoves[0]+=1
                    # Error in the paper - should be gammae/gammar, not gammae/gammar-1
                    self.logDet+=log(abs(self.expand/self.reflect))
                    if self.debug:
                        DbgMsgOut("SANMOPT", "Iteration i="+str(self.niter)+": accepted pseudo exp")
                elif self.simplexf.min()<fb:
                    # One of the points obtained by the reshape is better than the old best point
                    if self.debug:
                        DbgMsgOut("SANMOPT", "Iteration i="+str(self.niter)+": accepted reshape")
                else:
                    # No improvement, enter the shrink loop
                    
                    # Even if a reshape took place, reaching this point means that 
                    # none of the new points improved on the old best point, so the 
                    # first point of the simplex is still the best one. Otherwise we 
                    # would have ended up in one of the previous two branches. 
                    if not reshaped:
                        # No reshape yet, reshape now
                        (v, l)=self.sortedSideVectors()
                        (v, self.logDet, l)=self.reshape(v)
                        reshaped=True
                        
                        # Origin for building the new simplex and for shrink steps
                        x0[:]=self.simplex[0,:]
                        f0=self.simplexf[0]
                        
                        self.simplexmoves[:]=0
                        
                        if self.debug:
                            DbgMsgOut("SANMOPT", "Iteration i="+str(self.niter)+": reshape")
                        
                        # This is the first shrink step
                        shrink_step=0
                    else:
                        # This is the second shrink step 
                        # (the first one happened at reshape and failed to produce an improvement)
                        shrink_step=1
                    
                    # Shrink loop
                    while self.simplexf.min()>=f0:
                        # Reverse side vectors if this is not the first shrink step
                        if shrink_step>0:
                            v=-v
                        
                        # On every even shrink step except the first one, shrink the 
                        # side vectors and check the grid. 
                        if shrink_step>=2 and shrink_step % 2 == 0:
                            # Shrink vectors
                            v=v*self.shrink
                            l=l*self.shrink
                            self.logDet+=log(abs(self.shrink))*self.ndim
                            
                            if self.debug:
                                DbgMsgOut("SANMOPT", "Iteration i="+str(self.niter)+": shrink vectors")
                            
                            # Find the shortest side vector
                            i=argsort(l, 0, 'mergesort')
                            lmin=l[i[0]]
                            vmin=v[i[0],:]
                            
                            # Do we need a new grid?
                            if lmin<self.lam*sqrt(self.ndim)*sqrt((self.delta**2).sum()):
                                # New grid origin
                                self.z=self.gridRestrain(x0)
                                
                                # Move x0 to the grid origin to resolve the inconsistency 
                                # between the old and the new grid-restrained value of f. 
                                x0=self.z
                                self.simplex[0,:]=x0
                                
                                # New (refined) grid
                                vmin_norm=sqrt((vmin**2).sum())/sqrt(self.ndim)
                                abs_vmin=abs(vmin)
                                self.delta=1.0/(128.0*self.lam*self.ndim)*where(abs_vmin>vmin_norm, abs_vmin, vmin_norm)
                                
                                if self.debug:
                                    DbgMsgOut("SANMOPT", "Iteration i="+str(self.niter)+": refine grid")
                        
                        # Evaluate points
                        self.simplex[1:,:]=x0+v
                        for i in range(1, self.ndim+1):
                            f=self.grfun(self.simplex[i,:])
                            self.simplexf[i]=f
                            
                            if self.debug:
                                DbgMsgOut("SANMOPT", "Iteration i="+str(self.niter)+(": shrink %1d: f=" % (shrink_step % 2))+str(f))
                        
                        # self._checkSimplex()
                        # self._checkLogDet()
                        
                        # self._plotSimplex()
                        
                        # if f0!=self.simplexf[0] or (x0!=self.simplex[0,:]).any():
                        #     raise Exception("x0, f0 not matching.")
                        
                        # Stopping condition
                        if (self.checkFtol() and self.checkXtol()) or self.stop:
                            break
                        
                        # Increase shrink step counter
                        shrink_step+=1
            
            # Check stopping condition
            if self.checkFtol() and self.checkXtol():
                if self.debug:
                    DbgMsgOut("SANMOPT", "Iteration i="+str(self.niter)+": simplex x and f tolerance reached, stopping.")
                break
        
        # Debug message
        if self.debug:
            DbgMsgOut("SANMOPT", "Finished.")

    #
    # Internal functions for debugging purposes
    #
    
    def _checkSimplex(self):
        """
        Checks if the approximate cost function values corresponding to the 
        simplex points are correct. 
        """
        for i in range(0, self.ndim+1):
            ff=self.simplexf[i]
            f=self.grfun(self.simplex[i,:], False)
            
            if ff!=f and self.debug:
                DbgMsgOut("SANMOPT", "Simplex consistency broken for member #"+str(i))
    
    def _checkLogDet(self):
        """
        Checks if the natural logarithm of the simplex side vector determinant 
        is correct. 
        """
        (v, l)=self.sortedSideVectors()
        vdet=abs(det(v))
        
        DbgMsgOut("SANMOPT", "  logDet="+str(exp(self.logDet))+" vdet="+str(vdet))
        
        if (1.0-exp(self.logDet)/vdet)>1e-3:
            raise Exception(DbgMsg("SANMOPT", "Simplex determinant consistency broken. Relative error: %e" % (1.0-exp(self.logDet)/vdet)))
    
    def _plotSimplex(self):
        """
        Plots the projection of the simplex side vectors onto the first two 
        dimensions. Requires matplotlib (see the commented import at the top 
        of the module). 
        """
        p1=self.simplex[0,:2]
        p2=self.simplex[1,:2]
        p3=self.simplex[2,:2]
        
        pl.clf()
        pl.plot([p1[0]], [p1[1]], 'ro')
        pl.plot([p2[0]], [p2[1]], 'go')
        pl.plot([p3[0]], [p3[1]], 'bo')
        pl.plot([p1[0], p2[0]], [p1[1], p2[1]], 'b')
        pl.plot([p1[0], p3[0]], [p1[1], p3[1]], 'b')
        pl.axis('equal')
        pl.show()
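

# A minimal self-test / demonstration sketch (not part of the optimizer itself). 
# It illustrates the grid restraint described in the module docstring on a simple 
# quadratic test function. The function and the starting point are purely 
# illustrative, and the best-yet point is assumed to be available in the x and f 
# members after a run (as in the Optimizer base class). 
if __name__ == '__main__':
    from numpy import array
    
    def quadratic(x):
        # Convex test function with the minimum at (1, 2)
        return (x[0]-1.0)**2+(x[1]-2.0)**2
    
    opt=SANelderMead(quadratic, maxiter=10000)
    opt.reset(array([5.0, -3.0]))
    
    # After reset() the initial grid is available; arbitrary points are snapped 
    # to the nearest grid node (components below the continuity bound excepted). 
    print("Grid-restrained point: "+str(opt.gridRestrain(array([5.01, -2.99]))))
    
    opt.run()
    print("Best point: "+str(opt.x))
    print("Best cost : "+str(opt.f))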