medfall

A super great game engine
Log | Files | Refs

clusterfit.cc (12146B)


      1 /* -----------------------------------------------------------------------------
      2 
      3 	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
      4 	Copyright (c) 2007 Ignacio Castano                   icastano@nvidia.com
      5 
      6 	Permission is hereby granted, free of charge, to any person obtaining
      7 	a copy of this software and associated documentation files (the 
      8 	"Software"), to	deal in the Software without restriction, including
      9 	without limitation the rights to use, copy, modify, merge, publish,
     10 	distribute, sublicense, and/or sell copies of the Software, and to 
     11 	permit persons to whom the Software is furnished to do so, subject to 
     12 	the following conditions:
     13 
     14 	The above copyright notice and this permission notice shall be included
     15 	in all copies or substantial portions of the Software.
     16 
     17 	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     18 	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
     19 	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
     20 	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
     21 	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
     22 	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
     23 	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     24 	
     25    -------------------------------------------------------------------------- */
     26    
     27 #include "clusterfit.h"
     28 #include "colourset.h"
     29 #include "colourblock.h"
     30 #include <cfloat>
     31 
     32 namespace squish {
     33 
     34 ClusterFit::ClusterFit( ColourSet const* colours, int flags, float* metric ) 
     35   : ColourFit( colours, flags )
     36 {
     37 	// set the iteration count
     38 	m_iterationCount = ( m_flags & kColourIterativeClusterFit ) ? kMaxIterations : 1;
     39 
     40 	// initialise the metric (old perceptual = 0.2126f, 0.7152f, 0.0722f)
     41 	if( metric )
     42 		m_metric = Vec4( metric[0], metric[1], metric[2], 1.0f );
     43 	else
     44 		m_metric = VEC4_CONST( 1.0f );	
     45 
     46 	// initialise the best error
     47 	m_besterror = VEC4_CONST( FLT_MAX );
     48 
     49 	// cache some values
     50 	int const count = m_colours->GetCount();
     51 	Vec3 const* values = m_colours->GetPoints();
     52 
     53 	// get the covariance matrix
     54 	Sym3x3 covariance = ComputeWeightedCovariance( count, values, m_colours->GetWeights() );
     55 	
     56 	// compute the principle component
     57 	m_principle = ComputePrincipleComponent( covariance );
     58 }
     59 
     60 bool ClusterFit::ConstructOrdering( Vec3 const& axis, int iteration )
     61 {
     62 	// cache some values
     63 	int const count = m_colours->GetCount();
     64 	Vec3 const* values = m_colours->GetPoints();
     65 
     66 	// build the list of dot products
     67 	float dps[16];
     68 	u8* order = ( u8* )m_order + 16*iteration;
     69 	for( int i = 0; i < count; ++i )
     70 	{
     71 		dps[i] = Dot( values[i], axis );
     72 		order[i] = ( u8 )i;
     73 	}
     74 		
     75 	// stable sort using them
     76 	for( int i = 0; i < count; ++i )
     77 	{
     78 		for( int j = i; j > 0 && dps[j] < dps[j - 1]; --j )
     79 		{
     80 			std::swap( dps[j], dps[j - 1] );
     81 			std::swap( order[j], order[j - 1] );
     82 		}
     83 	}
     84 	
     85 	// check this ordering is unique
     86 	for( int it = 0; it < iteration; ++it )
     87 	{
     88 		u8 const* prev = ( u8* )m_order + 16*it;
     89 		bool same = true;
     90 		for( int i = 0; i < count; ++i )
     91 		{
     92 			if( order[i] != prev[i] )
     93 			{
     94 				same = false;
     95 				break;
     96 			}
     97 		}
     98 		if( same )
     99 			return false;
    100 	}
    101 	
    102 	// copy the ordering and weight all the points
    103 	Vec3 const* unweighted = m_colours->GetPoints();
    104 	float const* weights = m_colours->GetWeights();
    105 	m_xsum_wsum = VEC4_CONST( 0.0f );
    106 	for( int i = 0; i < count; ++i )
    107 	{
    108 		int j = order[i];
    109 		Vec4 p( unweighted[j].X(), unweighted[j].Y(), unweighted[j].Z(), 1.0f );
    110 		Vec4 w( weights[j] );
    111 		Vec4 x = p*w;
    112 		m_points_weights[i] = x;
    113 		m_xsum_wsum += x;
    114 	}
    115 	return true;
    116 }
    117 
    118 void ClusterFit::Compress3( void* block )
    119 {
    120 	// declare variables
    121 	int const count = m_colours->GetCount();
    122 	Vec4 const two = VEC4_CONST( 2.0 );
    123 	Vec4 const one = VEC4_CONST( 1.0f );
    124 	Vec4 const half_half2( 0.5f, 0.5f, 0.5f, 0.25f );
    125 	Vec4 const zero = VEC4_CONST( 0.0f );
    126 	Vec4 const half = VEC4_CONST( 0.5f );
    127 	Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
    128 	Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
    129 
    130 	// prepare an ordering using the principle axis
    131 	ConstructOrdering( m_principle, 0 );
    132 	
    133 	// check all possible clusters and iterate on the total order
    134 	Vec4 beststart = VEC4_CONST( 0.0f );
    135 	Vec4 bestend = VEC4_CONST( 0.0f );
    136 	Vec4 besterror = m_besterror;
    137 	u8 bestindices[16];
    138 	int bestiteration = 0;
    139 	int besti = 0, bestj = 0;
    140 	
    141 	// loop over iterations (we avoid the case that all points in first or last cluster)
    142 	for( int iterationIndex = 0;; )
    143 	{
    144 		// first cluster [0,i) is at the start
    145 		Vec4 part0 = VEC4_CONST( 0.0f );
    146 		for( int i = 0; i < count; ++i )
    147 		{
    148 			// second cluster [i,j) is half along
    149 			Vec4 part1 = ( i == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f );
    150 			int jmin = ( i == 0 ) ? 1 : i;
    151 			for( int j = jmin;; )
    152 			{
    153 				// last cluster [j,count) is at the end
    154 				Vec4 part2 = m_xsum_wsum - part1 - part0;
    155 				
    156 				// compute least squares terms directly
    157 				Vec4 alphax_sum = MultiplyAdd( part1, half_half2, part0 );
    158 				Vec4 alpha2_sum = alphax_sum.SplatW();
    159 
    160 				Vec4 betax_sum = MultiplyAdd( part1, half_half2, part2 );
    161 				Vec4 beta2_sum = betax_sum.SplatW();
    162 
    163 				Vec4 alphabeta_sum = ( part1*half_half2 ).SplatW();
    164 
    165 				// compute the least-squares optimal points
    166 				Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) );
    167 				Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor;
    168 				Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor;
    169 
    170 				// clamp to the grid
    171 				a = Min( one, Max( zero, a ) );
    172 				b = Min( one, Max( zero, b ) );
    173 				a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
    174 				b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
    175 				
    176 				// compute the error (we skip the constant xxsum)
    177 				Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
    178 				Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
    179 				Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
    180 				Vec4 e4 = MultiplyAdd( two, e3, e1 );
    181 
    182 				// apply the metric to the error term
    183 				Vec4 e5 = e4*m_metric;
    184 				Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
    185 				
    186 				// keep the solution if it wins
    187 				if( CompareAnyLessThan( error, besterror ) )
    188 				{
    189 					beststart = a;
    190 					bestend = b;
    191 					besti = i;
    192 					bestj = j;
    193 					besterror = error;
    194 					bestiteration = iterationIndex;
    195 				}
    196 
    197 				// advance
    198 				if( j == count )
    199 					break;
    200 				part1 += m_points_weights[j];
    201 				++j;
    202 			}
    203 
    204 			// advance
    205 			part0 += m_points_weights[i];
    206 		}
    207 		
    208 		// stop if we didn't improve in this iteration
    209 		if( bestiteration != iterationIndex )
    210 			break;
    211 			
    212 		// advance if possible
    213 		++iterationIndex;
    214 		if( iterationIndex == m_iterationCount )
    215 			break;
    216 			
    217 		// stop if a new iteration is an ordering that has already been tried
    218 		Vec3 axis = ( bestend - beststart ).GetVec3();
    219 		if( !ConstructOrdering( axis, iterationIndex ) )
    220 			break;
    221 	}
    222 		
    223 	// save the block if necessary
    224 	if( CompareAnyLessThan( besterror, m_besterror ) )
    225 	{
    226 		// remap the indices
    227 		u8 const* order = ( u8* )m_order + 16*bestiteration;
    228 
    229 		u8 unordered[16];
    230 		for( int m = 0; m < besti; ++m )
    231 			unordered[order[m]] = 0;
    232 		for( int m = besti; m < bestj; ++m )
    233 			unordered[order[m]] = 2;
    234 		for( int m = bestj; m < count; ++m )
    235 			unordered[order[m]] = 1;
    236 
    237 		m_colours->RemapIndices( unordered, bestindices );
    238 		
    239 		// save the block
    240 		WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
    241 
    242 		// save the error
    243 		m_besterror = besterror;
    244 	}
    245 }
    246 
    247 void ClusterFit::Compress4( void* block )
    248 {
    249 	// declare variables
    250 	int const count = m_colours->GetCount();
    251 	Vec4 const two = VEC4_CONST( 2.0f );
    252 	Vec4 const one = VEC4_CONST( 1.0f );
    253 	Vec4 const onethird_onethird2( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f );
    254 	Vec4 const twothirds_twothirds2( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f );
    255 	Vec4 const twonineths = VEC4_CONST( 2.0f/9.0f );
    256 	Vec4 const zero = VEC4_CONST( 0.0f );
    257 	Vec4 const half = VEC4_CONST( 0.5f );
    258 	Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
    259 	Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
    260 
    261 	// prepare an ordering using the principle axis
    262 	ConstructOrdering( m_principle, 0 );
    263 	
    264 	// check all possible clusters and iterate on the total order
    265 	Vec4 beststart = VEC4_CONST( 0.0f );
    266 	Vec4 bestend = VEC4_CONST( 0.0f );
    267 	Vec4 besterror = m_besterror;
    268 	u8 bestindices[16];
    269 	int bestiteration = 0;
    270 	int besti = 0, bestj = 0, bestk = 0;
    271 	
    272 	// loop over iterations (we avoid the case that all points in first or last cluster)
    273 	for( int iterationIndex = 0;; )
    274 	{
    275 		// first cluster [0,i) is at the start
    276 		Vec4 part0 = VEC4_CONST( 0.0f );
    277 		for( int i = 0; i < count; ++i )
    278 		{
    279 			// second cluster [i,j) is one third along
    280 			Vec4 part1 = VEC4_CONST( 0.0f );
    281 			for( int j = i;; )
    282 			{
    283 				// third cluster [j,k) is two thirds along
    284 				Vec4 part2 = ( j == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f );
    285 				int kmin = ( j == 0 ) ? 1 : j;
    286 				for( int k = kmin;; )
    287 				{
    288 					// last cluster [k,count) is at the end
    289 					Vec4 part3 = m_xsum_wsum - part2 - part1 - part0;
    290 
    291 					// compute least squares terms directly
    292 					Vec4 const alphax_sum = MultiplyAdd( part2, onethird_onethird2, MultiplyAdd( part1, twothirds_twothirds2, part0 ) );
    293 					Vec4 const alpha2_sum = alphax_sum.SplatW();
    294 					
    295 					Vec4 const betax_sum = MultiplyAdd( part1, onethird_onethird2, MultiplyAdd( part2, twothirds_twothirds2, part3 ) );
    296 					Vec4 const beta2_sum = betax_sum.SplatW();
    297 					
    298 					Vec4 const alphabeta_sum = twonineths*( part1 + part2 ).SplatW();
    299 
    300 					// compute the least-squares optimal points
    301 					Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) );
    302 					Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor;
    303 					Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor;
    304 
    305 					// clamp to the grid
    306 					a = Min( one, Max( zero, a ) );
    307 					b = Min( one, Max( zero, b ) );
    308 					a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
    309 					b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
    310 					
    311 					// compute the error (we skip the constant xxsum)
    312 					Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
    313 					Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
    314 					Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
    315 					Vec4 e4 = MultiplyAdd( two, e3, e1 );
    316 
    317 					// apply the metric to the error term
    318 					Vec4 e5 = e4*m_metric;
    319 					Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
    320 
    321 					// keep the solution if it wins
    322 					if( CompareAnyLessThan( error, besterror ) )
    323 					{
    324 						beststart = a;
    325 						bestend = b;
    326 						besterror = error;
    327 						besti = i;
    328 						bestj = j;
    329 						bestk = k;
    330 						bestiteration = iterationIndex;
    331 					}
    332 
    333 					// advance
    334 					if( k == count )
    335 						break;
    336 					part2 += m_points_weights[k];
    337 					++k;
    338 				}
    339 
    340 				// advance
    341 				if( j == count )
    342 					break;
    343 				part1 += m_points_weights[j];
    344 				++j;
    345 			}
    346 
    347 			// advance
    348 			part0 += m_points_weights[i];
    349 		}
    350 		
    351 		// stop if we didn't improve in this iteration
    352 		if( bestiteration != iterationIndex )
    353 			break;
    354 			
    355 		// advance if possible
    356 		++iterationIndex;
    357 		if( iterationIndex == m_iterationCount )
    358 			break;
    359 			
    360 		// stop if a new iteration is an ordering that has already been tried
    361 		Vec3 axis = ( bestend - beststart ).GetVec3();
    362 		if( !ConstructOrdering( axis, iterationIndex ) )
    363 			break;
    364 	}
    365 
    366 	// save the block if necessary
    367 	if( CompareAnyLessThan( besterror, m_besterror ) )
    368 	{
    369 		// remap the indices
    370 		u8 const* order = ( u8* )m_order + 16*bestiteration;
    371 
    372 		u8 unordered[16];
    373 		for( int m = 0; m < besti; ++m )
    374 			unordered[order[m]] = 0;
    375 		for( int m = besti; m < bestj; ++m )
    376 			unordered[order[m]] = 2;
    377 		for( int m = bestj; m < bestk; ++m )
    378 			unordered[order[m]] = 3;
    379 		for( int m = bestk; m < count; ++m )
    380 			unordered[order[m]] = 1;
    381 
    382 		m_colours->RemapIndices( unordered, bestindices );
    383 		
    384 		// save the block
    385 		WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
    386 
    387 		// save the error
    388 		m_besterror = besterror;
    389 	}
    390 }
    391 
    392 } // namespace squish