clusterfit.cc (12146B)
1 /* ----------------------------------------------------------------------------- 2 3 Copyright (c) 2006 Simon Brown si@sjbrown.co.uk 4 Copyright (c) 2007 Ignacio Castano icastano@nvidia.com 5 6 Permission is hereby granted, free of charge, to any person obtaining 7 a copy of this software and associated documentation files (the 8 "Software"), to deal in the Software without restriction, including 9 without limitation the rights to use, copy, modify, merge, publish, 10 distribute, sublicense, and/or sell copies of the Software, and to 11 permit persons to whom the Software is furnished to do so, subject to 12 the following conditions: 13 14 The above copyright notice and this permission notice shall be included 15 in all copies or substantial portions of the Software. 16 17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 18 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 20 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 21 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 22 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 23 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 25 -------------------------------------------------------------------------- */ 26 27 #include "clusterfit.h" 28 #include "colourset.h" 29 #include "colourblock.h" 30 #include <cfloat> 31 32 namespace squish { 33 34 ClusterFit::ClusterFit( ColourSet const* colours, int flags, float* metric ) 35 : ColourFit( colours, flags ) 36 { 37 // set the iteration count 38 m_iterationCount = ( m_flags & kColourIterativeClusterFit ) ? kMaxIterations : 1; 39 40 // initialise the metric (old perceptual = 0.2126f, 0.7152f, 0.0722f) 41 if( metric ) 42 m_metric = Vec4( metric[0], metric[1], metric[2], 1.0f ); 43 else 44 m_metric = VEC4_CONST( 1.0f ); 45 46 // initialise the best error 47 m_besterror = VEC4_CONST( FLT_MAX ); 48 49 // cache some values 50 int const count = m_colours->GetCount(); 51 Vec3 const* values = m_colours->GetPoints(); 52 53 // get the covariance matrix 54 Sym3x3 covariance = ComputeWeightedCovariance( count, values, m_colours->GetWeights() ); 55 56 // compute the principle component 57 m_principle = ComputePrincipleComponent( covariance ); 58 } 59 60 bool ClusterFit::ConstructOrdering( Vec3 const& axis, int iteration ) 61 { 62 // cache some values 63 int const count = m_colours->GetCount(); 64 Vec3 const* values = m_colours->GetPoints(); 65 66 // build the list of dot products 67 float dps[16]; 68 u8* order = ( u8* )m_order + 16*iteration; 69 for( int i = 0; i < count; ++i ) 70 { 71 dps[i] = Dot( values[i], axis ); 72 order[i] = ( u8 )i; 73 } 74 75 // stable sort using them 76 for( int i = 0; i < count; ++i ) 77 { 78 for( int j = i; j > 0 && dps[j] < dps[j - 1]; --j ) 79 { 80 std::swap( dps[j], dps[j - 1] ); 81 std::swap( order[j], order[j - 1] ); 82 } 83 } 84 85 // check this ordering is unique 86 for( int it = 0; it < iteration; ++it ) 87 { 88 u8 const* prev = ( u8* )m_order + 16*it; 89 bool same = true; 90 for( int i = 0; i < count; ++i ) 91 { 92 if( order[i] != prev[i] ) 93 { 94 same = false; 95 break; 96 } 97 } 98 if( same ) 99 return false; 100 } 101 102 // copy the ordering and weight all the points 103 Vec3 const* unweighted = m_colours->GetPoints(); 104 float const* weights = m_colours->GetWeights(); 105 m_xsum_wsum = VEC4_CONST( 0.0f ); 106 for( int i = 0; i < count; ++i ) 107 { 108 int j = order[i]; 109 Vec4 p( unweighted[j].X(), unweighted[j].Y(), unweighted[j].Z(), 1.0f ); 110 Vec4 w( weights[j] ); 111 Vec4 x = p*w; 112 m_points_weights[i] = x; 113 m_xsum_wsum += x; 114 } 115 return true; 116 } 117 118 void ClusterFit::Compress3( void* block ) 119 { 120 // declare variables 121 int const count = m_colours->GetCount(); 122 Vec4 const two = VEC4_CONST( 2.0 ); 123 Vec4 const one = VEC4_CONST( 1.0f ); 124 Vec4 const half_half2( 0.5f, 0.5f, 0.5f, 0.25f ); 125 Vec4 const zero = VEC4_CONST( 0.0f ); 126 Vec4 const half = VEC4_CONST( 0.5f ); 127 Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f ); 128 Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f ); 129 130 // prepare an ordering using the principle axis 131 ConstructOrdering( m_principle, 0 ); 132 133 // check all possible clusters and iterate on the total order 134 Vec4 beststart = VEC4_CONST( 0.0f ); 135 Vec4 bestend = VEC4_CONST( 0.0f ); 136 Vec4 besterror = m_besterror; 137 u8 bestindices[16]; 138 int bestiteration = 0; 139 int besti = 0, bestj = 0; 140 141 // loop over iterations (we avoid the case that all points in first or last cluster) 142 for( int iterationIndex = 0;; ) 143 { 144 // first cluster [0,i) is at the start 145 Vec4 part0 = VEC4_CONST( 0.0f ); 146 for( int i = 0; i < count; ++i ) 147 { 148 // second cluster [i,j) is half along 149 Vec4 part1 = ( i == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f ); 150 int jmin = ( i == 0 ) ? 1 : i; 151 for( int j = jmin;; ) 152 { 153 // last cluster [j,count) is at the end 154 Vec4 part2 = m_xsum_wsum - part1 - part0; 155 156 // compute least squares terms directly 157 Vec4 alphax_sum = MultiplyAdd( part1, half_half2, part0 ); 158 Vec4 alpha2_sum = alphax_sum.SplatW(); 159 160 Vec4 betax_sum = MultiplyAdd( part1, half_half2, part2 ); 161 Vec4 beta2_sum = betax_sum.SplatW(); 162 163 Vec4 alphabeta_sum = ( part1*half_half2 ).SplatW(); 164 165 // compute the least-squares optimal points 166 Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) ); 167 Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor; 168 Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor; 169 170 // clamp to the grid 171 a = Min( one, Max( zero, a ) ); 172 b = Min( one, Max( zero, b ) ); 173 a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp; 174 b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp; 175 176 // compute the error (we skip the constant xxsum) 177 Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum ); 178 Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum ); 179 Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 ); 180 Vec4 e4 = MultiplyAdd( two, e3, e1 ); 181 182 // apply the metric to the error term 183 Vec4 e5 = e4*m_metric; 184 Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ(); 185 186 // keep the solution if it wins 187 if( CompareAnyLessThan( error, besterror ) ) 188 { 189 beststart = a; 190 bestend = b; 191 besti = i; 192 bestj = j; 193 besterror = error; 194 bestiteration = iterationIndex; 195 } 196 197 // advance 198 if( j == count ) 199 break; 200 part1 += m_points_weights[j]; 201 ++j; 202 } 203 204 // advance 205 part0 += m_points_weights[i]; 206 } 207 208 // stop if we didn't improve in this iteration 209 if( bestiteration != iterationIndex ) 210 break; 211 212 // advance if possible 213 ++iterationIndex; 214 if( iterationIndex == m_iterationCount ) 215 break; 216 217 // stop if a new iteration is an ordering that has already been tried 218 Vec3 axis = ( bestend - beststart ).GetVec3(); 219 if( !ConstructOrdering( axis, iterationIndex ) ) 220 break; 221 } 222 223 // save the block if necessary 224 if( CompareAnyLessThan( besterror, m_besterror ) ) 225 { 226 // remap the indices 227 u8 const* order = ( u8* )m_order + 16*bestiteration; 228 229 u8 unordered[16]; 230 for( int m = 0; m < besti; ++m ) 231 unordered[order[m]] = 0; 232 for( int m = besti; m < bestj; ++m ) 233 unordered[order[m]] = 2; 234 for( int m = bestj; m < count; ++m ) 235 unordered[order[m]] = 1; 236 237 m_colours->RemapIndices( unordered, bestindices ); 238 239 // save the block 240 WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block ); 241 242 // save the error 243 m_besterror = besterror; 244 } 245 } 246 247 void ClusterFit::Compress4( void* block ) 248 { 249 // declare variables 250 int const count = m_colours->GetCount(); 251 Vec4 const two = VEC4_CONST( 2.0f ); 252 Vec4 const one = VEC4_CONST( 1.0f ); 253 Vec4 const onethird_onethird2( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f ); 254 Vec4 const twothirds_twothirds2( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f ); 255 Vec4 const twonineths = VEC4_CONST( 2.0f/9.0f ); 256 Vec4 const zero = VEC4_CONST( 0.0f ); 257 Vec4 const half = VEC4_CONST( 0.5f ); 258 Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f ); 259 Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f ); 260 261 // prepare an ordering using the principle axis 262 ConstructOrdering( m_principle, 0 ); 263 264 // check all possible clusters and iterate on the total order 265 Vec4 beststart = VEC4_CONST( 0.0f ); 266 Vec4 bestend = VEC4_CONST( 0.0f ); 267 Vec4 besterror = m_besterror; 268 u8 bestindices[16]; 269 int bestiteration = 0; 270 int besti = 0, bestj = 0, bestk = 0; 271 272 // loop over iterations (we avoid the case that all points in first or last cluster) 273 for( int iterationIndex = 0;; ) 274 { 275 // first cluster [0,i) is at the start 276 Vec4 part0 = VEC4_CONST( 0.0f ); 277 for( int i = 0; i < count; ++i ) 278 { 279 // second cluster [i,j) is one third along 280 Vec4 part1 = VEC4_CONST( 0.0f ); 281 for( int j = i;; ) 282 { 283 // third cluster [j,k) is two thirds along 284 Vec4 part2 = ( j == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f ); 285 int kmin = ( j == 0 ) ? 1 : j; 286 for( int k = kmin;; ) 287 { 288 // last cluster [k,count) is at the end 289 Vec4 part3 = m_xsum_wsum - part2 - part1 - part0; 290 291 // compute least squares terms directly 292 Vec4 const alphax_sum = MultiplyAdd( part2, onethird_onethird2, MultiplyAdd( part1, twothirds_twothirds2, part0 ) ); 293 Vec4 const alpha2_sum = alphax_sum.SplatW(); 294 295 Vec4 const betax_sum = MultiplyAdd( part1, onethird_onethird2, MultiplyAdd( part2, twothirds_twothirds2, part3 ) ); 296 Vec4 const beta2_sum = betax_sum.SplatW(); 297 298 Vec4 const alphabeta_sum = twonineths*( part1 + part2 ).SplatW(); 299 300 // compute the least-squares optimal points 301 Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) ); 302 Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor; 303 Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor; 304 305 // clamp to the grid 306 a = Min( one, Max( zero, a ) ); 307 b = Min( one, Max( zero, b ) ); 308 a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp; 309 b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp; 310 311 // compute the error (we skip the constant xxsum) 312 Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum ); 313 Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum ); 314 Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 ); 315 Vec4 e4 = MultiplyAdd( two, e3, e1 ); 316 317 // apply the metric to the error term 318 Vec4 e5 = e4*m_metric; 319 Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ(); 320 321 // keep the solution if it wins 322 if( CompareAnyLessThan( error, besterror ) ) 323 { 324 beststart = a; 325 bestend = b; 326 besterror = error; 327 besti = i; 328 bestj = j; 329 bestk = k; 330 bestiteration = iterationIndex; 331 } 332 333 // advance 334 if( k == count ) 335 break; 336 part2 += m_points_weights[k]; 337 ++k; 338 } 339 340 // advance 341 if( j == count ) 342 break; 343 part1 += m_points_weights[j]; 344 ++j; 345 } 346 347 // advance 348 part0 += m_points_weights[i]; 349 } 350 351 // stop if we didn't improve in this iteration 352 if( bestiteration != iterationIndex ) 353 break; 354 355 // advance if possible 356 ++iterationIndex; 357 if( iterationIndex == m_iterationCount ) 358 break; 359 360 // stop if a new iteration is an ordering that has already been tried 361 Vec3 axis = ( bestend - beststart ).GetVec3(); 362 if( !ConstructOrdering( axis, iterationIndex ) ) 363 break; 364 } 365 366 // save the block if necessary 367 if( CompareAnyLessThan( besterror, m_besterror ) ) 368 { 369 // remap the indices 370 u8 const* order = ( u8* )m_order + 16*bestiteration; 371 372 u8 unordered[16]; 373 for( int m = 0; m < besti; ++m ) 374 unordered[order[m]] = 0; 375 for( int m = besti; m < bestj; ++m ) 376 unordered[order[m]] = 2; 377 for( int m = bestj; m < bestk; ++m ) 378 unordered[order[m]] = 3; 379 for( int m = bestk; m < count; ++m ) 380 unordered[order[m]] = 1; 381 382 m_colours->RemapIndices( unordered, bestindices ); 383 384 // save the block 385 WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block ); 386 387 // save the error 388 m_besterror = besterror; 389 } 390 } 391 392 } // namespace squish