commit e5dd32c9247cdd9e2c888c011b177428a1a907c7
parent 5eb105103b42abbd72e0553b3a0517216c802ff4
Author: Michael Savage <mikejsavage@gmail.com>
Date: Tue Oct 18 20:14:08 +0300

    Better quantize01/dequantize01, add quantize11/dequantize11

Diffstat:
 int_conversions.h | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------
diff --git a/int_conversions.h b/int_conversions.h @@ -12,8 +12,8 @@ template< typename T > s64 check_s64( T x ) { - if( IS_SIGNED( T ) ) assert( x >= S64_MIN ); - assert( x <= S64_MAX ); + if( IS_SIGNED( T ) ) ASSERT( x >= S64_MIN ); + ASSERT( x <= S64_MAX ); return ( s64 ) x; } @@ -21,15 +21,15 @@ template< typename T > s64 check_s64( T x ) { #pragma GCC diagnostic push // GCC warns even though the optimiser removes the branch #pragma GCC diagnostic ignored "-Wsign-compare" - if( IS_SIGNED( T ) ) assert( x >= S64_MIN ); + if( IS_SIGNED( T ) ) ASSERT( x >= S64_MIN ); #pragma GCC diagnostic pop - assert( x <= S64_MAX ); + ASSERT( x <= S64_MAX ); return ( s64 ) x; } template< typename T > s32 check_s32( T x ) { - if( IS_SIGNED( T ) ) assert( x >= S32_MIN ); - assert( x <= S32_MAX ); + if( IS_SIGNED( T ) ) ASSERT( x >= S32_MIN ); + ASSERT( x <= S32_MAX ); return ( s32 ) x; } @@ -37,15 +37,15 @@ template< typename T > s32 check_s32( T x ) { #pragma GCC diagnostic push // GCC warns even though the optimiser removes the branch #pragma GCC diagnostic ignored "-Wsign-compare" - if( IS_SIGNED( T ) ) assert( x >= S32_MIN ); + if( IS_SIGNED( T ) ) ASSERT( x >= S32_MIN ); #pragma GCC diagnostic pop - assert( x <= S32_MAX ); + ASSERT( x <= S32_MAX ); return ( s32 ) x; } template< typename T > s16 check_s16( T x ) { - if( IS_SIGNED( T ) ) assert( x >= S16_MIN ); - assert( x <= S16_MAX ); + if( IS_SIGNED( T ) ) ASSERT( x >= S16_MIN ); + ASSERT( x <= S16_MAX ); return ( s16 ) x; } @@ -53,15 +53,15 @@ template< typename T > s16 check_s16( T x ) { #pragma GCC diagnostic push // GCC warns even though the optimiser removes the branch #pragma GCC diagnostic ignored "-Wsign-compare" - if( IS_SIGNED( T ) ) assert( x >= S16_MIN ); + if( IS_SIGNED( T ) ) ASSERT( x >= S16_MIN ); #pragma GCC diagnostic pop - assert( x <= S16_MAX ); + ASSERT( x <= S16_MAX ); return ( s16 ) x; } template< typename T > s8 check_s8( T x ) { - if( IS_SIGNED( T ) ) assert( x >= S8_MIN ); - assert( x <= S8_MAX ); 
+ if( IS_SIGNED( T ) ) ASSERT( x >= S8_MIN ); + ASSERT( x <= S8_MAX ); return ( s8 ) x; } @@ -69,14 +69,14 @@ template< typename T > s8 check_s8( T x ) { #pragma GCC diagnostic push // GCC warns even though the optimiser removes the branch #pragma GCC diagnostic ignored "-Wsign-compare" - if( IS_SIGNED( T ) ) assert( x >= S8_MIN ); + if( IS_SIGNED( T ) ) ASSERT( x >= S8_MIN ); #pragma GCC diagnostic pop - assert( x <= S8_MAX ); + ASSERT( x <= S8_MAX ); return ( s8 ) x; } template< typename T > u64 check_u64( T x ) { - assert( x >= 0 && x <= U64_MAX ); + ASSERT( x >= 0 && x <= U64_MAX ); return ( u64 ) x; } @@ -89,7 +89,7 @@ u64 clamp_u64( T x ) { template< typename T > u32 check_u32( T x ) { - assert( x >= 0 && x <= U32_MAX ); + ASSERT( x >= 0 && x <= U32_MAX ); return ( u32 ) x; } @@ -102,7 +102,7 @@ u32 clamp_u32( T x ) { template< typename T > u16 check_u16( T x ) { - assert( x >= 0 && x <= U16_MAX ); + ASSERT( x >= 0 && x <= U16_MAX ); return ( u16 ) x; } @@ -115,7 +115,7 @@ u16 clamp_u16( T x ) { template< typename T > u8 check_u8( T x ) { - assert( x >= 0 && x <= U8_MAX ); + ASSERT( x >= 0 && x <= U8_MAX ); return ( u8 ) x; } @@ -144,13 +144,38 @@ INLINE u64 to_unsigned( s64 x ) { return ( u64 ) x; } * quantization */ -INLINE u32 quantize( float x, u32 num_bits ) { - assert( x >= 0 && x <= 1 ); - return u32( x * float( ( 1 << num_bits ) - 1 ) + 0.5f ); +INLINE u32 quantize01( float x, u32 num_bits ) { + ASSERT( x >= 0 && x <= 1 ); + return u32( x * checked_cast< float >( ( 1 << num_bits ) - 1 ) + 0.5f ); } -INLINE float dequantize( u32 q, u32 num_bits ) { - return float( q ) / float( 1 << num_bits ); +INLINE float dequantize01( u32 q, u32 num_bits ) { + float result = float( q ) / float( ( 1 << num_bits ) - 1 ); + ASSERT( result >= 0.0f && result <= 1.0f ); + return result; +} + +INLINE s32 quantize11( float x, u32 num_bits ) { + ASSERT( x >= -1 && x <= 1 ); + num_bits--; + if( x >= 0 ) { + return s32( x * checked_cast< float >( ( 1 << num_bits ) - 1 ) + 0.5f 
); + } + + return s32( x * checked_cast< float >( 1 << num_bits ) - 0.5f ); +} + +INLINE float dequantize11( s32 q, u32 num_bits ) { + num_bits--; + if( q >= 0 ) { + float result = dequantize01( checked_cast< u32 >( q ), num_bits ); + ASSERT( result >= 0.0f && result <= 1.0f ); + return result; + } + + float result = checked_cast< float >( q ) / checked_cast< float >( 1 << num_bits ); + ASSERT( result < 0.0f && result >= -1.0f ); + return result; } #endif // _INT_CONVERSIONS_H_