ggformat

A string formatting library for C++
Log | Files | Refs

commit cfd39127e8d795fa7b578efbaf26512d8a64e9ab
Author: Michael Savage <mikejsavage@gmail.com>
Date:   Wed, 23 Aug 2017 22:56:34 +0100

Initial commit

Diffstat:
README.md | 159+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
basic_examples.cc | 64++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
ggformat.cc | 390+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
string_examples.cc | 75+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 688 insertions(+), 0 deletions(-)

diff --git a/README.md b/README.md @@ -0,0 +1,159 @@ +# ggformat + +ggformat is a liberally licensed string formatting library for C++ that +supports user defined types without blowing up your compile times. It is +meant to be used as a replacement for printf and friends. + +ggformat requires C++11, and supports VS2013, VS2015, GCC and clang out +of the box. + +[tinyformat]: https://github.com/c42f/tinyformat + +I built ggformat because the existing string formatting options for C++ +either do not support user defined types or bloat compile times too +much. printf doesn't support user defined types. Streams bloat compile +times and IO manipulators are unreadable. [tinyformat][tinyformat] uses +streams under the hood and also bloats compile times. + + +## Usage + +The formatting API looks like this: + +``` +size_t ggformat( char * buf, size_t len, const char * fmt, ... ); +bool ggprint_to_file( FILE * file, const char * fmt, ... ); +bool ggprint( const char * fmt, ... ); +``` + +`ggformat` writes at most `len` bytes to `buf`, and always includes a +null terminator. Its return value is the number of bytes that would have +been written if `buf` were large enough, and can be larger than `len` +(just like snprintf). `ggprint_to_file` does what you would expect, and +`ggprint` writes to standard output. Both return `true` on success, and +`false` if the file could not be written to. + +Basic usage looks like this: + +``` +#include "ggformat.h" + +int main() { + ggprint( "hello {}\n", 1.23 ); // hello 1.23000 + return 0; +} +``` + + +## Format options + +You can add format specifiers between the braces to change how things +are printed. The following options are supported: + +- Plus sign (`{+}`): Prints a leading + for positive numeric types. +- Width (`{x}`): left pads the output with spaces to be `x` characters + wide. 
When used on floats, it left pads the output so the __left side + of the decimal point__ is `x` characters wide (I chose this because I + think it makes `{x.y}` more intuitive). If the output is already wider + than `x` characters, it doesn't do anything. +- Width with zero padding (`{0x}`): as above, but pads with zeroes + instead of spaces. +- Width with left alignment (`{-x}` or `{-0x}`): same again but puts the + padding on the right. The padding is always spaces here, because printf ignores the zero flag when left aligning. +- Precision (`{.x}`): specifies the number of digits that appear after + the decimal point when printing floats. +- Number format (`{x}` or `{b}`): specifies that the value should be + printed as hexadecimal/binary. Here `x` and `b` are the literal letters, not a placeholder for a number. Numbers are printed as decimal if no + number format is given. + +These can all be combined, but should be kept in the order they were +just listed in. + +If you really want to print a { or }, use {{ and }} in the format +string. + + +## User defined types + +If you want to print your own types with ggformat, you need to define +`void format( FormatBuffer * fb, T x, const FormatOpts & opts );`. +`FormatBuffer` is a wrapper around a `char *` and length and its exact +definition is not important. 
`FormatOpts` is the parsed format options +and is defined as: + +``` +struct FormatOpts { + enum NumberFormat { DECIMAL, HEX, BINARY }; + + int width = -1; + int precision = -1; + bool plus_sign = false; + bool left_align = false; + bool zero_pad = false; + NumberFormat number_format = DECIMAL; +}; +``` + +`format` implementations are typically quite simple: + +``` +#include "ggformat.h" + +struct v3 { + explicit v3( float x_, float y_, float z_ ) { x = x_; y = y_; z = z_; } + float x, y, z; +}; + +v3 operator+( const v3 & lhs, const v3 & rhs ) { + return v3( lhs.x + rhs.x, lhs.y + rhs.y, lhs.z + rhs.z ); +} + +void format( FormatBuffer * fb, const v3 & v, const FormatOpts & opts ) { + format( fb, "v3(" ); + format( fb, v.x, opts ); + format( fb, ", " ); + format( fb, v.y, opts ); + format( fb, ", " ); + format( fb, v.z, opts ); + format( fb, ")" ); +} + +int main() { + v3 a = v3( 1, 2, 3 ); + v3 b = v3( 4, 5, 6 ); + // a = v3(1.00000, 2.00000, 3.00000). b = v3( 4.00, 5.00, 6.00). + // a + b = v3(+5.00000, +7.00000, +9.00000) + ggprint( "a = {}. b = {2.2}.\na + b = {+}\n", a, b, a + b ); + + return 0; +} +``` + +If you have a huge type and don't feel like writing a wall of `format`, +see `Thing` in basic_examples.cc. + + +## Other stuff + +Since this is C++ you can and should wrap `ggformat` in a string class +to make it more convenient to use. You can see an example in +string_examples.cc. + +ggformat uses sprintf under the hood. Compile times are slightly worse +than sprintf and quite a bit better than tinyformat. Runtime performance +is not important, but ggformat should not be much slower than sprintf. + +`ggformat` does not allocate memory. `ggprint_to_file` and `ggprint` +will allocate for strings larger than 4k. Currently they call `malloc` +and `free` but that's very easy to change if you'd rather use your own +allocators. + +ggformat is not especially strict about validating format strings and +aborts when it does find an error. 
You should not be passing user +defined strings as format strings, and I believe it's more helpful to +fail hard on programmer typos. If you don't like that then it's easy +enough to change. + +In general ggformat is short enough that you can easily modify it to fit +your needs, and will be updated infrequently enough that doing so isn't +a huge pain. diff --git a/basic_examples.cc b/basic_examples.cc @@ -0,0 +1,64 @@ +/* + * this file demonstrates basic ggformat usage + * + * compile me with "cl.exe basic_examples.cc ggformat.cc" + * or "g++ -std=c++11 basic_examples.cc ggformat.cc" + */ + +#include <stdint.h> +#include "ggformat.h" + +struct v3 { + explicit v3( float x_, float y_, float z_ ) { x = x_; y = y_; z = z_; } + float x, y, z; +}; + +v3 operator+( const v3 & lhs, const v3 & rhs ) { + return v3( lhs.x + rhs.x, lhs.y + rhs.y, lhs.z + rhs.z ); +} + +void format( FormatBuffer * fb, const v3 & v, const FormatOpts & opts ) { + format( fb, "v3(" ); + format( fb, v.x, opts ); + format( fb, ", " ); + format( fb, v.y, opts ); + format( fb, ", " ); + format( fb, v.z, opts ); + format( fb, ")" ); +} + +struct Thing { + // pretend this is more complicated + int a; + float b; +}; + +void format( FormatBuffer * fb, const Thing & thing, const FormatOpts & opts ) { + // this is a bit of a hack but is occasionally useful + // note that opts are ignored, rather than forwarded to a and b + ggformat_impl( fb, "a = {}. b = {}", thing.a, thing.b ); +} + +int main() { + // basic types + ggprint( "ints: {-5} {04} {+} {}\n", 1, 1, 1, 1 ); + ggprint( "hex: 0x{04x}\n", 123 ); + ggprint( "bin: 0b{b} 0b{b} 0b{b} 0b{b}\n", uint64_t( 123 ), int32_t( -123 ), uint16_t( 123 ), uint8_t( 123 ) ); + ggprint( "floats: {-10} {4.2} {+} {}\n", 1.23, 1.23, 1.23, 1.23 ); + ggprint( "bools: {} {}\n", true, false ); + ggprint( "strings: {-10} {} {{ }}\n", "hello", "world" ); + + // user defined type + v3 a = v3( 1, 2, 3 ); + v3 b = v3( 4, 5, 6 ); + ggprint( "a = {}. 
b = {02.2}.\na + b = {+}\n", a, b, a + b ); + + // more complicated user defined type + Thing thing; + thing.a = 12345; + thing.b = 67890; + ggprint( "{}\n", thing ); + + return 0; +} + diff --git a/ggformat.cc b/ggformat.cc @@ -0,0 +1,390 @@ +/* + * Copyright (c) 2017 Michael Savage <mike@mikejsavage.co.uk> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <limits.h> +#include <ctype.h> + +#include "ggformat.h" + +size_t strlcat( char * dst, const char * src, size_t dsize ); +long long strtonum( const char * numstr, long long minval, long long maxval, const char ** errstrp ); + +template< typename To, typename From > +inline To checked_cast( const From & from ) { + To result = To( from ); + GGFORMAT_ASSERT( From( result ) == from ); + return result; +} + +struct ShortString { + char buf[ 16 ]; + size_t length = 0; + + ShortString() { + buf[ 0 ] = '\0'; + } + + void operator+=( int x ) { + char num[ 16 ]; + snprintf( num, sizeof( num ), "%d", x ); + *this += num; + } + + void operator+=( const char * str ) { + strlcat( buf, str, sizeof( buf ) ); + } +}; + +template< typename T > +static void format_helper( FormatBuffer * fb, const ShortString & fmt, const T & x ) { + char * dst = fb->buf + fb->len; + size_t len = fb->capacity - fb->len; + + if( fb->len >= fb->capacity ) { + dst = NULL; + len = 0; + } + +#if PLATFORM_WINDOWS + int printed = _snprintf( NULL, 0, fmt.buf, x ); + _snprintf( dst, len, fmt.buf, x ); +#else + int printed = snprintf( dst, len, fmt.buf, x ); +#endif + fb->len += checked_cast< size_t >( printed ); +} + +void format( FormatBuffer * fb, double x, const FormatOpts & opts ) { + ShortString fmt; + fmt += "%"; + int precision = opts.precision != -1 ? 
opts.precision : 5; + if( opts.plus_sign ) fmt += "+"; + if( opts.left_align ) fmt += "-"; + if( opts.zero_pad ) fmt += "0"; + if( opts.width != -1 ) fmt += opts.width + 1 + precision; + fmt += "."; + fmt += precision; + fmt += "f"; + format_helper( fb, fmt, x ); +} + +void format( FormatBuffer * fb, char x, const FormatOpts & opts ) { + ShortString fmt; + fmt += "%"; + if( opts.left_align ) fmt += "-"; + if( opts.width != -1 ) fmt += opts.width; + fmt += "c"; + format_helper( fb, fmt, x ); +} + +void format( FormatBuffer * fb, const char * x, const FormatOpts & opts ) { + ShortString fmt; + fmt += "%"; + if( opts.left_align ) fmt += "-"; + if( opts.width != -1 ) fmt += opts.width; + fmt += "s"; + format_helper( fb, fmt, x ); +} + +void format( FormatBuffer * fb, bool x, const FormatOpts & opts ) { + format( fb, x ? "true" : "false", opts ); +} + +template< typename T > +static void int_helper( FormatBuffer * fb, const char * fmt_decimal, const T & x, const FormatOpts & opts ) { + ShortString fmt; + fmt += "%"; + if( opts.plus_sign ) fmt += "+"; + if( opts.left_align ) fmt += "-"; + if( opts.zero_pad ) fmt += "0"; + if( opts.width != -1 ) fmt += opts.width; + if( opts.number_format == FormatOpts::DECIMAL ) { + fmt += fmt_decimal; + } + else if( opts.number_format == FormatOpts::HEX ) { + fmt += "x"; + } + else if( opts.number_format == FormatOpts::BINARY ) { + fmt += "s"; + char binary[ sizeof( x ) * 8 + 1 ]; + binary[ sizeof( x ) * 8 ] = '\0'; + + for( size_t i = 0; i < sizeof( x ) * 8; i++ ) { + // this is UB for signed types, but who cares? + T bit = x & ( T( 1 ) << ( sizeof( x ) * 8 - i - 1 ) ); + binary[ i ] = bit == 0 ? 
'0' : '1'; + } + + format_helper( fb, fmt, binary ); + return; + } + format_helper( fb, fmt, x ); +} + +#define INT_OVERLOADS( T ) \ + void format( FormatBuffer * fb, signed T x, const FormatOpts & opts ) { \ + int_helper( fb, "d", x, opts ); \ + } \ + void format( FormatBuffer * fb, unsigned T x, const FormatOpts & opts ) { \ + int_helper( fb, "u", x, opts ); \ + } + +INT_OVERLOADS( char ) +INT_OVERLOADS( short ) +INT_OVERLOADS( int ) +INT_OVERLOADS( long ) +INT_OVERLOADS( long long ) + +#undef INT_OVERLOADS + +static const char * parse_format_bool( const char * p, const char * one_past_end, char x, bool * out ) { + if( p >= one_past_end ) return p; + if( *p != x ) return p; + *out = true; + return p + 1; +} + +static const char * parse_format_int( const char * p, const char * one_past_end, int * out ) { + char num[ 16 ]; + size_t num_len = 0; + + while( p + num_len < one_past_end && isdigit( p[ num_len ] ) ) { + num[ num_len ] = p[ num_len ]; + num_len++; + } + num[ num_len ] = '\0'; + + if( num_len == 0 ) return p; + + *out = int( strtonum( num, 1, 1024, NULL ) ); + GGFORMAT_ASSERT( *out != 0 ); + + return p + num_len; +} + +static const char * parse_format_precision( const char * p, const char * one_past_end, int * precision ) { + bool has_a_dot = false; + const char * after_dot = parse_format_bool( p, one_past_end, '.', &has_a_dot ); + if( !has_a_dot ) return p; + return parse_format_int( after_dot, one_past_end, precision ); +} + +static const char * parse_format_number_format( const char * p, const char * one_past_end, FormatOpts::NumberFormat * number_format ) { + *number_format = FormatOpts::DECIMAL; + + bool hex = false; + const char * after_hex = parse_format_bool( p, one_past_end, 'x', &hex ); + + if( hex ) { + *number_format = FormatOpts::HEX; + return after_hex; + } + + bool bin = false; + const char * after_bin = parse_format_bool( p, one_past_end, 'b', &bin ); + + if( bin ) { + *number_format = FormatOpts::BINARY; + return after_bin; + } + + return 
p; +} + + +FormatOpts parse_formatopts( const char * fmt, size_t len ) { + FormatOpts opts; + + const char * start = fmt; + const char * one_past_end = start + len; + + start = parse_format_bool( start, one_past_end, '+', &opts.plus_sign ); + start = parse_format_bool( start, one_past_end, '-', &opts.left_align ); + start = parse_format_bool( start, one_past_end, '0', &opts.zero_pad ); + start = parse_format_int( start, one_past_end, &opts.width ); + start = parse_format_precision( start, one_past_end, &opts.precision ); + start = parse_format_number_format( start, one_past_end, &opts.number_format ); + + GGFORMAT_ASSERT( start == one_past_end ); + + return opts; +} + +static bool strchridx( const char * haystack, char needle, size_t * idx, size_t skip = 0 ) { + *idx = skip; + while( haystack[ *idx ] != '\0' ) { + if( haystack[ *idx ] == needle ) { + return true; + } + ( *idx )++; + } + return false; +} + +bool ggformat_find( const char * str, size_t * start, size_t * one_past_end ) { + size_t open_idx; + bool has_open = strchridx( str, '{', &open_idx ); + if( has_open && str[ open_idx + 1 ] == '{' ) { + has_open = false; + } + if( !has_open ) open_idx = 0; + + size_t close_idx; + bool has_close = strchridx( str, '}', &close_idx, open_idx ); + if( has_close && str[ close_idx + 1 ] == '}' ) { + has_close = false; + } + + if( has_open ) { + GGFORMAT_ASSERT( has_close ); + GGFORMAT_ASSERT( open_idx < close_idx ); + + *start = open_idx; + *one_past_end = close_idx; + + return true; + } + + GGFORMAT_ASSERT( !has_close ); + return false; +} + +void ggformat_literals( FormatBuffer * fb, const char * literals, size_t len ) { + size_t copied_len = 0; + for( size_t i = 0; i < len; i++ ) { + if( literals[ i ] == '{' || literals[ i ] == '}' ) { + i++; + } + if( fb->len + copied_len < fb->capacity ) { + fb->buf[ fb->len + copied_len ] = literals[ i ]; + } + copied_len++; + } + fb->len += copied_len; + fb->buf[ fb->len < fb->capacity - 1 ? 
fb->len : fb->capacity - 1 ] = '\0'; +} + +void ggformat_impl( FormatBuffer * fb, const char * fmt ) { + size_t ignored; + GGFORMAT_ASSERT( !ggformat_find( fmt, &ignored, &ignored ) ); + ggformat_literals( fb, fmt, strlen( fmt ) ); +} + +/* + * Copyright (c) 1998, 2015 Todd C. Miller <Todd.Miller@courtesan.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +size_t +strlcat(char *dst, const char *src, size_t dsize) +{ + const char *odst = dst; + const char *osrc = src; + size_t n = dsize; + size_t dlen; + + /* Find the end of dst and adjust bytes left but don't go past end. */ + while (n-- != 0 && *dst != '\0') + dst++; + dlen = dst - odst; + n = dsize - dlen; + + if (n-- == 0) + return(dlen + strlen(src)); + while (*src != '\0') { + if (n != 0) { + *dst++ = *src; + n--; + } + src++; + } + *dst = '\0'; + + return(dlen + (src - osrc)); /* count does not include NUL */ +} + +/* + * Copyright (c) 2004 Ted Unangst and Todd Miller + * All rights reserved. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#define INVALID 1 +#define TOOSMALL 2 +#define TOOLARGE 3 + +long long +strtonum(const char *numstr, long long minval, long long maxval, const char **errstrp) +{ + long long ll = 0; + char *ep; + int error = 0; + struct errval { + const char *errstr; + int err; + } ev[4] = { + { NULL, 0 }, + { "invalid", EINVAL }, + { "too small", ERANGE }, + { "too large", ERANGE }, + }; + + ev[0].err = errno; + errno = 0; + if (minval > maxval) + error = INVALID; + else { + ll = strtoll(numstr, &ep, 10); + if (numstr == ep || *ep != '\0') + error = INVALID; + else if ((ll == LLONG_MIN && errno == ERANGE) || ll < minval) + error = TOOSMALL; + else if ((ll == LLONG_MAX && errno == ERANGE) || ll > maxval) + error = TOOLARGE; + } + if (errstrp != NULL) + *errstrp = ev[error].errstr; + errno = ev[error].err; + if (error) + ll = 0; + + return (ll); +} diff --git a/string_examples.cc b/string_examples.cc @@ -0,0 +1,75 @@ +/* + * this file demonstrates integrating ggformat with a string class + * + * compile me with "cl.exe string_examples.cc ggformat.cc" + * or "g++ -std=c++11 string_examples.cc ggformat.cc" + */ + +#include "ggformat.h" + +template< typename T > +T min( T a, T b ) { + return a < b ? a : b; +} + +template< size_t N > +class str { +public: + str() { + clear(); + } + + template< typename... Rest > + str( const char * fmt, Rest... rest ) { + sprintf( fmt, rest... 
); + } + + void clear() { + buf[ 0 ] = '\0'; + length = 0; + } + + template< typename T > + void operator+=( const T & x ) { + appendf( "{}", x ); + } + + template< typename... Rest > + void sprintf( const char * fmt, Rest... rest ) { + size_t copied = ggformat( buf, N, fmt, rest... ); + length = min( copied, N - 1 ); + } + + template< typename... Rest > + void appendf( const char * fmt, Rest... rest ) { + size_t copied = ggformat( buf + length, N - length, fmt, rest... ); + length += min( copied, N - length - 1 ); + } + + const char * c_str() const { + return buf; + } + +private: + char buf[ N ]; + size_t length; +}; + +template< size_t N > +void format( FormatBuffer * fb, const str< N > & buf, const FormatOpts & opts ) { + format( fb, buf.c_str(), opts ); +} + +int main() { + str< 256 > a( "hello {-10}:", "world" ); + a += " "; + a += 1; + a += " "; + a += 1.2345; + a += " "; + a += false; + a.appendf( ". {} w{}rld", "goodbye", 0 ); + + ggprint( "{}\n", a ); + return 0; +}