medfall

A super great game engine
Log | Files | Refs

utf8.h (1682B)


      1 #pragma once
      2 
      3 #include "intrinsics.h"
      4 
      5 // Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de>
      6 // See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
      7 
      8 #define UTF8_ACCEPT 0
      9 #define UTF8_REJECT 12
     10 
     11 uint32_t decode(uint32_t* state, uint32_t* codep, uint32_t byte) {
     12 	static const uint8_t utf8d[] = {
     13 		// The first part of the table maps bytes to character classes that
     14 		// to reduce the size of the transition table and create bitmasks.
     15 		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
     16 		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
     17 		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
     18 		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
     19 		1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
     20 		7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
     21 		8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
     22 		10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
     23 
     24 		// The second part is a transition table that maps a combination
     25 		// of a state of the automaton and a character class to a state.
     26 	 	 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
     27 		12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
     28 		12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
     29 		12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
     30 		12,36,12,12,12,12,12,12,12,12,12,12, 
     31 	};
     32 
     33 	uint32_t type = utf8d[byte];
     34 
     35 	*codep = (*state != UTF8_ACCEPT) ?
     36 		(byte & 0x3fu) | (*codep << 6) :
     37 		(0xff >> type) & (byte);
     38 
     39 	*state = utf8d[256 + *state + type];
     40 	return *state;
     41 }