lpcap.cc (15957B)
1 /* 2 ** $Id: lpcap.c,v 1.6 2015/06/15 16:09:57 roberto Exp $ 3 ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) 4 */ 5 6 #include "lua.h" 7 #include "lauxlib.h" 8 9 #include "lpcap.h" 10 #include "lptypes.h" 11 12 13 #define captype(cap) ((cap)->kind) 14 15 #define isclosecap(cap) (captype(cap) == Cclose) 16 17 #define closeaddr(c) ((c)->s + (c)->siz - 1) 18 19 #define isfullcap(cap) ((cap)->siz != 0) 20 21 #define getfromktable(cs,v) lua_rawgeti((cs)->L, ktableidx((cs)->ptop), v) 22 23 #define pushluaval(cs) getfromktable(cs, (cs)->cap->idx) 24 25 26 27 /* 28 ** Put at the cache for Lua values the value indexed by 'v' in ktable 29 ** of the running pattern (if it is not there yet); returns its index. 30 */ 31 static int updatecache (CapState *cs, int v) { 32 int idx = cs->ptop + 1; /* stack index of cache for Lua values */ 33 if (v != cs->valuecached) { /* not there? */ 34 getfromktable(cs, v); /* get value from 'ktable' */ 35 lua_replace(cs->L, idx); /* put it at reserved stack position */ 36 cs->valuecached = v; /* keep track of what is there */ 37 } 38 return idx; 39 } 40 41 42 static int pushcapture (CapState *cs); 43 44 45 /* 46 ** Goes back in a list of captures looking for an open capture 47 ** corresponding to a close 48 */ 49 static Capture *findopen (Capture *cap) { 50 int n = 0; /* number of closes waiting an open */ 51 for (;;) { 52 cap--; 53 if (isclosecap(cap)) n++; /* one more open to skip */ 54 else if (!isfullcap(cap)) 55 if (n-- == 0) return cap; 56 } 57 } 58 59 60 /* 61 ** Go to the next capture 62 */ 63 static void nextcap (CapState *cs) { 64 Capture *cap = cs->cap; 65 if (!isfullcap(cap)) { /* not a single capture? */ 66 int n = 0; /* number of opens waiting a close */ 67 for (;;) { /* look for corresponding close */ 68 cap++; 69 if (isclosecap(cap)) { 70 if (n-- == 0) break; 71 } 72 else if (!isfullcap(cap)) n++; 73 } 74 } 75 cs->cap = cap + 1; /* + 1 to skip last close (or entire single capture) */ 76 } 77 78 79 /* 80 ** Push on the Lua stack all values generated by nested captures inside 81 ** the current capture. Returns number of values pushed. 'addextra' 82 ** makes it push the entire match after all captured values. The 83 ** entire match is pushed also if there are no other nested values, 84 ** so the function never returns zero. 85 */ 86 static int pushnestedvalues (CapState *cs, int addextra) { 87 Capture *co = cs->cap; 88 if (isfullcap(cs->cap++)) { /* no nested captures? */ 89 lua_pushlstring(cs->L, co->s, co->siz - 1); /* push whole match */ 90 return 1; /* that is it */ 91 } 92 else { 93 int n = 0; 94 while (!isclosecap(cs->cap)) /* repeat for all nested patterns */ 95 n += pushcapture(cs); 96 if (addextra || n == 0) { /* need extra? */ 97 lua_pushlstring(cs->L, co->s, cs->cap->s - co->s); /* push whole match */ 98 n++; 99 } 100 cs->cap++; /* skip close entry */ 101 return n; 102 } 103 } 104 105 106 /* 107 ** Push only the first value generated by nested captures 108 */ 109 static void pushonenestedvalue (CapState *cs) { 110 int n = pushnestedvalues(cs, 0); 111 if (n > 1) 112 lua_pop(cs->L, n - 1); /* pop extra values */ 113 } 114 115 116 /* 117 ** Try to find a named group capture with the name given at the top of 118 ** the stack; goes backward from 'cap'. 119 */ 120 static Capture *findback (CapState *cs, Capture *cap) { 121 lua_State *L = cs->L; 122 while (cap-- > cs->ocap) { /* repeat until end of list */ 123 if (isclosecap(cap)) 124 cap = findopen(cap); /* skip nested captures */ 125 else if (!isfullcap(cap)) 126 continue; /* opening an enclosing capture: skip and get previous */ 127 if (captype(cap) == Cgroup) { 128 getfromktable(cs, cap->idx); /* get group name */ 129 if (lp_equal(L, -2, -1)) { /* right group? */ 130 lua_pop(L, 2); /* remove reference name and group name */ 131 return cap; 132 } 133 else lua_pop(L, 1); /* remove group name */ 134 } 135 } 136 luaL_error(L, "back reference '%s' not found", lua_tostring(L, -1)); 137 return NULL; /* to avoid warnings */ 138 } 139 140 141 /* 142 ** Back-reference capture. Return number of values pushed. 143 */ 144 static int backrefcap (CapState *cs) { 145 int n; 146 Capture *curr = cs->cap; 147 pushluaval(cs); /* reference name */ 148 cs->cap = findback(cs, curr); /* find corresponding group */ 149 n = pushnestedvalues(cs, 0); /* push group's values */ 150 cs->cap = curr + 1; 151 return n; 152 } 153 154 155 /* 156 ** Table capture: creates a new table and populates it with nested 157 ** captures. 158 */ 159 static int tablecap (CapState *cs) { 160 lua_State *L = cs->L; 161 int n = 0; 162 lua_newtable(L); 163 if (isfullcap(cs->cap++)) 164 return 1; /* table is empty */ 165 while (!isclosecap(cs->cap)) { 166 if (captype(cs->cap) == Cgroup && cs->cap->idx != 0) { /* named group? */ 167 pushluaval(cs); /* push group name */ 168 pushonenestedvalue(cs); 169 lua_settable(L, -3); 170 } 171 else { /* not a named group */ 172 int i; 173 int k = pushcapture(cs); 174 for (i = k; i > 0; i--) /* store all values into table */ 175 lua_rawseti(L, -(i + 1), n + i); 176 n += k; 177 } 178 } 179 cs->cap++; /* skip close entry */ 180 return 1; /* number of values pushed (only the table) */ 181 } 182 183 184 /* 185 ** Table-query capture 186 */ 187 static int querycap (CapState *cs) { 188 int idx = cs->cap->idx; 189 pushonenestedvalue(cs); /* get nested capture */ 190 lua_gettable(cs->L, updatecache(cs, idx)); /* query cap. value at table */ 191 if (!lua_isnil(cs->L, -1)) 192 return 1; 193 else { /* no value */ 194 lua_pop(cs->L, 1); /* remove nil */ 195 return 0; 196 } 197 } 198 199 200 /* 201 ** Fold capture 202 */ 203 static int foldcap (CapState *cs) { 204 int n; 205 lua_State *L = cs->L; 206 int idx = cs->cap->idx; 207 if (isfullcap(cs->cap++) || /* no nested captures? */ 208 isclosecap(cs->cap) || /* no nested captures (large subject)? */ 209 (n = pushcapture(cs)) == 0) /* nested captures with no values? */ 210 return luaL_error(L, "no initial value for fold capture"); 211 if (n > 1) 212 lua_pop(L, n - 1); /* leave only one result for accumulator */ 213 while (!isclosecap(cs->cap)) { 214 lua_pushvalue(L, updatecache(cs, idx)); /* get folding function */ 215 lua_insert(L, -2); /* put it before accumulator */ 216 n = pushcapture(cs); /* get next capture's values */ 217 lua_call(L, n + 1, 1); /* call folding function */ 218 } 219 cs->cap++; /* skip close entry */ 220 return 1; /* only accumulator left on the stack */ 221 } 222 223 224 /* 225 ** Function capture 226 */ 227 static int functioncap (CapState *cs) { 228 int n; 229 int top = lua_gettop(cs->L); 230 pushluaval(cs); /* push function */ 231 n = pushnestedvalues(cs, 0); /* push nested captures */ 232 lua_call(cs->L, n, LUA_MULTRET); /* call function */ 233 return lua_gettop(cs->L) - top; /* return function's results */ 234 } 235 236 237 /* 238 ** Select capture 239 */ 240 static int numcap (CapState *cs) { 241 int idx = cs->cap->idx; /* value to select */ 242 if (idx == 0) { /* no values? */ 243 nextcap(cs); /* skip entire capture */ 244 return 0; /* no value produced */ 245 } 246 else { 247 int n = pushnestedvalues(cs, 0); 248 if (n < idx) /* invalid index? */ 249 return luaL_error(cs->L, "no capture '%d'", idx); 250 else { 251 lua_pushvalue(cs->L, -(n - idx + 1)); /* get selected capture */ 252 lua_replace(cs->L, -(n + 1)); /* put it in place of 1st capture */ 253 lua_pop(cs->L, n - 1); /* remove other captures */ 254 return 1; 255 } 256 } 257 } 258 259 260 /* 261 ** Return the stack index of the first runtime capture in the given 262 ** list of captures (or zero if no runtime captures) 263 */ 264 int finddyncap (Capture *cap, Capture *last) { 265 for (; cap < last; cap++) { 266 if (cap->kind == Cruntime) 267 return cap->idx; /* stack position of first capture */ 268 } 269 return 0; /* no dynamic captures in this segment */ 270 } 271 272 273 /* 274 ** Calls a runtime capture. Returns number of captures removed by 275 ** the call, including the initial Cgroup. (Captures to be added are 276 ** on the Lua stack.) 277 */ 278 int runtimecap (CapState *cs, Capture *close, const char *s, int *rem) { 279 int n, id; 280 lua_State *L = cs->L; 281 int otop = lua_gettop(L); 282 Capture *open = findopen(close); 283 assert(captype(open) == Cgroup); 284 id = finddyncap(open, close); /* get first dynamic capture argument */ 285 close->kind = Cclose; /* closes the group */ 286 close->s = s; 287 cs->cap = open; cs->valuecached = 0; /* prepare capture state */ 288 luaL_checkstack(L, 4, "too many runtime captures"); 289 pushluaval(cs); /* push function to be called */ 290 lua_pushvalue(L, SUBJIDX); /* push original subject */ 291 lua_pushinteger(L, s - cs->s + 1); /* push current position */ 292 n = pushnestedvalues(cs, 0); /* push nested captures */ 293 lua_call(L, n + 2, LUA_MULTRET); /* call dynamic function */ 294 if (id > 0) { /* are there old dynamic captures to be removed? */ 295 int i; 296 for (i = id; i <= otop; i++) 297 lua_remove(L, id); /* remove old dynamic captures */ 298 *rem = otop - id + 1; /* total number of dynamic captures removed */ 299 } 300 else 301 *rem = 0; /* no dynamic captures removed */ 302 return close - open; /* number of captures of all kinds removed */ 303 } 304 305 306 /* 307 ** Auxiliary structure for substitution and string captures: keep 308 ** information about nested captures for future use, avoiding to push 309 ** string results into Lua 310 */ 311 typedef struct StrAux { 312 int isstring; /* whether capture is a string */ 313 union { 314 Capture *cp; /* if not a string, respective capture */ 315 struct { /* if it is a string... */ 316 const char *s; /* ... starts here */ 317 const char *e; /* ... ends here */ 318 } s; 319 } u; 320 } StrAux; 321 322 #define MAXSTRCAPS 10 323 324 /* 325 ** Collect values from current capture into array 'cps'. Current 326 ** capture must be Cstring (first call) or Csimple (recursive calls). 327 ** (In first call, fills %0 with whole match for Cstring.) 328 ** Returns number of elements in the array that were filled. 329 */ 330 static int getstrcaps (CapState *cs, StrAux *cps, int n) { 331 int k = n++; 332 cps[k].isstring = 1; /* get string value */ 333 cps[k].u.s.s = cs->cap->s; /* starts here */ 334 if (!isfullcap(cs->cap++)) { /* nested captures? */ 335 while (!isclosecap(cs->cap)) { /* traverse them */ 336 if (n >= MAXSTRCAPS) /* too many captures? */ 337 nextcap(cs); /* skip extra captures (will not need them) */ 338 else if (captype(cs->cap) == Csimple) /* string? */ 339 n = getstrcaps(cs, cps, n); /* put info. into array */ 340 else { 341 cps[n].isstring = 0; /* not a string */ 342 cps[n].u.cp = cs->cap; /* keep original capture */ 343 nextcap(cs); 344 n++; 345 } 346 } 347 cs->cap++; /* skip close */ 348 } 349 cps[k].u.s.e = closeaddr(cs->cap - 1); /* ends here */ 350 return n; 351 } 352 353 354 /* 355 ** add next capture value (which should be a string) to buffer 'b' 356 */ 357 static int addonestring (luaL_Buffer *b, CapState *cs, const char *what); 358 359 360 /* 361 ** String capture: add result to buffer 'b' (instead of pushing 362 ** it into the stack) 363 */ 364 static void stringcap (luaL_Buffer *b, CapState *cs) { 365 StrAux cps[MAXSTRCAPS]; 366 int n; 367 size_t len, i; 368 const char *fmt; /* format string */ 369 fmt = lua_tolstring(cs->L, updatecache(cs, cs->cap->idx), &len); 370 n = getstrcaps(cs, cps, 0) - 1; /* collect nested captures */ 371 for (i = 0; i < len; i++) { /* traverse them */ 372 if (fmt[i] != '%') /* not an escape? */ 373 luaL_addchar(b, fmt[i]); /* add it to buffer */ 374 else if (fmt[++i] < '0' || fmt[i] > '9') /* not followed by a digit? */ 375 luaL_addchar(b, fmt[i]); /* add to buffer */ 376 else { 377 int l = fmt[i] - '0'; /* capture index */ 378 if (l > n) 379 luaL_error(cs->L, "invalid capture index (%d)", l); 380 else if (cps[l].isstring) 381 luaL_addlstring(b, cps[l].u.s.s, cps[l].u.s.e - cps[l].u.s.s); 382 else { 383 Capture *curr = cs->cap; 384 cs->cap = cps[l].u.cp; /* go back to evaluate that nested capture */ 385 if (!addonestring(b, cs, "capture")) 386 luaL_error(cs->L, "no values in capture index %d", l); 387 cs->cap = curr; /* continue from where it stopped */ 388 } 389 } 390 } 391 } 392 393 394 /* 395 ** Substitution capture: add result to buffer 'b' 396 */ 397 static void substcap (luaL_Buffer *b, CapState *cs) { 398 const char *curr = cs->cap->s; 399 if (isfullcap(cs->cap)) /* no nested captures? */ 400 luaL_addlstring(b, curr, cs->cap->siz - 1); /* keep original text */ 401 else { 402 cs->cap++; /* skip open entry */ 403 while (!isclosecap(cs->cap)) { /* traverse nested captures */ 404 const char *next = cs->cap->s; 405 luaL_addlstring(b, curr, next - curr); /* add text up to capture */ 406 if (addonestring(b, cs, "replacement")) 407 curr = closeaddr(cs->cap - 1); /* continue after match */ 408 else /* no capture value */ 409 curr = next; /* keep original text in final result */ 410 } 411 luaL_addlstring(b, curr, cs->cap->s - curr); /* add last piece of text */ 412 } 413 cs->cap++; /* go to next capture */ 414 } 415 416 417 /* 418 ** Evaluates a capture and adds its first value to buffer 'b'; returns 419 ** whether there was a value 420 */ 421 static int addonestring (luaL_Buffer *b, CapState *cs, const char *what) { 422 switch (captype(cs->cap)) { 423 case Cstring: 424 stringcap(b, cs); /* add capture directly to buffer */ 425 return 1; 426 case Csubst: 427 substcap(b, cs); /* add capture directly to buffer */ 428 return 1; 429 default: { 430 lua_State *L = cs->L; 431 int n = pushcapture(cs); 432 if (n > 0) { 433 if (n > 1) lua_pop(L, n - 1); /* only one result */ 434 if (!lua_isstring(L, -1)) 435 luaL_error(L, "invalid %s value (a %s)", what, luaL_typename(L, -1)); 436 luaL_addvalue(b); 437 } 438 return n; 439 } 440 } 441 } 442 443 444 /* 445 ** Push all values of the current capture into the stack; returns 446 ** number of values pushed 447 */ 448 static int pushcapture (CapState *cs) { 449 lua_State *L = cs->L; 450 luaL_checkstack(L, 4, "too many captures"); 451 switch (captype(cs->cap)) { 452 case Cposition: { 453 lua_pushinteger(L, cs->cap->s - cs->s + 1); 454 cs->cap++; 455 return 1; 456 } 457 case Cconst: { 458 pushluaval(cs); 459 cs->cap++; 460 return 1; 461 } 462 case Carg: { 463 int arg = (cs->cap++)->idx; 464 if (arg + FIXEDARGS > cs->ptop) 465 return luaL_error(L, "reference to absent extra argument #%d", arg); 466 lua_pushvalue(L, arg + FIXEDARGS); 467 return 1; 468 } 469 case Csimple: { 470 int k = pushnestedvalues(cs, 1); 471 lua_insert(L, -k); /* make whole match be first result */ 472 return k; 473 } 474 case Cruntime: { 475 lua_pushvalue(L, (cs->cap++)->idx); /* value is in the stack */ 476 return 1; 477 } 478 case Cstring: { 479 luaL_Buffer b; 480 luaL_buffinit(L, &b); 481 stringcap(&b, cs); 482 luaL_pushresult(&b); 483 return 1; 484 } 485 case Csubst: { 486 luaL_Buffer b; 487 luaL_buffinit(L, &b); 488 substcap(&b, cs); 489 luaL_pushresult(&b); 490 return 1; 491 } 492 case Cgroup: { 493 if (cs->cap->idx == 0) /* anonymous group? */ 494 return pushnestedvalues(cs, 0); /* add all nested values */ 495 else { /* named group: add no values */ 496 nextcap(cs); /* skip capture */ 497 return 0; 498 } 499 } 500 case Cbackref: return backrefcap(cs); 501 case Ctable: return tablecap(cs); 502 case Cfunction: return functioncap(cs); 503 case Cnum: return numcap(cs); 504 case Cquery: return querycap(cs); 505 case Cfold: return foldcap(cs); 506 default: assert(0); return 0; 507 } 508 } 509 510 511 /* 512 ** Prepare a CapState structure and traverse the entire list of 513 ** captures in the stack pushing its results. 's' is the subject 514 ** string, 'r' is the final position of the match, and 'ptop' 515 ** the index in the stack where some useful values were pushed. 516 ** Returns the number of results pushed. (If the list produces no 517 ** results, push the final position of the match.) 518 */ 519 int getcaptures (lua_State *L, const char *s, const char *r, int ptop) { 520 Capture *capture = (Capture *)lua_touserdata(L, caplistidx(ptop)); 521 int n = 0; 522 if (!isclosecap(capture)) { /* is there any capture? */ 523 CapState cs; 524 cs.ocap = cs.cap = capture; cs.L = L; 525 cs.s = s; cs.valuecached = 0; cs.ptop = ptop; 526 do { /* collect their values */ 527 n += pushcapture(&cs); 528 } while (!isclosecap(cs.cap)); 529 } 530 if (n == 0) { /* no capture values? */ 531 lua_pushinteger(L, r - s + 1); /* return only end position */ 532 n = 1; 533 } 534 return n; 535 } 536 537