File : s-regpat.adb


   1 ------------------------------------------------------------------------------
   2 --                                                                          --
   3 --                         GNAT LIBRARY COMPONENTS                          --
   4 --                                                                          --
   5 --                          G N A T . R E G P A T                           --
   6 --                                                                          --
   7 --                                 B o d y                                  --
   8 --                                                                          --
   9 --               Copyright (C) 1986 by University of Toronto.               --
  10 --                      Copyright (C) 1999-2016, AdaCore                    --
  11 --                                                                          --
  12 -- GNAT is free software;  you can  redistribute it  and/or modify it under --
  13 -- terms of the  GNU General Public License as published  by the Free Soft- --
  14 -- ware  Foundation;  either version 3,  or (at your option) any later ver- --
  15 -- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
  16 -- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
  17 -- or FITNESS FOR A PARTICULAR PURPOSE.                                     --
  18 --                                                                          --
  19 --                                                                          --
  20 --                                                                          --
  21 --                                                                          --
  22 --                                                                          --
  23 -- You should have received a copy of the GNU General Public License and    --
  24 -- a copy of the GCC Runtime Library Exception along with this program;     --
  25 -- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
  26 -- <http://www.gnu.org/licenses/>.                                          --
  27 --                                                                          --
  28 -- GNAT was originally developed  by the GNAT team at  New York University. --
  29 -- Extensive contributions were provided by Ada Core Technologies Inc.      --
  30 --                                                                          --
  31 ------------------------------------------------------------------------------
  32 
  33 --  This is an altered Ada 95 version of the original V8 style regular
  34 --  expression library written in C by Henry Spencer. Apart from the
  35 --  translation to Ada, the interface has been considerably changed to
  36 --  use the Ada String type instead of C-style nul-terminated strings.
  37 
  38 --  Beware that some of this code is subtly aware of the way operator
  39 --  precedence is structured in regular expressions. Serious changes in
  40 --  regular-expression syntax might require a total rethink.
  41 
  42 with System.IO;               use System.IO;
  43 with Ada.Characters.Handling; use Ada.Characters.Handling;
  44 with Ada.Unchecked_Conversion;
  45 
  46 package body System.Regpat is
  47 
  48    Debug : constant Boolean := False;
  49    --  Set to True to activate debug traces. This is normally set to constant
  50    --  False to simply delete all the trace code. It is to be edited to True
  51    --  for internal debugging of the package.
  52 
  53    ----------------------------
  54    -- Implementation details --
  55    ----------------------------
  56 
  57    --  This is essentially a linear encoding of a nondeterministic
  58    --  finite-state machine, also known as syntax charts or
  59    --  "railroad normal form" in parsing technology.
  60 
  61    --  Each node is an opcode plus a "next" pointer, possibly plus an
  62    --  operand. "Next" pointers of all nodes except BRANCH implement
  63    --  concatenation; a "next" pointer with a BRANCH on both ends of it
  64    --  is connecting two alternatives.
  65 
  66    --  The operand of some types of node is a literal string; for others,
  67    --  it is a node leading into a sub-FSM. In particular, the operand of
  68    --  a BRANCH node is the first node of the branch.
  69    --  (NB this is *not* a tree structure:  the tail of the branch connects
  70    --  to the thing following the set of BRANCHes).
  71 
  72    --  You can see the exact byte-compiled version by using the Dump
  73    --  subprogram. However, here are a few examples:
  74 
  75    --  (a|b):  1 : BRANCH  (next at  9)
  76    --          4 :    EXACT  (next at  17)   operand=a
  77    --          9 : BRANCH  (next at  17)
  78    --         12 :    EXACT  (next at  17)   operand=b
  79    --         17 : EOP  (next at 0)
  80    --
  81    --  (ab)*:  1 : CURLYX  (next at  25)  { 0, 32767}
  82    --          8 :    OPEN 1  (next at  12)
  83    --         12 :       EXACT  (next at  18)   operand=ab
  84    --         18 :    CLOSE 1  (next at  22)
  85    --         22 :    WHILEM  (next at 0)
  86    --         25 : NOTHING  (next at  28)
  87    --         28 : EOP  (next at 0)
  88 
  89    --  The opcodes are:
  90 
  91    type Opcode is
  92 
  93       --  Name          Operand?  Meaning
  94 
  95      (EOP,        -- no        End of program
  96       MINMOD,     -- no        Next operator is not greedy
  97 
  98       --  Classes of characters
  99 
 100       ANY,        -- no        Match any one character except newline
 101       SANY,       -- no        Match any character, including new line
 102       ANYOF,      -- class     Match any character in this class
 103       EXACT,      -- str       Match this string exactly
 104       EXACTF,     -- str       Match this string (case-folding is one)
 105       NOTHING,    -- no        Match empty string
 106       SPACE,      -- no        Match any whitespace character
 107       NSPACE,     -- no        Match any non-whitespace character
 108       DIGIT,      -- no        Match any numeric character
 109       NDIGIT,     -- no        Match any non-numeric character
 110       ALNUM,      -- no        Match any alphanumeric character
 111       NALNUM,     -- no        Match any non-alphanumeric character
 112 
 113       --  Branches
 114 
 115       BRANCH,     -- node      Match this alternative, or the next
 116 
 117       --  Simple loops (when the following node is one character in length)
 118 
 119       STAR,       -- node      Match this simple thing 0 or more times
 120       PLUS,       -- node      Match this simple thing 1 or more times
 121       CURLY,      -- 2num node Match this simple thing between n and m times.
 122 
 123       --  Complex loops
 124 
 125       CURLYX,     -- 2num node Match this complex thing {n,m} times
 126       --                       The nums are coded on two characters each
 127 
 128       WHILEM,     -- no        Do curly processing and see if rest matches
 129 
 130       --  Matches after or before a word
 131 
 132       BOL,        -- no        Match "" at beginning of line
 133       MBOL,       -- no        Same, assuming multiline (match after \n)
 134       SBOL,       -- no        Same, assuming single line (don't match at \n)
 135       EOL,        -- no        Match "" at end of line
 136       MEOL,       -- no        Same, assuming multiline (match before \n)
 137       SEOL,       -- no        Same, assuming single line (don't match at \n)
 138 
 139       BOUND,      -- no        Match "" at any word boundary
 140       NBOUND,     -- no        Match "" at any word non-boundary
 141 
 142       --  Parenthesis groups handling
 143 
 144       REFF,       -- num       Match some already matched string, folded
 145       OPEN,       -- num       Mark this point in input as start of #n
 146       CLOSE);     -- num       Analogous to OPEN
 147 
 148    for Opcode'Size use 8;
 149 
 150    --  Opcode notes:
 151 
 152    --  BRANCH
 153    --    The set of branches constituting a single choice are hooked
 154    --    together with their "next" pointers, since precedence prevents
 155    --    anything being concatenated to any individual branch. The
 156    --    "next" pointer of the last BRANCH in a choice points to the
 157    --    thing following the whole choice. This is also where the
 158    --    final "next" pointer of each individual branch points; each
 159    --    branch starts with the operand node of a BRANCH node.
 160 
 161    --  STAR,PLUS
 162    --    '?', and complex '*' and '+', are implemented with CURLYX.
 163    --    branches. Simple cases (one character per match) are implemented with
 164    --    STAR and PLUS for speed and to minimize recursive plunges.
 165 
 166    --  OPEN,CLOSE
 167    --    ...are numbered at compile time.
 168 
 169    --  EXACT, EXACTF
 170    --    There are in fact two arguments, the first one is the length (minus
 171    --    one of the string argument), coded on one character, the second
 172    --    argument is the string itself, coded on length + 1 characters.
 173 
 174    --  A node is one char of opcode followed by two chars of "next" pointer.
 175    --  "Next" pointers are stored as two 8-bit pieces, high order first. The
 176    --  value is a positive offset from the opcode of the node containing it.
 177    --  An operand, if any, simply follows the node. (Note that much of the
 178    --  code generation knows about this implicit relationship.)
 179 
 180    --  Using two bytes for the "next" pointer is vast overkill for most
 181    --  things, but allows patterns to get big without disasters.
 182 
 183    Next_Pointer_Bytes : constant := 3;
 184    --  Points after the "next pointer" data. An instruction is therefore:
 185    --     1 byte: instruction opcode
 186    --     2 bytes: pointer to next instruction
 187    --     * bytes: optional data for the instruction
 188 
 189    -----------------------
 190    -- Character classes --
 191    -----------------------
 192    --  This is the implementation for character classes ([...]) in the
 193    --  syntax for regular expressions. Each character (0..256) has an
 194    --  entry into the table. This makes for a very fast matching
 195    --  algorithm.
 196 
 197    type Class_Byte is mod 256;
 198    type Character_Class is array (Class_Byte range 0 .. 31) of Class_Byte;
 199 
 200    type Bit_Conversion_Array is array (Class_Byte range 0 .. 7) of Class_Byte;
 201    Bit_Conversion : constant Bit_Conversion_Array :=
 202                       (1, 2, 4, 8, 16, 32, 64, 128);
 203 
 204    type Std_Class is (ANYOF_NONE,
 205                       ANYOF_ALNUM,   --  Alphanumeric class [a-zA-Z0-9]
 206                       ANYOF_NALNUM,
 207                       ANYOF_SPACE,   --  Space class [ \t\n\r\f]
 208                       ANYOF_NSPACE,
 209                       ANYOF_DIGIT,   --  Digit class [0-9]
 210                       ANYOF_NDIGIT,
 211                       ANYOF_ALNUMC,  --  Alphanumeric class [a-zA-Z0-9]
 212                       ANYOF_NALNUMC,
 213                       ANYOF_ALPHA,   --  Alpha class [a-zA-Z]
 214                       ANYOF_NALPHA,
 215                       ANYOF_ASCII,   --  Ascii class (7 bits) 0..127
 216                       ANYOF_NASCII,
 217                       ANYOF_CNTRL,   --  Control class
 218                       ANYOF_NCNTRL,
 219                       ANYOF_GRAPH,   --  Graphic class
 220                       ANYOF_NGRAPH,
 221                       ANYOF_LOWER,   --  Lower case class [a-z]
 222                       ANYOF_NLOWER,
 223                       ANYOF_PRINT,   --  printable class
 224                       ANYOF_NPRINT,
 225                       ANYOF_PUNCT,   --
 226                       ANYOF_NPUNCT,
 227                       ANYOF_UPPER,   --  Upper case class [A-Z]
 228                       ANYOF_NUPPER,
 229                       ANYOF_XDIGIT,  --  Hexadecimal digit
 230                       ANYOF_NXDIGIT
 231                       );
 232 
 233    procedure Set_In_Class
 234      (Bitmap : in out Character_Class;
 235       C      : Character);
 236    --  Set the entry to True for C in the class Bitmap
 237 
 238    function Get_From_Class
 239      (Bitmap : Character_Class;
 240       C      : Character) return Boolean;
 241    --  Return True if the entry is set for C in the class Bitmap
 242 
 243    procedure Reset_Class (Bitmap : out Character_Class);
 244    --  Clear all the entries in the class Bitmap
 245 
 246    pragma Inline (Set_In_Class);
 247    pragma Inline (Get_From_Class);
 248    pragma Inline (Reset_Class);
 249 
 250    -----------------------
 251    -- Local Subprograms --
 252    -----------------------
 253 
 254    function "=" (Left : Character; Right : Opcode) return Boolean;
 255 
 256    function Is_Alnum (C : Character) return Boolean;
 257    --  Return True if C is an alphanum character or an underscore ('_')
 258 
 259    function Is_White_Space (C : Character) return Boolean;
 260    --  Return True if C is a whitespace character
 261 
 262    function Is_Printable (C : Character) return Boolean;
 263    --  Return True if C is a printable character
 264 
 265    function Operand (P : Pointer) return Pointer;
 266    --  Return a pointer to the first operand of the node at P
 267 
 268    function String_Length
 269      (Program : Program_Data;
 270       P       : Pointer) return Program_Size;
 271    --  Return the length of the string argument of the node at P
 272 
 273    function String_Operand (P : Pointer) return Pointer;
 274    --  Return a pointer to the string argument of the node at P
 275 
 276    procedure Bitmap_Operand
 277      (Program : Program_Data;
 278       P       : Pointer;
 279       Op      : out Character_Class);
 280    --  Return a pointer to the string argument of the node at P
 281 
 282    function Get_Next
 283      (Program : Program_Data;
 284       IP      : Pointer) return Pointer;
 285    --  Dig the next instruction pointer out of a node
 286 
 287    procedure Optimize (Self : in out Pattern_Matcher);
 288    --  Optimize a Pattern_Matcher by noting certain special cases
 289 
 290    function Read_Natural
 291      (Program : Program_Data;
 292       IP      : Pointer) return Natural;
 293    --  Return the 2-byte natural coded at position IP
 294 
 295    --  All of the subprograms above are tiny and should be inlined
 296 
 297    pragma Inline ("=");
 298    pragma Inline (Is_Alnum);
 299    pragma Inline (Is_White_Space);
 300    pragma Inline (Get_Next);
 301    pragma Inline (Operand);
 302    pragma Inline (Read_Natural);
 303    pragma Inline (String_Length);
 304    pragma Inline (String_Operand);
 305 
 306    type Expression_Flags is record
 307       Has_Width,            -- Known never to match null string
 308       Simple,               -- Simple enough to be STAR/PLUS operand
 309       SP_Start  : Boolean;  -- Starts with * or +
 310    end record;
 311 
 312    Worst_Expression : constant Expression_Flags := (others => False);
 313    --  Worst case
 314 
 315    procedure Dump_Until
 316      (Program  : Program_Data;
 317       Index    : in out Pointer;
 318       Till     : Pointer;
 319       Indent   : Natural;
 320       Do_Print : Boolean := True);
 321    --  Dump the program until the node Till (not included) is met. Every line
 322    --  is indented with Index spaces at the beginning Dumps till the end if
 323    --  Till is 0.
 324 
 325    procedure Dump_Operation
 326       (Program      : Program_Data;
 327        Index        : Pointer;
 328        Indent       : Natural);
 329    --  Same as above, but only dumps a single operation, and compute its
 330    --  indentation from the program.
 331 
 332    ---------
 333    -- "=" --
 334    ---------
 335 
 336    function "=" (Left : Character; Right : Opcode) return Boolean is
 337    begin
 338       return Character'Pos (Left) = Opcode'Pos (Right);
 339    end "=";
 340 
 341    --------------------
 342    -- Bitmap_Operand --
 343    --------------------
 344 
 345    procedure Bitmap_Operand
 346      (Program : Program_Data;
 347       P       : Pointer;
 348       Op      : out Character_Class)
 349    is
 350       function Convert is new Ada.Unchecked_Conversion
 351         (Program_Data, Character_Class);
 352 
 353    begin
 354       Op (0 .. 31) := Convert (Program (P + Next_Pointer_Bytes .. P + 34));
 355    end Bitmap_Operand;
 356 
 357    -------------
 358    -- Compile --
 359    -------------
 360 
 361    procedure Compile
 362      (Matcher         : out Pattern_Matcher;
 363       Expression      : String;
 364       Final_Code_Size : out Program_Size;
 365       Flags           : Regexp_Flags := No_Flags)
 366    is
 367       --  We can't allocate space until we know how big the compiled form
 368       --  will be, but we can't compile it (and thus know how big it is)
 369       --  until we've got a place to put the code. So we cheat: we compile
 370       --  it twice, once with code generation turned off and size counting
 371       --  turned on, and once "for real".
 372 
 373       --  This also means that we don't allocate space until we are sure
 374       --  that the thing really will compile successfully, and we never
 375       --  have to move the code and thus invalidate pointers into it.
 376 
 377       --  Beware that the optimization-preparation code in here knows
 378       --  about some of the structure of the compiled regexp.
 379 
 380       PM        : Pattern_Matcher renames Matcher;
 381       Program   : Program_Data renames PM.Program;
 382 
 383       Emit_Ptr  : Pointer := Program_First;
 384 
 385       Parse_Pos : Natural := Expression'First; -- Input-scan pointer
 386       Parse_End : constant Natural := Expression'Last;
 387 
 388       ----------------------------
 389       -- Subprograms for Create --
 390       ----------------------------
 391 
 392       procedure Emit (B : Character);
 393       --  Output the Character B to the Program. If code-generation is
 394       --  disabled, simply increments the program counter.
 395 
 396       function  Emit_Node (Op : Opcode) return Pointer;
 397       --  If code-generation is enabled, Emit_Node outputs the
 398       --  opcode Op and reserves space for a pointer to the next node.
 399       --  Return value is the location of new opcode, i.e. old Emit_Ptr.
 400 
 401       procedure Emit_Natural (IP : Pointer; N : Natural);
 402       --  Split N on two characters at position IP
 403 
 404       procedure Emit_Class (Bitmap : Character_Class);
 405       --  Emits a character class
 406 
 407       procedure Case_Emit (C : Character);
 408       --  Emit C, after converting is to lower-case if the regular
 409       --  expression is case insensitive.
 410 
 411       procedure Parse
 412         (Parenthesized : Boolean;
 413          Capturing     : Boolean;
 414          Flags         : out Expression_Flags;
 415          IP            : out Pointer);
 416       --  Parse regular expression, i.e. main body or parenthesized thing.
 417       --  Caller must absorb opening parenthesis. Capturing should be set to
 418       --  True when we have an open parenthesis from which we want the user
 419       --  to extra text.
 420 
 421       procedure Parse_Branch
 422         (Flags         : out Expression_Flags;
 423          First         : Boolean;
 424          IP            : out Pointer);
 425       --  Implements the concatenation operator and handles '|'.
 426       --  First should be true if this is the first item of the alternative.
 427 
 428       procedure Parse_Piece
 429         (Expr_Flags : out Expression_Flags;
 430          IP         : out Pointer);
 431       --  Parse something followed by possible [*+?]
 432 
 433       procedure Parse_Atom
 434         (Expr_Flags : out Expression_Flags;
 435          IP         : out Pointer);
 436       --  Parse_Atom is the lowest level parse procedure.
 437       --
 438       --  Optimization: Gobbles an entire sequence of ordinary characters so
 439       --  that it can turn them into a single node, which is smaller to store
 440       --  and faster to run. Backslashed characters are exceptions, each
 441       --  becoming a separate node; the code is simpler that way and it's
 442       --  not worth fixing.
 443 
 444       procedure Insert_Operator
 445         (Op       : Opcode;
 446          Operand  : Pointer;
 447          Greedy   : Boolean := True);
 448       --  Insert_Operator inserts an operator in front of an already-emitted
 449       --  operand and relocates the operand. This applies to PLUS and STAR.
 450       --  If Minmod is True, then the operator is non-greedy.
 451 
 452       function Insert_Operator_Before
 453         (Op      : Opcode;
 454          Operand : Pointer;
 455          Greedy  : Boolean;
 456          Opsize  : Pointer) return Pointer;
 457       --  Insert an operator before Operand (and move the latter forward in the
 458       --  program). Opsize is the size needed to represent the operator. This
 459       --  returns the position at which the operator was inserted, and moves
 460       --  Emit_Ptr after the new position of the operand.
 461 
 462       procedure Insert_Curly_Operator
 463         (Op      : Opcode;
 464          Min     : Natural;
 465          Max     : Natural;
 466          Operand : Pointer;
 467          Greedy  : Boolean := True);
 468       --  Insert an operator for CURLY ({Min}, {Min,} or {Min,Max}).
 469       --  If Minmod is True, then the operator is non-greedy.
 470 
 471       procedure Link_Tail (P, Val : Pointer);
 472       --  Link_Tail sets the next-pointer at the end of a node chain
 473 
 474       procedure Link_Operand_Tail (P, Val : Pointer);
 475       --  Link_Tail on operand of first argument; noop if operand-less
 476 
 477       procedure Fail (M : String);
 478       pragma No_Return (Fail);
 479       --  Fail with a diagnostic message, if possible
 480 
 481       function Is_Curly_Operator (IP : Natural) return Boolean;
 482       --  Return True if IP is looking at a '{' that is the beginning
 483       --  of a curly operator, i.e. it matches {\d+,?\d*}
 484 
 485       function Is_Mult (IP : Natural) return Boolean;
 486       --  Return True if C is a regexp multiplier: '+', '*' or '?'
 487 
 488       procedure Get_Curly_Arguments
 489         (IP     : Natural;
 490          Min    : out Natural;
 491          Max    : out Natural;
 492          Greedy : out Boolean);
 493       --  Parse the argument list for a curly operator.
 494       --  It is assumed that IP is indeed pointing at a valid operator.
 495       --  So what is IP and how come IP is not referenced in the body ???
 496 
 497       procedure Parse_Character_Class (IP : out Pointer);
 498       --  Parse a character class.
 499       --  The calling subprogram should consume the opening '[' before.
 500 
 501       procedure Parse_Literal
 502         (Expr_Flags : out Expression_Flags;
 503          IP         : out Pointer);
 504       --  Parse_Literal encodes a string of characters to be matched exactly
 505 
 506       function Parse_Posix_Character_Class return Std_Class;
 507       --  Parse a posix character class, like [:alpha:] or [:^alpha:].
 508       --  The caller is supposed to absorb the opening [.
 509 
 510       pragma Inline (Is_Mult);
 511       pragma Inline (Emit_Natural);
 512       pragma Inline (Parse_Character_Class); --  since used only once
 513 
 514       ---------------
 515       -- Case_Emit --
 516       ---------------
 517 
 518       procedure Case_Emit (C : Character) is
 519       begin
 520          if (Flags and Case_Insensitive) /= 0 then
 521             Emit (To_Lower (C));
 522 
 523          else
 524             --  Dump current character
 525 
 526             Emit (C);
 527          end if;
 528       end Case_Emit;
 529 
 530       ----------
 531       -- Emit --
 532       ----------
 533 
 534       procedure Emit (B : Character) is
 535       begin
 536          if Emit_Ptr <= PM.Size then
 537             Program (Emit_Ptr) := B;
 538          end if;
 539 
 540          Emit_Ptr := Emit_Ptr + 1;
 541       end Emit;
 542 
 543       ----------------
 544       -- Emit_Class --
 545       ----------------
 546 
 547       procedure Emit_Class (Bitmap : Character_Class) is
 548          subtype Program31 is Program_Data (0 .. 31);
 549 
 550          function Convert is new Ada.Unchecked_Conversion
 551            (Character_Class, Program31);
 552 
 553       begin
 554          --  What is the mysterious constant 31 here??? Can't it be expressed
 555          --  symbolically (size of integer - 1 or some such???). In any case
 556          --  it should be declared as a constant (and referenced presumably
 557          --  as this constant + 1 below.
 558 
 559          if Emit_Ptr + 31 <= PM.Size then
 560             Program (Emit_Ptr .. Emit_Ptr + 31) := Convert (Bitmap);
 561          end if;
 562 
 563          Emit_Ptr := Emit_Ptr + 32;
 564       end Emit_Class;
 565 
 566       ------------------
 567       -- Emit_Natural --
 568       ------------------
 569 
 570       procedure Emit_Natural (IP : Pointer; N : Natural) is
 571       begin
 572          if IP + 1 <= PM.Size then
 573             Program (IP + 1) := Character'Val (N / 256);
 574             Program (IP) := Character'Val (N mod 256);
 575          end if;
 576       end Emit_Natural;
 577 
 578       ---------------
 579       -- Emit_Node --
 580       ---------------
 581 
 582       function Emit_Node (Op : Opcode) return Pointer is
 583          Result : constant Pointer := Emit_Ptr;
 584 
 585       begin
 586          if Emit_Ptr + 2 <= PM.Size then
 587             Program (Emit_Ptr) := Character'Val (Opcode'Pos (Op));
 588             Program (Emit_Ptr + 1) := ASCII.NUL;
 589             Program (Emit_Ptr + 2) := ASCII.NUL;
 590          end if;
 591 
 592          Emit_Ptr := Emit_Ptr + Next_Pointer_Bytes;
 593          return Result;
 594       end Emit_Node;
 595 
 596       ----------
 597       -- Fail --
 598       ----------
 599 
 600       procedure Fail (M : String) is
 601       begin
 602          raise Expression_Error with M;
 603       end Fail;
 604 
 605       -------------------------
 606       -- Get_Curly_Arguments --
 607       -------------------------
 608 
 609       procedure Get_Curly_Arguments
 610         (IP     : Natural;
 611          Min    : out Natural;
 612          Max    : out Natural;
 613          Greedy : out Boolean)
 614       is
 615          pragma Unreferenced (IP);
 616 
 617          Save_Pos : Natural := Parse_Pos + 1;
 618 
 619       begin
 620          Min := 0;
 621          Max := Max_Curly_Repeat;
 622 
 623          while Expression (Parse_Pos) /= '}'
 624            and then Expression (Parse_Pos) /= ','
 625          loop
 626             Parse_Pos := Parse_Pos + 1;
 627          end loop;
 628 
 629          Min := Natural'Value (Expression (Save_Pos .. Parse_Pos - 1));
 630 
 631          if Expression (Parse_Pos) = ',' then
 632             Save_Pos := Parse_Pos + 1;
 633             while Expression (Parse_Pos) /= '}' loop
 634                Parse_Pos := Parse_Pos + 1;
 635             end loop;
 636 
 637             if Save_Pos /= Parse_Pos then
 638                Max := Natural'Value (Expression (Save_Pos .. Parse_Pos - 1));
 639             end if;
 640 
 641          else
 642             Max := Min;
 643          end if;
 644 
 645          if Parse_Pos < Expression'Last
 646            and then Expression (Parse_Pos + 1) = '?'
 647          then
 648             Greedy := False;
 649             Parse_Pos := Parse_Pos + 1;
 650 
 651          else
 652             Greedy := True;
 653          end if;
 654       end Get_Curly_Arguments;
 655 
 656       ---------------------------
 657       -- Insert_Curly_Operator --
 658       ---------------------------
 659 
 660       procedure Insert_Curly_Operator
 661         (Op      : Opcode;
 662          Min     : Natural;
 663          Max     : Natural;
 664          Operand : Pointer;
 665          Greedy  : Boolean := True)
 666       is
 667          Old    : Pointer;
 668       begin
 669          Old := Insert_Operator_Before (Op, Operand, Greedy, Opsize => 7);
 670          Emit_Natural (Old + Next_Pointer_Bytes, Min);
 671          Emit_Natural (Old + Next_Pointer_Bytes + 2, Max);
 672       end Insert_Curly_Operator;
 673 
 674       ----------------------------
 675       -- Insert_Operator_Before --
 676       ----------------------------
 677 
 678       function Insert_Operator_Before
 679         (Op      : Opcode;
 680          Operand : Pointer;
 681          Greedy  : Boolean;
 682          Opsize  : Pointer) return Pointer
 683       is
 684          Dest : constant Pointer := Emit_Ptr;
 685          Old  : Pointer;
 686          Size : Pointer := Opsize;
 687 
 688       begin
 689          --  If not greedy, we have to emit another opcode first
 690 
 691          if not Greedy then
 692             Size := Size + Next_Pointer_Bytes;
 693          end if;
 694 
 695          --  Move the operand in the byte-compilation, so that we can insert
 696          --  the operator before it.
 697 
 698          if Emit_Ptr + Size <= PM.Size then
 699             Program (Operand + Size .. Emit_Ptr + Size) :=
 700               Program (Operand .. Emit_Ptr);
 701          end if;
 702 
 703          --  Insert the operator at the position previously occupied by the
 704          --  operand.
 705 
 706          Emit_Ptr := Operand;
 707 
 708          if not Greedy then
 709             Old := Emit_Node (MINMOD);
 710             Link_Tail (Old, Old + Next_Pointer_Bytes);
 711          end if;
 712 
 713          Old := Emit_Node (Op);
 714          Emit_Ptr := Dest + Size;
 715          return Old;
 716       end Insert_Operator_Before;
 717 
 718       ---------------------
 719       -- Insert_Operator --
 720       ---------------------
 721 
 722       procedure Insert_Operator
 723         (Op      : Opcode;
 724          Operand : Pointer;
 725          Greedy  : Boolean := True)
 726       is
 727          Discard : Pointer;
 728          pragma Warnings (Off, Discard);
 729       begin
 730          Discard := Insert_Operator_Before
 731             (Op, Operand, Greedy, Opsize => Next_Pointer_Bytes);
 732       end Insert_Operator;
 733 
 734       -----------------------
 735       -- Is_Curly_Operator --
 736       -----------------------
 737 
 738       function Is_Curly_Operator (IP : Natural) return Boolean is
 739          Scan : Natural := IP;
 740 
 741       begin
 742          if Expression (Scan) /= '{'
 743            or else Scan + 2 > Expression'Last
 744            or else not Is_Digit (Expression (Scan + 1))
 745          then
 746             return False;
 747          end if;
 748 
 749          Scan := Scan + 1;
 750 
 751          --  The first digit
 752 
 753          loop
 754             Scan := Scan + 1;
 755 
 756             if Scan > Expression'Last then
 757                return False;
 758             end if;
 759 
 760             exit when not Is_Digit (Expression (Scan));
 761          end loop;
 762 
 763          if Expression (Scan) = ',' then
 764             loop
 765                Scan := Scan + 1;
 766 
 767                if Scan > Expression'Last then
 768                   return False;
 769                end if;
 770 
 771                exit when not Is_Digit (Expression (Scan));
 772             end loop;
 773          end if;
 774 
 775          return Expression (Scan) = '}';
 776       end Is_Curly_Operator;
 777 
 778       -------------
 779       -- Is_Mult --
 780       -------------
 781 
 782       function Is_Mult (IP : Natural) return Boolean is
 783          C : constant Character := Expression (IP);
 784 
 785       begin
 786          return     C = '*'
 787            or else  C = '+'
 788            or else  C = '?'
 789            or else (C = '{' and then Is_Curly_Operator (IP));
 790       end Is_Mult;
 791 
 792       -----------------------
 793       -- Link_Operand_Tail --
 794       -----------------------
 795 
 796       procedure Link_Operand_Tail (P, Val : Pointer) is
 797       begin
 798          if P <= PM.Size and then Program (P) = BRANCH then
 799             Link_Tail (Operand (P), Val);
 800          end if;
 801       end Link_Operand_Tail;
 802 
 803       ---------------
 804       -- Link_Tail --
 805       ---------------
 806 
 807       procedure Link_Tail (P, Val : Pointer) is
 808          Scan   : Pointer;
 809          Temp   : Pointer;
 810          Offset : Pointer;
 811 
 812       begin
 813          --  Find last node (the size of the pattern matcher might be too
 814          --  small, so don't try to read past its end).
 815 
 816          Scan := P;
 817          while Scan + Next_Pointer_Bytes <= PM.Size loop
 818             Temp := Get_Next (Program, Scan);
 819             exit when Temp = Scan;
 820             Scan := Temp;
 821          end loop;
 822 
 823          Offset := Val - Scan;
 824 
 825          Emit_Natural (Scan + 1, Natural (Offset));
 826       end Link_Tail;
 827 
 828       -----------
 829       -- Parse --
 830       -----------
 831 
 832       --  Combining parenthesis handling with the base level of regular
 833       --  expression is a trifle forced, but the need to tie the tails of the
 834       --  the branches to what follows makes it hard to avoid.
 835 
 836       procedure Parse
 837          (Parenthesized : Boolean;
 838           Capturing     : Boolean;
 839           Flags         : out Expression_Flags;
 840           IP            : out Pointer)
 841       is
 842          E           : String renames Expression;
 843          Br, Br2     : Pointer;
 844          Ender       : Pointer;
 845          Par_No      : Natural;
 846          New_Flags   : Expression_Flags;
 847          Have_Branch : Boolean := False;
 848 
 849       begin
 850          Flags := (Has_Width => True, others => False);  -- Tentatively
 851 
 852          --  Make an OPEN node, if parenthesized
 853 
 854          if Parenthesized and then Capturing then
 855             if Matcher.Paren_Count > Max_Paren_Count then
 856                Fail ("too many ()");
 857             end if;
 858 
 859             Par_No := Matcher.Paren_Count + 1;
 860             Matcher.Paren_Count := Matcher.Paren_Count + 1;
 861             IP := Emit_Node (OPEN);
 862             Emit (Character'Val (Par_No));
 863          else
 864             IP := 0;
 865             Par_No := 0;
 866          end if;
 867 
 868          --  Pick up the branches, linking them together
 869 
 870          Parse_Branch (New_Flags, True, Br);
 871 
 872          if Br = 0 then
 873             IP := 0;
 874             return;
 875          end if;
 876 
 877          if Parse_Pos <= Parse_End
 878            and then E (Parse_Pos) = '|'
 879          then
 880             Insert_Operator (BRANCH, Br);
 881             Have_Branch := True;
 882          end if;
 883 
 884          if IP /= 0 then
 885             Link_Tail (IP, Br);   -- OPEN -> first
 886          else
 887             IP := Br;
 888          end if;
 889 
 890          if not New_Flags.Has_Width then
 891             Flags.Has_Width := False;
 892          end if;
 893 
 894          Flags.SP_Start := Flags.SP_Start or else New_Flags.SP_Start;
 895 
 896          while Parse_Pos <= Parse_End
 897            and then (E (Parse_Pos) = '|')
 898          loop
 899             Parse_Pos := Parse_Pos + 1;
 900             Parse_Branch (New_Flags, False, Br);
 901 
 902             if Br = 0 then
 903                IP := 0;
 904                return;
 905             end if;
 906 
 907             Link_Tail (IP, Br);   -- BRANCH -> BRANCH
 908 
 909             if not New_Flags.Has_Width then
 910                Flags.Has_Width := False;
 911             end if;
 912 
 913             Flags.SP_Start := Flags.SP_Start or else New_Flags.SP_Start;
 914          end loop;
 915 
 916          --  Make a closing node, and hook it on the end
 917 
 918          if Parenthesized then
 919             if Capturing then
 920                Ender := Emit_Node (CLOSE);
 921                Emit (Character'Val (Par_No));
 922                Link_Tail (IP, Ender);
 923 
 924             else
 925                --  Need to keep looking after the closing parenthesis
 926                Ender := Emit_Ptr;
 927             end if;
 928 
 929          else
 930             Ender := Emit_Node (EOP);
 931             Link_Tail (IP, Ender);
 932          end if;
 933 
 934          if Have_Branch and then Emit_Ptr <= PM.Size + 1 then
 935 
 936             --  Hook the tails of the branches to the closing node
 937 
 938             Br := IP;
 939             loop
 940                Link_Operand_Tail (Br, Ender);
 941                Br2 := Get_Next (Program, Br);
 942                exit when Br2 = Br;
 943                Br := Br2;
 944             end loop;
 945          end if;
 946 
 947          --  Check for proper termination
 948 
 949          if Parenthesized then
 950             if Parse_Pos > Parse_End or else E (Parse_Pos) /= ')' then
 951                Fail ("unmatched ()");
 952             end if;
 953 
 954             Parse_Pos := Parse_Pos + 1;
 955 
 956          elsif Parse_Pos <= Parse_End then
 957             if E (Parse_Pos) = ')'  then
 958                Fail ("unmatched ')'");
 959             else
 960                Fail ("junk on end");         -- "Can't happen"
 961             end if;
 962          end if;
 963       end Parse;
 964 
 965       ----------------
 966       -- Parse_Atom --
 967       ----------------
 968 
 969       procedure Parse_Atom
 970         (Expr_Flags : out Expression_Flags;
 971          IP         : out Pointer)
 972       is
 973          C : Character;
 974 
 975       begin
 976          --  Tentatively set worst expression case
 977 
 978          Expr_Flags := Worst_Expression;
 979 
 980          C := Expression (Parse_Pos);
 981          Parse_Pos := Parse_Pos + 1;
 982 
 983          case (C) is
 984             when '^' =>
 985                IP :=
 986                  Emit_Node
 987                    (if (Flags and Multiple_Lines) /= 0 then MBOL
 988                     elsif (Flags and Single_Line) /= 0 then SBOL
 989                     else BOL);
 990 
 991             when '$' =>
 992                IP :=
 993                  Emit_Node
 994                    (if (Flags and Multiple_Lines) /= 0 then MEOL
 995                     elsif (Flags and Single_Line) /= 0 then SEOL
 996                     else EOL);
 997 
 998             when '.' =>
 999                IP :=
1000                  Emit_Node
1001                    (if (Flags and Single_Line) /= 0 then SANY else ANY);
1002 
1003                Expr_Flags.Has_Width := True;
1004                Expr_Flags.Simple := True;
1005 
1006             when '[' =>
1007                Parse_Character_Class (IP);
1008                Expr_Flags.Has_Width := True;
1009                Expr_Flags.Simple := True;
1010 
1011             when '(' =>
1012                declare
1013                   New_Flags : Expression_Flags;
1014 
1015                begin
1016                   if Parse_Pos <= Parse_End - 1
1017                     and then Expression (Parse_Pos) = '?'
1018                     and then Expression (Parse_Pos + 1) = ':'
1019                   then
1020                      Parse_Pos := Parse_Pos + 2;
1021 
1022                      --  Non-capturing parenthesis
1023 
1024                      Parse (True, False, New_Flags, IP);
1025 
1026                   else
1027                      --  Capturing parenthesis
1028 
1029                      Parse (True, True, New_Flags, IP);
1030                      Expr_Flags.Has_Width :=
1031                        Expr_Flags.Has_Width or else New_Flags.Has_Width;
1032                      Expr_Flags.SP_Start :=
1033                        Expr_Flags.SP_Start or else New_Flags.SP_Start;
1034                      if IP = 0 then
1035                         return;
1036                      end if;
1037                   end if;
1038                end;
1039 
1040             when '|' | ASCII.LF | ')' =>
1041                Fail ("internal urp");  --  Supposed to be caught earlier
1042 
1043             when '?' | '+' | '*' =>
1044                Fail (C & " follows nothing");
1045 
1046             when '{' =>
1047                if Is_Curly_Operator (Parse_Pos - 1) then
1048                   Fail (C & " follows nothing");
1049                else
1050                   Parse_Literal (Expr_Flags, IP);
1051                end if;
1052 
1053             when '\' =>
1054                if Parse_Pos > Parse_End then
1055                   Fail ("trailing \");
1056                end if;
1057 
1058                Parse_Pos := Parse_Pos + 1;
1059 
1060                case Expression (Parse_Pos - 1) is
1061                   when 'b'        =>
1062                      IP := Emit_Node (BOUND);
1063 
1064                   when 'B'        =>
1065                      IP := Emit_Node (NBOUND);
1066 
1067                   when 's'        =>
1068                      IP := Emit_Node (SPACE);
1069                      Expr_Flags.Simple := True;
1070                      Expr_Flags.Has_Width := True;
1071 
1072                   when 'S'        =>
1073                      IP := Emit_Node (NSPACE);
1074                      Expr_Flags.Simple := True;
1075                      Expr_Flags.Has_Width := True;
1076 
1077                   when 'd'        =>
1078                      IP := Emit_Node (DIGIT);
1079                      Expr_Flags.Simple := True;
1080                      Expr_Flags.Has_Width := True;
1081 
1082                   when 'D'        =>
1083                      IP := Emit_Node (NDIGIT);
1084                      Expr_Flags.Simple := True;
1085                      Expr_Flags.Has_Width := True;
1086 
1087                   when 'w'        =>
1088                      IP := Emit_Node (ALNUM);
1089                      Expr_Flags.Simple := True;
1090                      Expr_Flags.Has_Width := True;
1091 
1092                   when 'W'        =>
1093                      IP := Emit_Node (NALNUM);
1094                      Expr_Flags.Simple := True;
1095                      Expr_Flags.Has_Width := True;
1096 
1097                   when 'A'        =>
1098                      IP := Emit_Node (SBOL);
1099 
1100                   when 'G'        =>
1101                      IP := Emit_Node (SEOL);
1102 
1103                   when '0' .. '9' =>
1104                      IP := Emit_Node (REFF);
1105 
1106                      declare
1107                         Save : constant Natural := Parse_Pos - 1;
1108 
1109                      begin
1110                         while Parse_Pos <= Expression'Last
1111                           and then Is_Digit (Expression (Parse_Pos))
1112                         loop
1113                            Parse_Pos := Parse_Pos + 1;
1114                         end loop;
1115 
1116                         Emit (Character'Val (Natural'Value
1117                                (Expression (Save .. Parse_Pos - 1))));
1118                      end;
1119 
1120                   when others =>
1121                      Parse_Pos := Parse_Pos - 1;
1122                      Parse_Literal (Expr_Flags, IP);
1123                end case;
1124 
1125             when others =>
1126                Parse_Literal (Expr_Flags, IP);
1127          end case;
1128       end Parse_Atom;
1129 
1130       ------------------
1131       -- Parse_Branch --
1132       ------------------
1133 
1134       procedure Parse_Branch
1135         (Flags : out Expression_Flags;
1136          First : Boolean;
1137          IP    : out Pointer)
1138       is
1139          E         : String renames Expression;
1140          Chain     : Pointer;
1141          Last      : Pointer;
1142          New_Flags : Expression_Flags;
1143 
1144          Discard : Pointer;
1145          pragma Warnings (Off, Discard);
1146 
1147       begin
1148          Flags := Worst_Expression;    -- Tentatively
1149          IP := (if First then Emit_Ptr else Emit_Node (BRANCH));
1150 
1151          Chain := 0;
1152          while Parse_Pos <= Parse_End
1153            and then E (Parse_Pos) /= ')'
1154            and then E (Parse_Pos) /= ASCII.LF
1155            and then E (Parse_Pos) /= '|'
1156          loop
1157             Parse_Piece (New_Flags, Last);
1158 
1159             if Last = 0 then
1160                IP := 0;
1161                return;
1162             end if;
1163 
1164             Flags.Has_Width := Flags.Has_Width or else New_Flags.Has_Width;
1165 
1166             if Chain = 0 then            -- First piece
1167                Flags.SP_Start := Flags.SP_Start or else New_Flags.SP_Start;
1168             else
1169                Link_Tail (Chain, Last);
1170             end if;
1171 
1172             Chain := Last;
1173          end loop;
1174 
1175          --  Case where loop ran zero CURLY
1176 
1177          if Chain = 0 then
1178             Discard := Emit_Node (NOTHING);
1179          end if;
1180       end Parse_Branch;
1181 
1182       ---------------------------
1183       -- Parse_Character_Class --
1184       ---------------------------
1185 
1186       procedure Parse_Character_Class (IP : out Pointer) is
1187          Bitmap      : Character_Class;
1188          Invert      : Boolean := False;
1189          In_Range    : Boolean := False;
1190          Named_Class : Std_Class := ANYOF_NONE;
1191          Value       : Character;
1192          Last_Value  : Character := ASCII.NUL;
1193 
1194       begin
1195          Reset_Class (Bitmap);
1196 
1197          --  Do we have an invert character class ?
1198 
1199          if Parse_Pos <= Parse_End
1200            and then Expression (Parse_Pos) = '^'
1201          then
1202             Invert := True;
1203             Parse_Pos := Parse_Pos + 1;
1204          end if;
1205 
1206          --  First character can be ] or - without closing the class
1207 
1208          if Parse_Pos <= Parse_End
1209            and then (Expression (Parse_Pos) = ']'
1210                       or else Expression (Parse_Pos) = '-')
1211          then
1212             Set_In_Class (Bitmap, Expression (Parse_Pos));
1213             Parse_Pos := Parse_Pos + 1;
1214          end if;
1215 
1216          --  While we don't have the end of the class
1217 
1218          while Parse_Pos <= Parse_End
1219            and then Expression (Parse_Pos) /= ']'
1220          loop
1221             Named_Class := ANYOF_NONE;
1222             Value := Expression (Parse_Pos);
1223             Parse_Pos := Parse_Pos + 1;
1224 
1225             --  Do we have a Posix character class
1226             if Value = '[' then
1227                Named_Class := Parse_Posix_Character_Class;
1228 
1229             elsif Value = '\' then
1230                if Parse_Pos = Parse_End then
1231                   Fail ("Trailing \");
1232                end if;
1233                Value := Expression (Parse_Pos);
1234                Parse_Pos := Parse_Pos + 1;
1235 
1236                case Value is
1237                   when 'w' => Named_Class := ANYOF_ALNUM;
1238                   when 'W' => Named_Class := ANYOF_NALNUM;
1239                   when 's' => Named_Class := ANYOF_SPACE;
1240                   when 'S' => Named_Class := ANYOF_NSPACE;
1241                   when 'd' => Named_Class := ANYOF_DIGIT;
1242                   when 'D' => Named_Class := ANYOF_NDIGIT;
1243                   when 'n' => Value := ASCII.LF;
1244                   when 'r' => Value := ASCII.CR;
1245                   when 't' => Value := ASCII.HT;
1246                   when 'f' => Value := ASCII.FF;
1247                   when 'e' => Value := ASCII.ESC;
1248                   when 'a' => Value := ASCII.BEL;
1249 
1250                   --  when 'x'  => ??? hexadecimal value
1251                   --  when 'c'  => ??? control character
1252                   --  when '0'..'9' => ??? octal character
1253 
1254                   when others => null;
1255                end case;
1256             end if;
1257 
1258             --  Do we have a character class?
1259 
1260             if Named_Class /= ANYOF_NONE then
1261 
1262                --  A range like 'a-\d' or 'a-[:digit:] is not a range
1263 
1264                if In_Range then
1265                   Set_In_Class (Bitmap, Last_Value);
1266                   Set_In_Class (Bitmap, '-');
1267                   In_Range := False;
1268                end if;
1269 
1270                --  Expand the range
1271 
1272                case Named_Class is
1273                   when ANYOF_NONE => null;
1274 
1275                   when ANYOF_ALNUM | ANYOF_ALNUMC =>
1276                      for Value in Class_Byte'Range loop
1277                         if Is_Alnum (Character'Val (Value)) then
1278                            Set_In_Class (Bitmap, Character'Val (Value));
1279                         end if;
1280                      end loop;
1281 
1282                   when ANYOF_NALNUM | ANYOF_NALNUMC =>
1283                      for Value in Class_Byte'Range loop
1284                         if not Is_Alnum (Character'Val (Value)) then
1285                            Set_In_Class (Bitmap, Character'Val (Value));
1286                         end if;
1287                      end loop;
1288 
1289                   when ANYOF_SPACE =>
1290                      for Value in Class_Byte'Range loop
1291                         if Is_White_Space (Character'Val (Value)) then
1292                            Set_In_Class (Bitmap, Character'Val (Value));
1293                         end if;
1294                      end loop;
1295 
1296                   when ANYOF_NSPACE =>
1297                      for Value in Class_Byte'Range loop
1298                         if not Is_White_Space (Character'Val (Value)) then
1299                            Set_In_Class (Bitmap, Character'Val (Value));
1300                         end if;
1301                      end loop;
1302 
1303                   when ANYOF_DIGIT =>
1304                      for Value in Class_Byte'Range loop
1305                         if Is_Digit (Character'Val (Value)) then
1306                            Set_In_Class (Bitmap, Character'Val (Value));
1307                         end if;
1308                      end loop;
1309 
1310                   when ANYOF_NDIGIT =>
1311                      for Value in Class_Byte'Range loop
1312                         if not Is_Digit (Character'Val (Value)) then
1313                            Set_In_Class (Bitmap, Character'Val (Value));
1314                         end if;
1315                      end loop;
1316 
1317                   when ANYOF_ALPHA =>
1318                      for Value in Class_Byte'Range loop
1319                         if Is_Letter (Character'Val (Value)) then
1320                            Set_In_Class (Bitmap, Character'Val (Value));
1321                         end if;
1322                      end loop;
1323 
1324                   when ANYOF_NALPHA =>
1325                      for Value in Class_Byte'Range loop
1326                         if not Is_Letter (Character'Val (Value)) then
1327                            Set_In_Class (Bitmap, Character'Val (Value));
1328                         end if;
1329                      end loop;
1330 
1331                   when ANYOF_ASCII =>
1332                      for Value in 0 .. 127 loop
1333                         Set_In_Class (Bitmap, Character'Val (Value));
1334                      end loop;
1335 
1336                   when ANYOF_NASCII =>
1337                      for Value in 128 .. 255 loop
1338                         Set_In_Class (Bitmap, Character'Val (Value));
1339                      end loop;
1340 
1341                   when ANYOF_CNTRL =>
1342                      for Value in Class_Byte'Range loop
1343                         if Is_Control (Character'Val (Value)) then
1344                            Set_In_Class (Bitmap, Character'Val (Value));
1345                         end if;
1346                      end loop;
1347 
1348                   when ANYOF_NCNTRL =>
1349                      for Value in Class_Byte'Range loop
1350                         if not Is_Control (Character'Val (Value)) then
1351                            Set_In_Class (Bitmap, Character'Val (Value));
1352                         end if;
1353                      end loop;
1354 
1355                   when ANYOF_GRAPH =>
1356                      for Value in Class_Byte'Range loop
1357                         if Is_Graphic (Character'Val (Value)) then
1358                            Set_In_Class (Bitmap, Character'Val (Value));
1359                         end if;
1360                      end loop;
1361 
1362                   when ANYOF_NGRAPH =>
1363                      for Value in Class_Byte'Range loop
1364                         if not Is_Graphic (Character'Val (Value)) then
1365                            Set_In_Class (Bitmap, Character'Val (Value));
1366                         end if;
1367                      end loop;
1368 
1369                   when ANYOF_LOWER =>
1370                      for Value in Class_Byte'Range loop
1371                         if Is_Lower (Character'Val (Value)) then
1372                            Set_In_Class (Bitmap, Character'Val (Value));
1373                         end if;
1374                      end loop;
1375 
1376                   when ANYOF_NLOWER =>
1377                      for Value in Class_Byte'Range loop
1378                         if not Is_Lower (Character'Val (Value)) then
1379                            Set_In_Class (Bitmap, Character'Val (Value));
1380                         end if;
1381                      end loop;
1382 
1383                   when ANYOF_PRINT =>
1384                      for Value in Class_Byte'Range loop
1385                         if Is_Printable (Character'Val (Value)) then
1386                            Set_In_Class (Bitmap, Character'Val (Value));
1387                         end if;
1388                      end loop;
1389 
1390                   when ANYOF_NPRINT =>
1391                      for Value in Class_Byte'Range loop
1392                         if not Is_Printable (Character'Val (Value)) then
1393                            Set_In_Class (Bitmap, Character'Val (Value));
1394                         end if;
1395                      end loop;
1396 
1397                   when ANYOF_PUNCT =>
1398                      for Value in Class_Byte'Range loop
1399                         if Is_Printable (Character'Val (Value))
1400                           and then not Is_White_Space (Character'Val (Value))
1401                           and then not Is_Alnum (Character'Val (Value))
1402                         then
1403                            Set_In_Class (Bitmap, Character'Val (Value));
1404                         end if;
1405                      end loop;
1406 
1407                   when ANYOF_NPUNCT =>
1408                      for Value in Class_Byte'Range loop
1409                         if not Is_Printable (Character'Val (Value))
1410                           or else Is_White_Space (Character'Val (Value))
1411                           or else Is_Alnum (Character'Val (Value))
1412                         then
1413                            Set_In_Class (Bitmap, Character'Val (Value));
1414                         end if;
1415                      end loop;
1416 
1417                   when ANYOF_UPPER =>
1418                      for Value in Class_Byte'Range loop
1419                         if Is_Upper (Character'Val (Value)) then
1420                            Set_In_Class (Bitmap, Character'Val (Value));
1421                         end if;
1422                      end loop;
1423 
1424                   when ANYOF_NUPPER =>
1425                      for Value in Class_Byte'Range loop
1426                         if not Is_Upper (Character'Val (Value)) then
1427                            Set_In_Class (Bitmap, Character'Val (Value));
1428                         end if;
1429                      end loop;
1430 
1431                   when ANYOF_XDIGIT =>
1432                      for Value in Class_Byte'Range loop
1433                         if Is_Hexadecimal_Digit (Character'Val (Value)) then
1434                            Set_In_Class (Bitmap, Character'Val (Value));
1435                         end if;
1436                      end loop;
1437 
1438                   when ANYOF_NXDIGIT =>
1439                      for Value in Class_Byte'Range loop
1440                         if not Is_Hexadecimal_Digit
1441                           (Character'Val (Value))
1442                         then
1443                            Set_In_Class (Bitmap, Character'Val (Value));
1444                         end if;
1445                      end loop;
1446 
1447                end case;
1448 
1449             --  Not a character range
1450 
1451             elsif not In_Range then
1452                Last_Value := Value;
1453 
1454                if Parse_Pos > Expression'Last then
1455                   Fail ("Empty character class []");
1456                end if;
1457 
1458                if Expression (Parse_Pos) = '-'
1459                  and then Parse_Pos < Parse_End
1460                  and then Expression (Parse_Pos + 1) /= ']'
1461                then
1462                   Parse_Pos := Parse_Pos + 1;
1463 
1464                   --  Do we have a range like '\d-a' and '[:space:]-a'
1465                   --  which is not a real range
1466 
1467                   if Named_Class /= ANYOF_NONE then
1468                      Set_In_Class (Bitmap, '-');
1469                   else
1470                      In_Range := True;
1471                   end if;
1472 
1473                else
1474                   Set_In_Class (Bitmap, Value);
1475 
1476                end if;
1477 
1478             --  Else in a character range
1479 
1480             else
1481                if Last_Value > Value then
1482                   Fail ("Invalid Range [" & Last_Value'Img
1483                         & "-" & Value'Img & "]");
1484                end if;
1485 
1486                while Last_Value <= Value loop
1487                   Set_In_Class (Bitmap, Last_Value);
1488                   Last_Value := Character'Succ (Last_Value);
1489                end loop;
1490 
1491                In_Range := False;
1492 
1493             end if;
1494 
1495          end loop;
1496 
1497          --  Optimize case-insensitive ranges (put the upper case or lower
1498          --  case character into the bitmap)
1499 
1500          if (Flags and Case_Insensitive) /= 0 then
1501             for C in Character'Range loop
1502                if Get_From_Class (Bitmap, C) then
1503                   Set_In_Class (Bitmap, To_Lower (C));
1504                   Set_In_Class (Bitmap, To_Upper (C));
1505                end if;
1506             end loop;
1507          end if;
1508 
1509          --  Optimize inverted classes
1510 
1511          if Invert then
1512             for J in Bitmap'Range loop
1513                Bitmap (J) := not Bitmap (J);
1514             end loop;
1515          end if;
1516 
1517          Parse_Pos := Parse_Pos + 1;
1518 
1519          --  Emit the class
1520 
1521          IP := Emit_Node (ANYOF);
1522          Emit_Class (Bitmap);
1523       end Parse_Character_Class;
1524 
1525       -------------------
1526       -- Parse_Literal --
1527       -------------------
1528 
1529       --  This is a bit tricky due to quoted chars and due to
1530       --  the multiplier characters '*', '+', and '?' that
1531       --  take the SINGLE char previous as their operand.
1532 
1533       --  On entry, the character at Parse_Pos - 1 is going to go
1534       --  into the string, no matter what it is. It could be
1535       --  following a \ if Parse_Atom was entered from the '\' case.
1536 
1537       --  Basic idea is to pick up a good char in C and examine
1538       --  the next char. If Is_Mult (C) then twiddle, if it's a \
1539       --  then frozzle and if it's another magic char then push C and
1540       --  terminate the string. If none of the above, push C on the
1541       --  string and go around again.
1542 
1543       --  Start_Pos is used to remember where "the current character"
1544       --  starts in the string, if due to an Is_Mult we need to back
1545       --  up and put the current char in a separate 1-character string.
1546       --  When Start_Pos is 0, C is the only char in the string;
1547       --  this is used in Is_Mult handling, and in setting the SIMPLE
1548       --  flag at the end.
1549 
1550       procedure Parse_Literal
1551         (Expr_Flags : out Expression_Flags;
1552          IP         : out Pointer)
1553       is
1554          Start_Pos  : Natural := 0;
1555          C          : Character;
1556          Length_Ptr : Pointer;
1557 
1558          Has_Special_Operator : Boolean := False;
1559 
1560       begin
1561          Parse_Pos := Parse_Pos - 1;      --  Look at current character
1562 
1563          IP :=
1564            Emit_Node
1565              (if (Flags and Case_Insensitive) /= 0 then EXACTF else EXACT);
1566 
1567          Length_Ptr := Emit_Ptr;
1568          Emit_Ptr := String_Operand (IP);
1569 
1570          Parse_Loop :
1571          loop
1572             C := Expression (Parse_Pos); --  Get current character
1573 
1574             case C is
1575                when '.' | '[' | '(' | ')' | '|' | ASCII.LF | '$' | '^' =>
1576 
1577                   if Start_Pos = 0 then
1578                      Start_Pos := Parse_Pos;
1579                      Emit (C);         --  First character is always emitted
1580                   else
1581                      exit Parse_Loop;  --  Else we are done
1582                   end if;
1583 
1584                when '?' | '+' | '*' | '{' =>
1585 
1586                   if Start_Pos = 0 then
1587                      Start_Pos := Parse_Pos;
1588                      Emit (C);         --  First character is always emitted
1589 
1590                   --  Are we looking at an operator, or is this
1591                   --  simply a normal character ?
1592 
1593                   elsif not Is_Mult (Parse_Pos) then
1594                      Start_Pos := Parse_Pos;
1595                      Case_Emit (C);
1596 
1597                   else
1598                      --  We've got something like "abc?d".  Mark this as a
1599                      --  special case. What we want to emit is a first
1600                      --  constant string for "ab", then one for "c" that will
1601                      --  ultimately be transformed with a CURLY operator, A
1602                      --  special case has to be handled for "a?", since there
1603                      --  is no initial string to emit.
1604 
1605                      Has_Special_Operator := True;
1606                      exit Parse_Loop;
1607                   end if;
1608 
1609                when '\' =>
1610                   Start_Pos := Parse_Pos;
1611 
1612                   if Parse_Pos = Parse_End then
1613                      Fail ("Trailing \");
1614 
1615                   else
1616                      case Expression (Parse_Pos + 1) is
1617                         when 'b' | 'B' | 's' | 'S' | 'd' | 'D'
1618                           | 'w' | 'W' | '0' .. '9' | 'G' | 'A'
1619                           => exit Parse_Loop;
1620                         when 'n'         => Emit (ASCII.LF);
1621                         when 't'         => Emit (ASCII.HT);
1622                         when 'r'         => Emit (ASCII.CR);
1623                         when 'f'         => Emit (ASCII.FF);
1624                         when 'e'         => Emit (ASCII.ESC);
1625                         when 'a'         => Emit (ASCII.BEL);
1626                         when others      => Emit (Expression (Parse_Pos + 1));
1627                      end case;
1628 
1629                      Parse_Pos := Parse_Pos + 1;
1630                   end if;
1631 
1632                when others =>
1633                   Start_Pos := Parse_Pos;
1634                   Case_Emit (C);
1635             end case;
1636 
1637             exit Parse_Loop when Emit_Ptr - Length_Ptr = 254;
1638 
1639             Parse_Pos := Parse_Pos + 1;
1640 
1641             exit Parse_Loop when Parse_Pos > Parse_End;
1642          end loop Parse_Loop;
1643 
1644          --  Is the string followed by a '*+?{' operator ? If yes, and if there
1645          --  is an initial string to emit, do it now.
1646 
1647          if Has_Special_Operator
1648            and then Emit_Ptr >= Length_Ptr + Next_Pointer_Bytes
1649          then
1650             Emit_Ptr := Emit_Ptr - 1;
1651             Parse_Pos := Start_Pos;
1652          end if;
1653 
1654          if Length_Ptr <= PM.Size then
1655             Program (Length_Ptr) := Character'Val (Emit_Ptr - Length_Ptr - 2);
1656          end if;
1657 
1658          Expr_Flags.Has_Width := True;
1659 
1660          --  Slight optimization when there is a single character
1661 
1662          if Emit_Ptr = Length_Ptr + 2 then
1663             Expr_Flags.Simple := True;
1664          end if;
1665       end Parse_Literal;
1666 
1667       -----------------
1668       -- Parse_Piece --
1669       -----------------
1670 
1671       --  Note that the branching code sequences used for '?' and the
1672       --  general cases of '*' and + are somewhat optimized: they use
1673       --  the same NOTHING node as both the endmarker for their branch
1674       --  list and the body of the last branch. It might seem that
1675       --  this node could be dispensed with entirely, but the endmarker
1676       --  role is not redundant.
1677 
1678       procedure Parse_Piece
1679         (Expr_Flags : out Expression_Flags;
1680          IP         : out Pointer)
1681       is
1682          Op        : Character;
1683          New_Flags : Expression_Flags;
1684          Greedy    : Boolean := True;
1685 
1686       begin
1687          Parse_Atom (New_Flags, IP);
1688 
1689          if IP = 0 then
1690             return;
1691          end if;
1692 
1693          if Parse_Pos > Parse_End
1694            or else not Is_Mult (Parse_Pos)
1695          then
1696             Expr_Flags := New_Flags;
1697             return;
1698          end if;
1699 
1700          Op := Expression (Parse_Pos);
1701 
1702          Expr_Flags :=
1703            (if Op /= '+'
1704             then (SP_Start  => True, others => False)
1705             else (Has_Width => True, others => False));
1706 
1707          --  Detect non greedy operators in the easy cases
1708 
1709          if Op /= '{'
1710            and then Parse_Pos + 1 <= Parse_End
1711            and then Expression (Parse_Pos + 1) = '?'
1712          then
1713             Greedy := False;
1714             Parse_Pos := Parse_Pos + 1;
1715          end if;
1716 
1717          --  Generate the byte code
1718 
1719          case Op is
1720             when '*' =>
1721 
1722                if New_Flags.Simple then
1723                   Insert_Operator (STAR, IP, Greedy);
1724                else
1725                   Link_Tail (IP, Emit_Node (WHILEM));
1726                   Insert_Curly_Operator
1727                     (CURLYX, 0, Max_Curly_Repeat, IP, Greedy);
1728                   Link_Tail (IP, Emit_Node (NOTHING));
1729                end if;
1730 
1731             when '+' =>
1732 
1733                if New_Flags.Simple then
1734                   Insert_Operator (PLUS, IP, Greedy);
1735                else
1736                   Link_Tail (IP, Emit_Node (WHILEM));
1737                   Insert_Curly_Operator
1738                     (CURLYX, 1, Max_Curly_Repeat, IP, Greedy);
1739                   Link_Tail (IP, Emit_Node (NOTHING));
1740                end if;
1741 
1742             when '?' =>
1743                if New_Flags.Simple then
1744                   Insert_Curly_Operator (CURLY, 0, 1, IP, Greedy);
1745                else
1746                   Link_Tail (IP, Emit_Node (WHILEM));
1747                   Insert_Curly_Operator (CURLYX, 0, 1, IP, Greedy);
1748                   Link_Tail (IP, Emit_Node (NOTHING));
1749                end if;
1750 
1751             when '{' =>
1752                declare
1753                   Min, Max : Natural;
1754 
1755                begin
1756                   Get_Curly_Arguments (Parse_Pos, Min, Max, Greedy);
1757 
1758                   if New_Flags.Simple then
1759                      Insert_Curly_Operator (CURLY, Min, Max, IP, Greedy);
1760                   else
1761                      Link_Tail (IP, Emit_Node (WHILEM));
1762                      Insert_Curly_Operator (CURLYX, Min, Max, IP, Greedy);
1763                      Link_Tail (IP, Emit_Node (NOTHING));
1764                   end if;
1765                end;
1766 
1767             when others =>
1768                null;
1769          end case;
1770 
1771          Parse_Pos := Parse_Pos + 1;
1772 
1773          if Parse_Pos <= Parse_End
1774            and then Is_Mult (Parse_Pos)
1775          then
1776             Fail ("nested *+{");
1777          end if;
1778       end Parse_Piece;
1779 
1780       ---------------------------------
1781       -- Parse_Posix_Character_Class --
1782       ---------------------------------
1783 
1784       function Parse_Posix_Character_Class return Std_Class is
1785          Invert : Boolean := False;
1786          Class  : Std_Class := ANYOF_NONE;
1787          E      : String renames Expression;
1788 
1789          --  Class names. Note that code assumes that the length of all
1790          --  classes starting with the same letter have the same length.
1791 
1792          Alnum   : constant String := "alnum:]";
1793          Alpha   : constant String := "alpha:]";
1794          Ascii_C : constant String := "ascii:]";
1795          Cntrl   : constant String := "cntrl:]";
1796          Digit   : constant String := "digit:]";
1797          Graph   : constant String := "graph:]";
1798          Lower   : constant String := "lower:]";
1799          Print   : constant String := "print:]";
1800          Punct   : constant String := "punct:]";
1801          Space   : constant String := "space:]";
1802          Upper   : constant String := "upper:]";
1803          Word    : constant String := "word:]";
1804          Xdigit  : constant String := "xdigit:]";
1805 
1806       begin
1807          --  Case of character class specified
1808 
1809          if Parse_Pos <= Parse_End
1810            and then Expression (Parse_Pos) = ':'
1811          then
1812             Parse_Pos := Parse_Pos + 1;
1813 
1814             --  Do we have something like:  [[:^alpha:]]
1815 
1816             if Parse_Pos <= Parse_End
1817               and then Expression (Parse_Pos) = '^'
1818             then
1819                Invert := True;
1820                Parse_Pos := Parse_Pos + 1;
1821             end if;
1822 
1823             --  Check for class names based on first letter
1824 
1825             case Expression (Parse_Pos) is
1826                when 'a' =>
1827 
1828                   --  All 'a' classes have the same length (Alnum'Length)
1829 
1830                   if Parse_Pos + Alnum'Length - 1 <= Parse_End then
1831                      if
1832                        E (Parse_Pos .. Parse_Pos + Alnum'Length - 1) = Alnum
1833                      then
1834                         Class :=
1835                           (if Invert then ANYOF_NALNUMC else ANYOF_ALNUMC);
1836                         Parse_Pos := Parse_Pos + Alnum'Length;
1837 
1838                      elsif
1839                        E (Parse_Pos .. Parse_Pos + Alpha'Length - 1) = Alpha
1840                      then
1841                         Class :=
1842                           (if Invert then ANYOF_NALPHA else ANYOF_ALPHA);
1843                         Parse_Pos := Parse_Pos + Alpha'Length;
1844 
1845                      elsif E (Parse_Pos .. Parse_Pos + Ascii_C'Length - 1) =
1846                                                                       Ascii_C
1847                      then
1848                         Class :=
1849                           (if Invert then ANYOF_NASCII else ANYOF_ASCII);
1850                         Parse_Pos := Parse_Pos + Ascii_C'Length;
1851                      else
1852                         Fail ("Invalid character class: " & E);
1853                      end if;
1854 
1855                   else
1856                      Fail ("Invalid character class: " & E);
1857                   end if;
1858 
1859                when 'c' =>
1860                   if Parse_Pos + Cntrl'Length - 1 <= Parse_End
1861                     and then
1862                       E (Parse_Pos .. Parse_Pos + Cntrl'Length - 1) = Cntrl
1863                   then
1864                      Class := (if Invert then ANYOF_NCNTRL else ANYOF_CNTRL);
1865                      Parse_Pos := Parse_Pos + Cntrl'Length;
1866                   else
1867                      Fail ("Invalid character class: " & E);
1868                   end if;
1869 
1870                when 'd' =>
1871                   if Parse_Pos + Digit'Length - 1 <= Parse_End
1872                     and then
1873                       E (Parse_Pos .. Parse_Pos + Digit'Length - 1) = Digit
1874                   then
1875                      Class := (if Invert then ANYOF_NDIGIT else ANYOF_DIGIT);
1876                      Parse_Pos := Parse_Pos + Digit'Length;
1877                   end if;
1878 
1879                when 'g' =>
1880                   if Parse_Pos + Graph'Length - 1 <= Parse_End
1881                     and then
1882                       E (Parse_Pos .. Parse_Pos + Graph'Length - 1) = Graph
1883                   then
1884                      Class := (if Invert then ANYOF_NGRAPH else ANYOF_GRAPH);
1885                      Parse_Pos := Parse_Pos + Graph'Length;
1886                   else
1887                      Fail ("Invalid character class: " & E);
1888                   end if;
1889 
1890                when 'l' =>
1891                   if Parse_Pos + Lower'Length - 1 <= Parse_End
1892                     and then
1893                       E (Parse_Pos .. Parse_Pos + Lower'Length - 1) = Lower
1894                   then
1895                      Class := (if Invert then ANYOF_NLOWER else ANYOF_LOWER);
1896                      Parse_Pos := Parse_Pos + Lower'Length;
1897                   else
1898                      Fail ("Invalid character class: " & E);
1899                   end if;
1900 
1901                when 'p' =>
1902 
1903                   --  All 'p' classes have the same length
1904 
1905                   if Parse_Pos + Print'Length - 1 <= Parse_End then
1906                      if
1907                        E (Parse_Pos .. Parse_Pos + Print'Length - 1) = Print
1908                      then
1909                         Class :=
1910                           (if Invert then ANYOF_NPRINT else ANYOF_PRINT);
1911                         Parse_Pos := Parse_Pos + Print'Length;
1912 
1913                      elsif
1914                        E (Parse_Pos .. Parse_Pos + Punct'Length - 1) = Punct
1915                      then
1916                         Class :=
1917                           (if Invert then ANYOF_NPUNCT else ANYOF_PUNCT);
1918                         Parse_Pos := Parse_Pos + Punct'Length;
1919 
1920                      else
1921                         Fail ("Invalid character class: " & E);
1922                      end if;
1923 
1924                   else
1925                      Fail ("Invalid character class: " & E);
1926                   end if;
1927 
1928                when 's' =>
1929                   if Parse_Pos + Space'Length - 1 <= Parse_End
1930                     and then
1931                       E (Parse_Pos .. Parse_Pos + Space'Length - 1) = Space
1932                   then
1933                      Class := (if Invert then ANYOF_NSPACE else ANYOF_SPACE);
1934                      Parse_Pos := Parse_Pos + Space'Length;
1935                   else
1936                      Fail ("Invalid character class: " & E);
1937                   end if;
1938 
1939                when 'u' =>
1940                   if Parse_Pos + Upper'Length - 1 <= Parse_End
1941                     and then
1942                       E (Parse_Pos .. Parse_Pos + Upper'Length - 1) = Upper
1943                   then
1944                      Class := (if Invert then ANYOF_NUPPER else ANYOF_UPPER);
1945                      Parse_Pos := Parse_Pos + Upper'Length;
1946                   else
1947                      Fail ("Invalid character class: " & E);
1948                   end if;
1949 
1950                when 'w' =>
1951                   if Parse_Pos + Word'Length - 1 <= Parse_End
1952                     and then
1953                       E (Parse_Pos .. Parse_Pos + Word'Length - 1) = Word
1954                   then
1955                      Class := (if Invert then ANYOF_NALNUM else ANYOF_ALNUM);
1956                      Parse_Pos := Parse_Pos + Word'Length;
1957                   else
1958                      Fail ("Invalid character class: " & E);
1959                   end if;
1960 
1961                when 'x' =>
1962                   if Parse_Pos + Xdigit'Length - 1 <= Parse_End
1963                     and then
1964                       E (Parse_Pos .. Parse_Pos + Xdigit'Length - 1) = Xdigit
1965                   then
1966                      Class := (if Invert then ANYOF_NXDIGIT else ANYOF_XDIGIT);
1967                      Parse_Pos := Parse_Pos + Xdigit'Length;
1968 
1969                   else
1970                      Fail ("Invalid character class: " & E);
1971                   end if;
1972 
1973                when others =>
1974                   Fail ("Invalid character class: " & E);
1975             end case;
1976 
1977          --  Character class not specified
1978 
1979          else
1980             return ANYOF_NONE;
1981          end if;
1982 
1983          return Class;
1984       end Parse_Posix_Character_Class;
1985 
1986       --  Local Declarations
1987 
1988       Result : Pointer;
1989 
1990       Expr_Flags : Expression_Flags;
1991       pragma Unreferenced (Expr_Flags);
1992 
1993    --  Start of processing for Compile
1994 
1995    begin
1996       Parse (False, False, Expr_Flags, Result);
1997 
1998       if Result = 0 then
1999          Fail ("Couldn't compile expression");
2000       end if;
2001 
2002       Final_Code_Size := Emit_Ptr - 1;
2003 
2004       --  Do we want to actually compile the expression, or simply get the
2005       --  code size ???
2006 
2007       if Emit_Ptr <= PM.Size then
2008          Optimize (PM);
2009       end if;
2010 
2011       PM.Flags := Flags;
2012    end Compile;
2013 
2014    function Compile
2015      (Expression : String;
2016       Flags      : Regexp_Flags := No_Flags) return Pattern_Matcher
2017    is
2018       --  Assume the compiled regexp will fit in 1000 chars. If it does not we
2019       --  will have to compile a second time once the correct size is known. If
2020       --  it fits, we save a significant amount of time by avoiding the second
2021       --  compilation.
2022 
2023       Dummy : Pattern_Matcher (1000);
2024       Size  : Program_Size;
2025 
2026    begin
2027       Compile (Dummy, Expression, Size, Flags);
2028 
2029       if Size <= Dummy.Size then
2030          return Pattern_Matcher'
2031            (Size             => Size,
2032             First            => Dummy.First,
2033             Anchored         => Dummy.Anchored,
2034             Must_Have        => Dummy.Must_Have,
2035             Must_Have_Length => Dummy.Must_Have_Length,
2036             Paren_Count      => Dummy.Paren_Count,
2037             Flags            => Dummy.Flags,
2038             Program          =>
2039               Dummy.Program
2040                 (Dummy.Program'First .. Dummy.Program'First + Size - 1));
2041       else
2042          --  We have to recompile now that we know the size
2043          --  ??? Can we use Ada 2005's return construct ?
2044 
2045          declare
2046             Result : Pattern_Matcher (Size);
2047          begin
2048             Compile (Result, Expression, Size, Flags);
2049             return Result;
2050          end;
2051       end if;
2052    end Compile;
2053 
2054    procedure Compile
2055      (Matcher    : out Pattern_Matcher;
2056       Expression : String;
2057       Flags      : Regexp_Flags := No_Flags)
2058    is
2059       Size : Program_Size;
2060 
2061    begin
2062       Compile (Matcher, Expression, Size, Flags);
2063 
2064       if Size > Matcher.Size then
2065          raise Expression_Error with "Pattern_Matcher is too small";
2066       end if;
2067    end Compile;
2068 
2069    --------------------
2070    -- Dump_Operation --
2071    --------------------
2072 
2073    procedure Dump_Operation
2074       (Program : Program_Data;
2075        Index   : Pointer;
2076        Indent  : Natural)
2077    is
2078       Current : Pointer := Index;
2079    begin
2080       Dump_Until (Program, Current, Current + 1, Indent);
2081    end Dump_Operation;
2082 
2083    ----------------
2084    -- Dump_Until --
2085    ----------------
2086 
2087    procedure Dump_Until
2088       (Program  : Program_Data;
2089        Index    : in out Pointer;
2090        Till     : Pointer;
2091        Indent   : Natural;
2092        Do_Print : Boolean := True)
2093    is
2094       function Image (S : String) return String;
2095       --  Remove leading space
2096 
2097       -----------
2098       -- Image --
2099       -----------
2100 
2101       function Image (S : String) return String is
2102       begin
2103          if S (S'First) = ' ' then
2104             return S (S'First + 1 .. S'Last);
2105          else
2106             return S;
2107          end if;
2108       end Image;
2109 
2110       --  Local variables
2111 
2112       Op           : Opcode;
2113       Next         : Pointer;
2114       Length       : Pointer;
2115       Local_Indent : Natural := Indent;
2116 
2117    --  Start of processing for Dump_Until
2118 
2119    begin
2120       while Index < Till loop
2121          Op   := Opcode'Val (Character'Pos ((Program (Index))));
2122          Next := Get_Next (Program, Index);
2123 
2124          if Do_Print then
2125             declare
2126                Point   : constant String := Pointer'Image (Index);
2127             begin
2128                Put ((1 .. 4 - Point'Length => ' ')
2129                     & Point & ":"
2130                     & (1 .. Local_Indent * 2 => ' ') & Opcode'Image (Op));
2131             end;
2132 
2133             --  Print the parenthesis number
2134 
2135             if Op = OPEN or else Op = CLOSE or else Op = REFF then
2136                Put (Image (Natural'Image
2137                             (Character'Pos
2138                                (Program (Index + Next_Pointer_Bytes)))));
2139             end if;
2140 
2141             if Next = Index then
2142                Put (" (-)");
2143             else
2144                Put (" (" & Image (Pointer'Image (Next)) & ")");
2145             end if;
2146          end if;
2147 
2148          case Op is
2149             when ANYOF =>
2150                declare
2151                   Bitmap       : Character_Class;
2152                   Last         : Character := ASCII.NUL;
2153                   Current      : Natural := 0;
2154                   Current_Char : Character;
2155 
2156                begin
2157                   Bitmap_Operand (Program, Index, Bitmap);
2158 
2159                   if Do_Print then
2160                      Put ("[");
2161 
2162                      while Current <= 255 loop
2163                         Current_Char := Character'Val (Current);
2164 
2165                         --  First item in a range
2166 
2167                         if Get_From_Class (Bitmap, Current_Char) then
2168                            Last := Current_Char;
2169 
2170                            --  Search for the last item in the range
2171 
2172                            loop
2173                               Current := Current + 1;
2174                               exit when Current > 255;
2175                               Current_Char := Character'Val (Current);
2176                               exit when
2177                                 not Get_From_Class (Bitmap, Current_Char);
2178                            end loop;
2179 
2180                            if not Is_Graphic (Last) then
2181                               Put (Last'Img);
2182                            else
2183                               Put (Last);
2184                            end if;
2185 
2186                            if Character'Succ (Last) /= Current_Char then
2187                               Put ("\-" & Character'Pred (Current_Char));
2188                            end if;
2189 
2190                         else
2191                            Current := Current + 1;
2192                         end if;
2193                      end loop;
2194 
2195                      Put_Line ("]");
2196                   end if;
2197 
2198                   Index := Index + Next_Pointer_Bytes + Bitmap'Length;
2199                end;
2200 
2201             when EXACT | EXACTF =>
2202                Length := String_Length (Program, Index);
2203                if Do_Print then
2204                   Put (" (" & Image (Program_Size'Image (Length + 1))
2205                           & " chars) <"
2206                           & String (Program (String_Operand (Index)
2207                                               .. String_Operand (Index)
2208                                               + Length)));
2209                   Put_Line (">");
2210                end if;
2211 
2212                Index := String_Operand (Index) + Length + 1;
2213 
2214                --  Node operand
2215 
2216             when BRANCH | STAR | PLUS =>
2217                if Do_Print then
2218                   New_Line;
2219                end if;
2220 
2221                Index  := Index + Next_Pointer_Bytes;
2222                Dump_Until (Program, Index, Pointer'Min (Next, Till),
2223                            Local_Indent + 1, Do_Print);
2224 
2225             when CURLY | CURLYX =>
2226                if Do_Print then
2227                   Put_Line
2228                     (" {"
2229                     & Image (Natural'Image
2230                        (Read_Natural (Program, Index + Next_Pointer_Bytes)))
2231                     & ","
2232                     & Image (Natural'Image (Read_Natural (Program, Index + 5)))
2233                     & "}");
2234                end if;
2235 
2236                Index  := Index + 7;
2237                Dump_Until (Program, Index, Pointer'Min (Next, Till),
2238                            Local_Indent + 1, Do_Print);
2239 
2240             when OPEN =>
2241                if Do_Print then
2242                   New_Line;
2243                end if;
2244 
2245                Index := Index + 4;
2246                Local_Indent := Local_Indent + 1;
2247 
2248             when CLOSE | REFF =>
2249                if Do_Print then
2250                   New_Line;
2251                end if;
2252 
2253                Index := Index + 4;
2254 
2255                if Op = CLOSE then
2256                   Local_Indent := Local_Indent - 1;
2257                end if;
2258 
2259             when others =>
2260                Index := Index + Next_Pointer_Bytes;
2261 
2262                if Do_Print then
2263                   New_Line;
2264                end if;
2265 
2266                exit when Op = EOP;
2267          end case;
2268       end loop;
2269    end Dump_Until;
2270 
2271    ----------
2272    -- Dump --
2273    ----------
2274 
2275    procedure Dump (Self : Pattern_Matcher) is
2276       Program : Program_Data renames Self.Program;
2277       Index   : Pointer := Program'First;
2278 
2279    --  Start of processing for Dump
2280 
2281    begin
2282       Put_Line ("Must start with (Self.First) = "
2283                 & Character'Image (Self.First));
2284 
2285       if (Self.Flags and Case_Insensitive) /= 0 then
2286          Put_Line ("  Case_Insensitive mode");
2287       end if;
2288 
2289       if (Self.Flags and Single_Line) /= 0 then
2290          Put_Line ("  Single_Line mode");
2291       end if;
2292 
2293       if (Self.Flags and Multiple_Lines) /= 0 then
2294          Put_Line ("  Multiple_Lines mode");
2295       end if;
2296 
2297       Dump_Until (Program, Index, Self.Program'Last + 1, 0);
2298    end Dump;
2299 
2300    --------------------
2301    -- Get_From_Class --
2302    --------------------
2303 
2304    function Get_From_Class
2305      (Bitmap : Character_Class;
2306       C      : Character) return Boolean
2307    is
2308       Value : constant Class_Byte := Character'Pos (C);
2309    begin
2310       return
2311         (Bitmap (Value / 8) and Bit_Conversion (Value mod 8)) /= 0;
2312    end Get_From_Class;
2313 
2314    --------------
2315    -- Get_Next --
2316    --------------
2317 
2318    function Get_Next (Program : Program_Data; IP : Pointer) return Pointer is
2319    begin
2320       return IP + Pointer (Read_Natural (Program, IP + 1));
2321    end Get_Next;
2322 
2323    --------------
2324    -- Is_Alnum --
2325    --------------
2326 
2327    function Is_Alnum (C : Character) return Boolean is
2328    begin
2329       return Is_Alphanumeric (C) or else C = '_';
2330    end Is_Alnum;
2331 
2332    ------------------
2333    -- Is_Printable --
2334    ------------------
2335 
2336    function Is_Printable (C : Character) return Boolean is
2337    begin
2338       --  Printable if space or graphic character or other whitespace
2339       --  Other white space includes (HT/LF/VT/FF/CR = codes 9-13)
2340 
2341       return C in Character'Val (32) .. Character'Val (126)
2342         or else C in ASCII.HT .. ASCII.CR;
2343    end Is_Printable;
2344 
2345    --------------------
2346    -- Is_White_Space --
2347    --------------------
2348 
2349    function Is_White_Space (C : Character) return Boolean is
2350    begin
2351       --  Note: HT = 9, LF = 10, VT = 11, FF = 12, CR = 13
2352 
2353       return C = ' ' or else C in ASCII.HT .. ASCII.CR;
2354    end Is_White_Space;
2355 
2356    -----------
2357    -- Match --
2358    -----------
2359 
2360    procedure Match
2361      (Self       : Pattern_Matcher;
2362       Data       : String;
2363       Matches    : out Match_Array;
2364       Data_First : Integer := -1;
2365       Data_Last  : Positive := Positive'Last)
2366    is
2367       Program : Program_Data renames Self.Program; -- Shorter notation
2368 
2369       First_In_Data : constant Integer := Integer'Max (Data_First, Data'First);
2370       Last_In_Data  : constant Integer := Integer'Min (Data_Last, Data'Last);
2371 
2372       --  Global work variables
2373 
2374       Input_Pos : Natural;           -- String-input pointer
2375       BOL_Pos   : Natural;           -- Beginning of input, for ^ check
2376       Matched   : Boolean := False;  -- Until proven True
2377 
2378       Matches_Full : Match_Array (0 .. Natural'Max (Self.Paren_Count,
2379                                                     Matches'Last));
2380       --  Stores the value of all the parenthesis pairs.
2381       --  We do not use directly Matches, so that we can also use back
2382       --  references (REFF) even if Matches is too small.
2383 
2384       type Natural_Array is array (Match_Count range <>) of Natural;
2385       Matches_Tmp : Natural_Array (Matches_Full'Range);
2386       --  Save the opening position of parenthesis
2387 
2388       Last_Paren  : Natural := 0;
2389       --  Last parenthesis seen
2390 
2391       Greedy : Boolean := True;
2392       --  True if the next operator should be greedy
2393 
2394       type Current_Curly_Record;
2395       type Current_Curly_Access is access all Current_Curly_Record;
2396       type Current_Curly_Record is record
2397          Paren_Floor : Natural;  --  How far back to strip parenthesis data
2398          Cur         : Integer;  --  How many instances of scan we've matched
2399          Min         : Natural;  --  Minimal number of scans to match
2400          Max         : Natural;  --  Maximal number of scans to match
2401          Greedy      : Boolean;  --  Whether to work our way up or down
2402          Scan        : Pointer;  --  The thing to match
2403          Next        : Pointer;  --  What has to match after it
2404          Lastloc     : Natural;  --  Where we started matching this scan
2405          Old_Cc      : Current_Curly_Access; --  Before we started this one
2406       end record;
2407       --  Data used to handle the curly operator and the plus and star
2408       --  operators for complex expressions.
2409 
2410       Current_Curly : Current_Curly_Access := null;
2411       --  The curly currently being processed
2412 
2413       -----------------------
2414       -- Local Subprograms --
2415       -----------------------
2416 
2417       function Index (Start : Positive; C : Character) return Natural;
2418       --  Find character C in Data starting at Start and return position
2419 
2420       function Repeat
2421         (IP  : Pointer;
2422          Max : Natural := Natural'Last) return Natural;
2423       --  Repeatedly match something simple, report how many
2424       --  It only matches on things of length 1.
2425       --  Starting from Input_Pos, it matches at most Max CURLY.
2426 
2427       function Try (Pos : Positive) return Boolean;
2428       --  Try to match at specific point
2429 
2430       function Match (IP : Pointer) return Boolean;
2431       --  This is the main matching routine. Conceptually the strategy
2432       --  is simple:  check to see whether the current node matches,
2433       --  call self recursively to see whether the rest matches,
2434       --  and then act accordingly.
2435       --
2436       --  In practice Match makes some effort to avoid recursion, in
2437       --  particular by going through "ordinary" nodes (that don't
2438       --  need to know whether the rest of the match failed) by
2439       --  using a loop instead of recursion.
2440       --  Why is the above comment part of the spec rather than body ???
2441 
2442       function Match_Whilem return Boolean;
2443       --  Return True if a WHILEM matches the Current_Curly
2444 
2445       function Recurse_Match (IP : Pointer; From : Natural) return Boolean;
2446       pragma Inline (Recurse_Match);
2447       --  Calls Match recursively. It saves and restores the parenthesis
2448       --  status and location in the input stream correctly, so that
2449       --  backtracking is possible
2450 
2451       function Match_Simple_Operator
2452         (Op     : Opcode;
2453          Scan   : Pointer;
2454          Next   : Pointer;
2455          Greedy : Boolean) return Boolean;
2456       --  Return True it the simple operator (possibly non-greedy) matches
2457 
2458       Dump_Indent : Integer := -1;
2459       procedure Dump_Current (Scan : Pointer; Prefix : Boolean := True);
2460       procedure Dump_Error (Msg : String);
2461       --  Debug: print the current context
2462 
2463       pragma Inline (Index);
2464       pragma Inline (Repeat);
2465 
2466       --  These are two complex functions, but used only once
2467 
2468       pragma Inline (Match_Whilem);
2469       pragma Inline (Match_Simple_Operator);
2470 
2471       -----------
2472       -- Index --
2473       -----------
2474 
2475       function Index (Start : Positive; C : Character) return Natural is
2476       begin
2477          for J in Start .. Last_In_Data loop
2478             if Data (J) = C then
2479                return J;
2480             end if;
2481          end loop;
2482 
2483          return 0;
2484       end Index;
2485 
2486       -------------------
2487       -- Recurse_Match --
2488       -------------------
2489 
2490       function Recurse_Match (IP : Pointer; From : Natural) return Boolean is
2491          L     : constant Natural := Last_Paren;
2492          Tmp_F : constant Match_Array :=
2493                    Matches_Full (From + 1 .. Matches_Full'Last);
2494          Start : constant Natural_Array :=
2495                    Matches_Tmp (From + 1 .. Matches_Tmp'Last);
2496          Input : constant Natural := Input_Pos;
2497 
2498          Dump_Indent_Save : constant Integer := Dump_Indent;
2499 
2500       begin
2501          if Match (IP) then
2502             return True;
2503          end if;
2504 
2505          Last_Paren := L;
2506          Matches_Full (Tmp_F'Range) := Tmp_F;
2507          Matches_Tmp (Start'Range) := Start;
2508          Input_Pos := Input;
2509          Dump_Indent := Dump_Indent_Save;
2510          return False;
2511       end Recurse_Match;
2512 
2513       ------------------
2514       -- Dump_Current --
2515       ------------------
2516 
2517       procedure Dump_Current (Scan : Pointer; Prefix : Boolean := True) is
2518          Length : constant := 10;
2519          Pos    : constant String := Integer'Image (Input_Pos);
2520 
2521       begin
2522          if Prefix then
2523             Put ((1 .. 5 - Pos'Length => ' '));
2524             Put (Pos & " <"
2525                  & Data (Input_Pos
2526                      .. Integer'Min (Last_In_Data, Input_Pos + Length - 1)));
2527             Put ((1 .. Length - 1 - Last_In_Data + Input_Pos => ' '));
2528             Put ("> |");
2529 
2530          else
2531             Put ("                    ");
2532          end if;
2533 
2534          Dump_Operation (Program, Scan, Indent => Dump_Indent);
2535       end Dump_Current;
2536 
2537       ----------------
2538       -- Dump_Error --
2539       ----------------
2540 
2541       procedure Dump_Error (Msg : String) is
2542       begin
2543          Put ("                   |     ");
2544          Put ((1 .. Dump_Indent * 2 => ' '));
2545          Put_Line (Msg);
2546       end Dump_Error;
2547 
2548       -----------
2549       -- Match --
2550       -----------
2551 
2552       function Match (IP : Pointer) return Boolean is
2553          Scan   : Pointer := IP;
2554          Next   : Pointer;
2555          Op     : Opcode;
2556          Result : Boolean;
2557 
2558       begin
2559          Dump_Indent := Dump_Indent + 1;
2560 
2561          State_Machine :
2562          loop
2563             pragma Assert (Scan /= 0);
2564 
2565             --  Determine current opcode and count its usage in debug mode
2566 
2567             Op := Opcode'Val (Character'Pos (Program (Scan)));
2568 
2569             --  Calculate offset of next instruction. Second character is most
2570             --  significant in Program_Data.
2571 
2572             Next := Get_Next (Program, Scan);
2573 
2574             if Debug then
2575                Dump_Current (Scan);
2576             end if;
2577 
2578             case Op is
2579                when EOP =>
2580                   Dump_Indent := Dump_Indent - 1;
2581                   return True;  --  Success
2582 
2583                when BRANCH =>
2584                   if Program (Next) /= BRANCH then
2585                      Next := Operand (Scan); -- No choice, avoid recursion
2586 
2587                   else
2588                      loop
2589                         if Recurse_Match (Operand (Scan), 0) then
2590                            Dump_Indent := Dump_Indent - 1;
2591                            return True;
2592                         end if;
2593 
2594                         Scan := Get_Next (Program, Scan);
2595                         exit when Scan = 0 or else Program (Scan) /= BRANCH;
2596                      end loop;
2597 
2598                      exit State_Machine;
2599                   end if;
2600 
2601                when NOTHING =>
2602                   null;
2603 
2604                when BOL =>
2605                   exit State_Machine when Input_Pos /= BOL_Pos
2606                     and then ((Self.Flags and Multiple_Lines) = 0
2607                                or else Data (Input_Pos - 1) /= ASCII.LF);
2608 
2609                when MBOL =>
2610                   exit State_Machine when Input_Pos /= BOL_Pos
2611                     and then Data (Input_Pos - 1) /= ASCII.LF;
2612 
2613                when SBOL =>
2614                   exit State_Machine when Input_Pos /= BOL_Pos;
2615 
2616                when EOL =>
2617 
2618                   --  A combination of MEOL and SEOL
2619 
2620                   if (Self.Flags and Multiple_Lines) = 0 then
2621 
2622                      --  Single line mode
2623 
2624                      exit State_Machine when Input_Pos <= Data'Last;
2625 
2626                   elsif Input_Pos <= Last_In_Data then
2627                      exit State_Machine when Data (Input_Pos) /= ASCII.LF;
2628                   else
2629                      exit State_Machine when Last_In_Data /= Data'Last;
2630                   end if;
2631 
2632                when MEOL =>
2633                   if Input_Pos <= Last_In_Data then
2634                      exit State_Machine when Data (Input_Pos) /= ASCII.LF;
2635                   else
2636                      exit State_Machine when Last_In_Data /= Data'Last;
2637                   end if;
2638 
2639                when SEOL =>
2640 
2641                   --  If there is a character before Data'Last (even if
2642                   --  Last_In_Data stops before then), we can't have the
2643                   --  end of the line.
2644 
2645                   exit State_Machine when Input_Pos <= Data'Last;
2646 
2647                when BOUND | NBOUND =>
2648 
2649                   --  Was last char in word ?
2650 
2651                   declare
2652                      N  : Boolean := False;
2653                      Ln : Boolean := False;
2654 
2655                   begin
2656                      if Input_Pos /= First_In_Data then
2657                         N := Is_Alnum (Data (Input_Pos - 1));
2658                      end if;
2659 
2660                      Ln :=
2661                        (if Input_Pos > Last_In_Data
2662                         then False
2663                         else Is_Alnum (Data (Input_Pos)));
2664 
2665                      if Op = BOUND then
2666                         if N = Ln then
2667                            exit State_Machine;
2668                         end if;
2669                      else
2670                         if N /= Ln then
2671                            exit State_Machine;
2672                         end if;
2673                      end if;
2674                   end;
2675 
2676                when SPACE =>
2677                   exit State_Machine when Input_Pos > Last_In_Data
2678                     or else not Is_White_Space (Data (Input_Pos));
2679                   Input_Pos := Input_Pos + 1;
2680 
2681                when NSPACE =>
2682                   exit State_Machine when Input_Pos > Last_In_Data
2683                     or else Is_White_Space (Data (Input_Pos));
2684                   Input_Pos := Input_Pos + 1;
2685 
2686                when DIGIT =>
2687                   exit State_Machine when Input_Pos > Last_In_Data
2688                     or else not Is_Digit (Data (Input_Pos));
2689                   Input_Pos := Input_Pos + 1;
2690 
2691                when NDIGIT =>
2692                   exit State_Machine when Input_Pos > Last_In_Data
2693                     or else Is_Digit (Data (Input_Pos));
2694                   Input_Pos := Input_Pos + 1;
2695 
2696                when ALNUM =>
2697                   exit State_Machine when Input_Pos > Last_In_Data
2698                     or else not Is_Alnum (Data (Input_Pos));
2699                   Input_Pos := Input_Pos + 1;
2700 
2701                when NALNUM =>
2702                   exit State_Machine when Input_Pos > Last_In_Data
2703                     or else Is_Alnum (Data (Input_Pos));
2704                   Input_Pos := Input_Pos + 1;
2705 
2706                when ANY =>
2707                   exit State_Machine when Input_Pos > Last_In_Data
2708                     or else Data (Input_Pos) = ASCII.LF;
2709                   Input_Pos := Input_Pos + 1;
2710 
2711                when SANY =>
2712                   exit State_Machine when Input_Pos > Last_In_Data;
2713                   Input_Pos := Input_Pos + 1;
2714 
2715                when EXACT =>
2716                   declare
2717                      Opnd    : Pointer  := String_Operand (Scan);
2718                      Current : Positive := Input_Pos;
2719                      Last    : constant Pointer :=
2720                                  Opnd + String_Length (Program, Scan);
2721 
2722                   begin
2723                      while Opnd <= Last loop
2724                         exit State_Machine when Current > Last_In_Data
2725                           or else Program (Opnd) /= Data (Current);
2726                         Current := Current + 1;
2727                         Opnd := Opnd + 1;
2728                      end loop;
2729 
2730                      Input_Pos := Current;
2731                   end;
2732 
2733                when EXACTF =>
2734                   declare
2735                      Opnd    : Pointer  := String_Operand (Scan);
2736                      Current : Positive := Input_Pos;
2737 
2738                      Last : constant Pointer :=
2739                               Opnd + String_Length (Program, Scan);
2740 
2741                   begin
2742                      while Opnd <= Last loop
2743                         exit State_Machine when Current > Last_In_Data
2744                           or else Program (Opnd) /= To_Lower (Data (Current));
2745                         Current := Current + 1;
2746                         Opnd := Opnd + 1;
2747                      end loop;
2748 
2749                      Input_Pos := Current;
2750                   end;
2751 
2752                when ANYOF =>
2753                   declare
2754                      Bitmap : Character_Class;
2755                   begin
2756                      Bitmap_Operand (Program, Scan, Bitmap);
2757                      exit State_Machine when Input_Pos > Last_In_Data
2758                        or else not Get_From_Class (Bitmap, Data (Input_Pos));
2759                      Input_Pos := Input_Pos + 1;
2760                   end;
2761 
2762                when OPEN =>
2763                   declare
2764                      No : constant Natural :=
2765                             Character'Pos (Program (Operand (Scan)));
2766                   begin
2767                      Matches_Tmp (No) := Input_Pos;
2768                   end;
2769 
2770                when CLOSE =>
2771                   declare
2772                      No : constant Natural :=
2773                             Character'Pos (Program (Operand (Scan)));
2774 
2775                   begin
2776                      Matches_Full (No) := (Matches_Tmp (No), Input_Pos - 1);
2777 
2778                      if Last_Paren < No then
2779                         Last_Paren := No;
2780                      end if;
2781                   end;
2782 
2783                when REFF =>
2784                   declare
2785                      No : constant Natural :=
2786                             Character'Pos (Program (Operand (Scan)));
2787 
2788                      Data_Pos : Natural;
2789 
2790                   begin
2791                      --  If we haven't seen that parenthesis yet
2792 
2793                      if Last_Paren < No then
2794                         Dump_Indent := Dump_Indent - 1;
2795 
2796                         if Debug then
2797                            Dump_Error ("REFF: No match, backtracking");
2798                         end if;
2799 
2800                         return False;
2801                      end if;
2802 
2803                      Data_Pos := Matches_Full (No).First;
2804 
2805                      while Data_Pos <= Matches_Full (No).Last loop
2806                         if Input_Pos > Last_In_Data
2807                           or else Data (Input_Pos) /= Data (Data_Pos)
2808                         then
2809                            Dump_Indent := Dump_Indent - 1;
2810 
2811                            if Debug then
2812                               Dump_Error ("REFF: No match, backtracking");
2813                            end if;
2814 
2815                            return False;
2816                         end if;
2817 
2818                         Input_Pos := Input_Pos + 1;
2819                         Data_Pos := Data_Pos + 1;
2820                      end loop;
2821                   end;
2822 
2823                when MINMOD =>
2824                   Greedy := False;
2825 
2826                when STAR | PLUS | CURLY =>
2827                   declare
2828                      Greed : constant Boolean := Greedy;
2829                   begin
2830                      Greedy := True;
2831                      Result := Match_Simple_Operator (Op, Scan, Next, Greed);
2832                      Dump_Indent := Dump_Indent - 1;
2833                      return Result;
2834                   end;
2835 
2836                when CURLYX =>
2837 
2838                   --  Looking at something like:
2839 
2840                   --    1: CURLYX {n,m}  (->4)
2841                   --    2:   code for complex thing  (->3)
2842                   --    3:   WHILEM (->0)
2843                   --    4: NOTHING
2844 
2845                   declare
2846                      Min : constant Natural :=
2847                              Read_Natural (Program, Scan + Next_Pointer_Bytes);
2848                      Max : constant Natural :=
2849                              Read_Natural
2850                                 (Program, Scan + Next_Pointer_Bytes + 2);
2851                      Cc  : aliased Current_Curly_Record;
2852 
2853                      Has_Match : Boolean;
2854 
2855                   begin
2856                      Cc := (Paren_Floor => Last_Paren,
2857                             Cur         => -1,
2858                             Min         => Min,
2859                             Max         => Max,
2860                             Greedy      => Greedy,
2861                             Scan        => Scan + 7,
2862                             Next        => Next,
2863                             Lastloc     => 0,
2864                             Old_Cc      => Current_Curly);
2865                      Greedy := True;
2866                      Current_Curly := Cc'Unchecked_Access;
2867 
2868                      Has_Match := Match (Next - Next_Pointer_Bytes);
2869 
2870                      --  Start on the WHILEM
2871 
2872                      Current_Curly := Cc.Old_Cc;
2873                      Dump_Indent := Dump_Indent - 1;
2874 
2875                      if not Has_Match then
2876                         if Debug then
2877                            Dump_Error ("CURLYX failed...");
2878                         end if;
2879                      end if;
2880 
2881                      return Has_Match;
2882                   end;
2883 
2884                when WHILEM =>
2885                   Result := Match_Whilem;
2886                   Dump_Indent := Dump_Indent - 1;
2887 
2888                   if Debug and then not Result then
2889                      Dump_Error ("WHILEM: no match, backtracking");
2890                   end if;
2891 
2892                   return Result;
2893             end case;
2894 
2895             Scan := Next;
2896          end loop State_Machine;
2897 
2898          if Debug then
2899             Dump_Error ("failed...");
2900             Dump_Indent := Dump_Indent - 1;
2901          end if;
2902 
2903          --  If we get here, there is no match. For successful matches when EOP
2904          --  is the terminating point.
2905 
2906          return False;
2907       end Match;
2908 
2909       ---------------------------
2910       -- Match_Simple_Operator --
2911       ---------------------------
2912 
2913       function Match_Simple_Operator
2914         (Op     : Opcode;
2915          Scan   : Pointer;
2916          Next   : Pointer;
2917          Greedy : Boolean) return Boolean
2918       is
2919          Next_Char       : Character := ASCII.NUL;
2920          Next_Char_Known : Boolean := False;
2921          No              : Integer;  --  Can be negative
2922          Min             : Natural;
2923          Max             : Natural := Natural'Last;
2924          Operand_Code    : Pointer;
2925          Old             : Natural;
2926          Last_Pos        : Natural;
2927          Save            : constant Natural := Input_Pos;
2928 
2929       begin
2930          --  Lookahead to avoid useless match attempts when we know what
2931          --  character comes next.
2932 
2933          if Program (Next) = EXACT then
2934             Next_Char := Program (String_Operand (Next));
2935             Next_Char_Known := True;
2936          end if;
2937 
2938          --  Find the minimal and maximal values for the operator
2939 
2940          case Op is
2941             when STAR =>
2942                Min := 0;
2943                Operand_Code := Operand (Scan);
2944 
2945             when PLUS =>
2946                Min := 1;
2947                Operand_Code := Operand (Scan);
2948 
2949             when others =>
2950                Min := Read_Natural (Program, Scan + Next_Pointer_Bytes);
2951                Max := Read_Natural (Program, Scan + Next_Pointer_Bytes + 2);
2952                Operand_Code := Scan + 7;
2953          end case;
2954 
2955          if Debug then
2956             Dump_Current (Operand_Code, Prefix => False);
2957          end if;
2958 
2959          --  Non greedy operators
2960 
2961          if not Greedy then
2962 
2963             --  Test we can repeat at least Min times
2964 
2965             if Min /= 0 then
2966                No := Repeat (Operand_Code, Min);
2967 
2968                if No < Min then
2969                   if Debug then
2970                      Dump_Error ("failed... matched" & No'Img & " times");
2971                   end if;
2972 
2973                   return False;
2974                end if;
2975             end if;
2976 
2977             Old := Input_Pos;
2978 
2979             --  Find the place where 'next' could work
2980 
2981             if Next_Char_Known then
2982 
2983                --  Last position to check
2984 
2985                if Max = Natural'Last then
2986                   Last_Pos := Last_In_Data;
2987                else
2988                   Last_Pos := Input_Pos + Max;
2989 
2990                   if Last_Pos > Last_In_Data then
2991                      Last_Pos := Last_In_Data;
2992                   end if;
2993                end if;
2994 
2995                --  Look for the first possible opportunity
2996 
2997                if Debug then
2998                   Dump_Error ("Next_Char must be " & Next_Char);
2999                end if;
3000 
3001                loop
3002                   --  Find the next possible position
3003 
3004                   while Input_Pos <= Last_Pos
3005                     and then Data (Input_Pos) /= Next_Char
3006                   loop
3007                      Input_Pos := Input_Pos + 1;
3008                   end loop;
3009 
3010                   if Input_Pos > Last_Pos then
3011                      return False;
3012                   end if;
3013 
3014                   --  Check that we still match if we stop at the position we
3015                   --  just found.
3016 
3017                   declare
3018                      Num : constant Natural := Input_Pos - Old;
3019 
3020                   begin
3021                      Input_Pos := Old;
3022 
3023                      if Debug then
3024                         Dump_Error ("Would we still match at that position?");
3025                      end if;
3026 
3027                      if Repeat (Operand_Code, Num) < Num then
3028                         return False;
3029                      end if;
3030                   end;
3031 
3032                   --  Input_Pos now points to the new position
3033 
3034                   if Match (Get_Next (Program, Scan)) then
3035                      return True;
3036                   end if;
3037 
3038                   Old := Input_Pos;
3039                   Input_Pos := Input_Pos + 1;
3040                end loop;
3041 
3042             --  We do not know what the next character is
3043 
3044             else
3045                while Max >= Min loop
3046                   if Debug then
3047                      Dump_Error ("Non-greedy repeat, N=" & Min'Img);
3048                      Dump_Error ("Do we still match Next if we stop here?");
3049                   end if;
3050 
3051                   --  If the next character matches
3052 
3053                   if Recurse_Match (Next, 1) then
3054                      return True;
3055                   end if;
3056 
3057                   Input_Pos := Save + Min;
3058 
3059                   --  Could not or did not match -- move forward
3060 
3061                   if Repeat (Operand_Code, 1) /= 0 then
3062                      Min := Min + 1;
3063                   else
3064                      if Debug then
3065                         Dump_Error ("Non-greedy repeat failed...");
3066                      end if;
3067 
3068                      return False;
3069                   end if;
3070                end loop;
3071             end if;
3072 
3073             return False;
3074 
3075          --  Greedy operators
3076 
3077          else
3078             No := Repeat (Operand_Code, Max);
3079 
3080             if Debug and then No < Min then
3081                Dump_Error ("failed... matched" & No'Img & " times");
3082             end if;
3083 
3084             --  ??? Perl has some special code here in case the next
3085             --  instruction is of type EOL, since $ and \Z can match before
3086             --  *and* after newline at the end.
3087 
3088             --  ??? Perl has some special code here in case (paren) is True
3089 
3090             --  Else, if we don't have any parenthesis
3091 
3092             while No >= Min loop
3093                if not Next_Char_Known
3094                  or else (Input_Pos <= Last_In_Data
3095                            and then Data (Input_Pos) = Next_Char)
3096                then
3097                   if Match (Next) then
3098                      return True;
3099                   end if;
3100                end if;
3101 
3102                --  Could not or did not work, we back up
3103 
3104                No := No - 1;
3105                Input_Pos := Save + No;
3106             end loop;
3107 
3108             return False;
3109          end if;
3110       end Match_Simple_Operator;
3111 
3112       ------------------
3113       -- Match_Whilem --
3114       ------------------
3115 
3116       --  This is really hard to understand, because after we match what we
3117       --  are trying to match, we must make sure the rest of the REx is going
3118       --  to match for sure, and to do that we have to go back UP the parse
3119       --  tree by recursing ever deeper.  And if it fails, we have to reset
3120       --  our parent's current state that we can try again after backing off.
3121 
3122       function Match_Whilem return Boolean is
3123          Cc : constant Current_Curly_Access := Current_Curly;
3124 
3125          N  : constant Natural              := Cc.Cur + 1;
3126          Ln : Natural                       := 0;
3127 
3128          Lastloc : constant Natural := Cc.Lastloc;
3129          --  Detection of 0-len
3130 
3131       begin
3132          --  If degenerate scan matches "", assume scan done
3133 
3134          if Input_Pos = Cc.Lastloc
3135            and then N >= Cc.Min
3136          then
3137             --  Temporarily restore the old context, and check that we
3138             --  match was comes after CURLYX.
3139 
3140             Current_Curly := Cc.Old_Cc;
3141 
3142             if Current_Curly /= null then
3143                Ln := Current_Curly.Cur;
3144             end if;
3145 
3146             if Match (Cc.Next) then
3147                return True;
3148             end if;
3149 
3150             if Current_Curly /= null then
3151                Current_Curly.Cur := Ln;
3152             end if;
3153 
3154             Current_Curly := Cc;
3155             return False;
3156          end if;
3157 
3158          --  First, just match a string of min scans
3159 
3160          if N < Cc.Min then
3161             Cc.Cur := N;
3162             Cc.Lastloc := Input_Pos;
3163 
3164             if Debug then
3165                Dump_Error
3166                  ("Tests that we match at least" & Cc.Min'Img & " N=" & N'Img);
3167             end if;
3168 
3169             if Match (Cc.Scan) then
3170                return True;
3171             end if;
3172 
3173             Cc.Cur := N - 1;
3174             Cc.Lastloc := Lastloc;
3175 
3176             if Debug then
3177                Dump_Error ("failed...");
3178             end if;
3179 
3180             return False;
3181          end if;
3182 
3183          --  Prefer next over scan for minimal matching
3184 
3185          if not Cc.Greedy then
3186             Current_Curly := Cc.Old_Cc;
3187 
3188             if Current_Curly /= null then
3189                Ln := Current_Curly.Cur;
3190             end if;
3191 
3192             if Recurse_Match (Cc.Next, Cc.Paren_Floor) then
3193                return True;
3194             end if;
3195 
3196             if Current_Curly /= null then
3197                Current_Curly.Cur := Ln;
3198             end if;
3199 
3200             Current_Curly := Cc;
3201 
3202             --  Maximum greed exceeded ?
3203 
3204             if N >= Cc.Max then
3205                if Debug then
3206                   Dump_Error ("failed...");
3207                end if;
3208                return False;
3209             end if;
3210 
3211             --  Try scanning more and see if it helps
3212             Cc.Cur := N;
3213             Cc.Lastloc := Input_Pos;
3214 
3215             if Debug then
3216                Dump_Error ("Next failed, what about Current?");
3217             end if;
3218 
3219             if Recurse_Match (Cc.Scan, Cc.Paren_Floor) then
3220                return True;
3221             end if;
3222 
3223             Cc.Cur := N - 1;
3224             Cc.Lastloc := Lastloc;
3225             return False;
3226          end if;
3227 
3228          --  Prefer scan over next for maximal matching
3229 
3230          if N < Cc.Max then   --  more greed allowed ?
3231             Cc.Cur := N;
3232             Cc.Lastloc := Input_Pos;
3233 
3234             if Debug then
3235                Dump_Error ("Recurse at current position");
3236             end if;
3237 
3238             if Recurse_Match (Cc.Scan, Cc.Paren_Floor) then
3239                return True;
3240             end if;
3241          end if;
3242 
3243          --  Failed deeper matches of scan, so see if this one works
3244 
3245          Current_Curly := Cc.Old_Cc;
3246 
3247          if Current_Curly /= null then
3248             Ln := Current_Curly.Cur;
3249          end if;
3250 
3251          if Debug then
3252             Dump_Error ("Failed matching for later positions");
3253          end if;
3254 
3255          if Match (Cc.Next) then
3256             return True;
3257          end if;
3258 
3259          if Current_Curly /= null then
3260             Current_Curly.Cur := Ln;
3261          end if;
3262 
3263          Current_Curly := Cc;
3264          Cc.Cur := N - 1;
3265          Cc.Lastloc := Lastloc;
3266 
3267          if Debug then
3268             Dump_Error ("failed...");
3269          end if;
3270 
3271          return False;
3272       end Match_Whilem;
3273 
3274       ------------
3275       -- Repeat --
3276       ------------
3277 
3278       function Repeat
3279         (IP  : Pointer;
3280          Max : Natural := Natural'Last) return Natural
3281       is
3282          Scan  : Natural := Input_Pos;
3283          Last  : Natural;
3284          Op    : constant Opcode := Opcode'Val (Character'Pos (Program (IP)));
3285          Count : Natural;
3286          C     : Character;
3287          Is_First : Boolean := True;
3288          Bitmap   : Character_Class;
3289 
3290       begin
3291          if Max = Natural'Last or else Scan + Max - 1 > Last_In_Data then
3292             Last := Last_In_Data;
3293          else
3294             Last := Scan + Max - 1;
3295          end if;
3296 
3297          case Op is
3298             when ANY =>
3299                while Scan <= Last
3300                  and then Data (Scan) /= ASCII.LF
3301                loop
3302                   Scan := Scan + 1;
3303                end loop;
3304 
3305             when SANY =>
3306                Scan := Last + 1;
3307 
3308             when EXACT =>
3309 
3310                --  The string has only one character if Repeat was called
3311 
3312                C := Program (String_Operand (IP));
3313                while Scan <= Last
3314                  and then C = Data (Scan)
3315                loop
3316                   Scan := Scan + 1;
3317                end loop;
3318 
3319             when EXACTF =>
3320 
3321                --  The string has only one character if Repeat was called
3322 
3323                C := Program (String_Operand (IP));
3324                while Scan <= Last
3325                  and then To_Lower (C) = Data (Scan)
3326                loop
3327                   Scan := Scan + 1;
3328                end loop;
3329 
3330             when ANYOF =>
3331                if Is_First then
3332                   Bitmap_Operand (Program, IP, Bitmap);
3333                   Is_First := False;
3334                end if;
3335 
3336                while Scan <= Last
3337                  and then Get_From_Class (Bitmap, Data (Scan))
3338                loop
3339                   Scan := Scan + 1;
3340                end loop;
3341 
3342             when ALNUM =>
3343                while Scan <= Last
3344                  and then Is_Alnum (Data (Scan))
3345                loop
3346                   Scan := Scan + 1;
3347                end loop;
3348 
3349             when NALNUM =>
3350                while Scan <= Last
3351                  and then not Is_Alnum (Data (Scan))
3352                loop
3353                   Scan := Scan + 1;
3354                end loop;
3355 
3356             when SPACE =>
3357                while Scan <= Last
3358                  and then Is_White_Space (Data (Scan))
3359                loop
3360                   Scan := Scan + 1;
3361                end loop;
3362 
3363             when NSPACE =>
3364                while Scan <= Last
3365                  and then not Is_White_Space (Data (Scan))
3366                loop
3367                   Scan := Scan + 1;
3368                end loop;
3369 
3370             when DIGIT  =>
3371                while Scan <= Last
3372                  and then Is_Digit (Data (Scan))
3373                loop
3374                   Scan := Scan + 1;
3375                end loop;
3376 
3377             when NDIGIT  =>
3378                while Scan <= Last
3379                  and then not Is_Digit (Data (Scan))
3380                loop
3381                   Scan := Scan + 1;
3382                end loop;
3383 
3384             when others =>
3385                raise Program_Error;
3386          end case;
3387 
3388          Count := Scan - Input_Pos;
3389          Input_Pos := Scan;
3390          return Count;
3391       end Repeat;
3392 
3393       ---------
3394       -- Try --
3395       ---------
3396 
3397       function Try (Pos : Positive) return Boolean is
3398       begin
3399          Input_Pos  := Pos;
3400          Last_Paren := 0;
3401          Matches_Full := (others => No_Match);
3402 
3403          if Match (Program_First) then
3404             Matches_Full (0) := (Pos, Input_Pos - 1);
3405             return True;
3406          end if;
3407 
3408          return False;
3409       end Try;
3410 
3411    --  Start of processing for Match
3412 
3413    begin
3414       --  Do we have the regexp Never_Match?
3415 
3416       if Self.Size = 0 then
3417          Matches := (others => No_Match);
3418          return;
3419       end if;
3420 
3421       --  If there is a "must appear" string, look for it
3422 
3423       if Self.Must_Have_Length > 0 then
3424          declare
3425             First      : constant Character := Program (Self.Must_Have);
3426             Must_First : constant Pointer := Self.Must_Have;
3427             Must_Last  : constant Pointer :=
3428                            Must_First + Pointer (Self.Must_Have_Length - 1);
3429             Next_Try   : Natural := Index (First_In_Data, First);
3430 
3431          begin
3432             while Next_Try /= 0
3433               and then Data (Next_Try .. Next_Try + Self.Must_Have_Length - 1)
3434                           = String (Program (Must_First .. Must_Last))
3435             loop
3436                Next_Try := Index (Next_Try + 1, First);
3437             end loop;
3438 
3439             if Next_Try = 0 then
3440                Matches := (others => No_Match);
3441                return;                  -- Not present
3442             end if;
3443          end;
3444       end if;
3445 
3446       --  Mark beginning of line for ^
3447 
3448       BOL_Pos := Data'First;
3449 
3450       --  Simplest case first: an anchored match need be tried only once
3451 
3452       if Self.Anchored and then (Self.Flags and Multiple_Lines) = 0 then
3453          Matched := Try (First_In_Data);
3454 
3455       elsif Self.Anchored then
3456          declare
3457             Next_Try : Natural := First_In_Data;
3458          begin
3459             --  Test the first position in the buffer
3460             Matched := Try (Next_Try);
3461 
3462             --  Else only test after newlines
3463 
3464             if not Matched then
3465                while Next_Try <= Last_In_Data loop
3466                   while Next_Try <= Last_In_Data
3467                     and then Data (Next_Try) /= ASCII.LF
3468                   loop
3469                      Next_Try := Next_Try + 1;
3470                   end loop;
3471 
3472                   Next_Try := Next_Try + 1;
3473 
3474                   if Next_Try <= Last_In_Data then
3475                      Matched := Try (Next_Try);
3476                      exit when Matched;
3477                   end if;
3478                end loop;
3479             end if;
3480          end;
3481 
3482       elsif Self.First /= ASCII.NUL then
3483          --  We know what char it must start with
3484 
3485          declare
3486             Next_Try : Natural := Index (First_In_Data, Self.First);
3487 
3488          begin
3489             while Next_Try /= 0 loop
3490                Matched := Try (Next_Try);
3491                exit when Matched;
3492                Next_Try := Index (Next_Try + 1, Self.First);
3493             end loop;
3494          end;
3495 
3496       else
3497          --  Messy cases: try all locations (including for the empty string)
3498 
3499          Matched := Try (First_In_Data);
3500 
3501          if not Matched then
3502             for S in First_In_Data + 1 .. Last_In_Data loop
3503                Matched := Try (S);
3504                exit when Matched;
3505             end loop;
3506          end if;
3507       end if;
3508 
3509       --  Matched has its value
3510 
3511       for J in Last_Paren + 1 .. Matches'Last loop
3512          Matches_Full (J) := No_Match;
3513       end loop;
3514 
3515       Matches := Matches_Full (Matches'Range);
3516    end Match;
3517 
3518    -----------
3519    -- Match --
3520    -----------
3521 
3522    function Match
3523      (Self       : Pattern_Matcher;
3524       Data       : String;
3525       Data_First : Integer := -1;
3526       Data_Last  : Positive := Positive'Last) return Natural
3527    is
3528       Matches : Match_Array (0 .. 0);
3529 
3530    begin
3531       Match (Self, Data, Matches, Data_First, Data_Last);
3532       if Matches (0) = No_Match then
3533          return Data'First - 1;
3534       else
3535          return Matches (0).First;
3536       end if;
3537    end Match;
3538 
3539    function Match
3540      (Self       : Pattern_Matcher;
3541       Data       : String;
3542       Data_First : Integer  := -1;
3543       Data_Last  : Positive := Positive'Last) return Boolean
3544    is
3545       Matches : Match_Array (0 .. 0);
3546 
3547    begin
3548       Match (Self, Data, Matches, Data_First, Data_Last);
3549       return Matches (0).First >= Data'First;
3550    end Match;
3551 
3552    procedure Match
3553      (Expression : String;
3554       Data       : String;
3555       Matches    : out Match_Array;
3556       Size       : Program_Size := Auto_Size;
3557       Data_First : Integer      := -1;
3558       Data_Last  : Positive     := Positive'Last)
3559    is
3560       PM            : Pattern_Matcher (Size);
3561       Finalize_Size : Program_Size;
3562       pragma Unreferenced (Finalize_Size);
3563    begin
3564       if Size = 0 then
3565          Match (Compile (Expression), Data, Matches, Data_First, Data_Last);
3566       else
3567          Compile (PM, Expression, Finalize_Size);
3568          Match (PM, Data, Matches, Data_First, Data_Last);
3569       end if;
3570    end Match;
3571 
3572    -----------
3573    -- Match --
3574    -----------
3575 
3576    function Match
3577      (Expression : String;
3578       Data       : String;
3579       Size       : Program_Size := Auto_Size;
3580       Data_First : Integer      := -1;
3581       Data_Last  : Positive     := Positive'Last) return Natural
3582    is
3583       PM         : Pattern_Matcher (Size);
3584       Final_Size : Program_Size;
3585       pragma Unreferenced (Final_Size);
3586    begin
3587       if Size = 0 then
3588          return Match (Compile (Expression), Data, Data_First, Data_Last);
3589       else
3590          Compile (PM, Expression, Final_Size);
3591          return Match (PM, Data, Data_First, Data_Last);
3592       end if;
3593    end Match;
3594 
3595    -----------
3596    -- Match --
3597    -----------
3598 
3599    function  Match
3600      (Expression : String;
3601       Data       : String;
3602       Size       : Program_Size := Auto_Size;
3603       Data_First : Integer      := -1;
3604       Data_Last  : Positive     := Positive'Last) return Boolean
3605    is
3606       Matches    : Match_Array (0 .. 0);
3607       PM         : Pattern_Matcher (Size);
3608       Final_Size : Program_Size;
3609       pragma Unreferenced (Final_Size);
3610    begin
3611       if Size = 0 then
3612          Match (Compile (Expression), Data, Matches, Data_First, Data_Last);
3613       else
3614          Compile (PM, Expression, Final_Size);
3615          Match (PM, Data, Matches, Data_First, Data_Last);
3616       end if;
3617 
3618       return Matches (0).First >= Data'First;
3619    end Match;
3620 
3621    -------------
3622    -- Operand --
3623    -------------
3624 
3625    function Operand (P : Pointer) return Pointer is
3626    begin
3627       return P + Next_Pointer_Bytes;
3628    end Operand;
3629 
3630    --------------
3631    -- Optimize --
3632    --------------
3633 
3634    procedure Optimize (Self : in out Pattern_Matcher) is
3635       Scan    : Pointer;
3636       Program : Program_Data renames Self.Program;
3637 
3638    begin
3639       --  Start with safe defaults (no optimization):
3640       --    *  No known first character of match
3641       --    *  Does not necessarily start at beginning of line
3642       --    *  No string known that has to appear in data
3643 
3644       Self.First := ASCII.NUL;
3645       Self.Anchored := False;
3646       Self.Must_Have := Program'Last + 1;
3647       Self.Must_Have_Length := 0;
3648 
3649       Scan := Program_First;  --  First instruction (can be anything)
3650 
3651       if Program (Scan) = EXACT then
3652          Self.First := Program (String_Operand (Scan));
3653 
3654       elsif Program (Scan) = BOL
3655         or else Program (Scan) = SBOL
3656         or else Program (Scan) = MBOL
3657       then
3658          Self.Anchored := True;
3659       end if;
3660    end Optimize;
3661 
3662    -----------------
3663    -- Paren_Count --
3664    -----------------
3665 
3666    function Paren_Count (Regexp : Pattern_Matcher) return Match_Count is
3667    begin
3668       return Regexp.Paren_Count;
3669    end Paren_Count;
3670 
3671    -----------
3672    -- Quote --
3673    -----------
3674 
3675    function Quote (Str : String) return String is
3676       S    : String (1 .. Str'Length * 2);
3677       Last : Natural := 0;
3678 
3679    begin
3680       for J in Str'Range loop
3681          case Str (J) is
3682             when '^' | '$' | '|' | '*' | '+' | '?' | '{' |
3683                  '}' | '[' | ']' | '(' | ')' | '\' | '.' =>
3684 
3685                S (Last + 1) := '\';
3686                S (Last + 2) := Str (J);
3687                Last := Last + 2;
3688 
3689             when others =>
3690                S (Last + 1) := Str (J);
3691                Last := Last + 1;
3692          end case;
3693       end loop;
3694 
3695       return S (1 .. Last);
3696    end Quote;
3697 
3698    ------------------
3699    -- Read_Natural --
3700    ------------------
3701 
3702    function Read_Natural
3703      (Program : Program_Data;
3704       IP      : Pointer) return Natural
3705    is
3706    begin
3707       return Character'Pos (Program (IP)) +
3708                256 * Character'Pos (Program (IP + 1));
3709    end Read_Natural;
3710 
3711    -----------------
3712    -- Reset_Class --
3713    -----------------
3714 
3715    procedure Reset_Class (Bitmap : out Character_Class) is
3716    begin
3717       Bitmap := (others => 0);
3718    end Reset_Class;
3719 
3720    ------------------
3721    -- Set_In_Class --
3722    ------------------
3723 
3724    procedure Set_In_Class
3725      (Bitmap : in out Character_Class;
3726       C      : Character)
3727    is
3728       Value : constant Class_Byte := Character'Pos (C);
3729    begin
3730       Bitmap (Value / 8) := Bitmap (Value / 8)
3731         or Bit_Conversion (Value mod 8);
3732    end Set_In_Class;
3733 
3734    -------------------
3735    -- String_Length --
3736    -------------------
3737 
3738    function String_Length
3739      (Program : Program_Data;
3740       P       : Pointer) return Program_Size
3741    is
3742    begin
3743       pragma Assert (Program (P) = EXACT or else Program (P) = EXACTF);
3744       return Character'Pos (Program (P + Next_Pointer_Bytes));
3745    end String_Length;
3746 
3747    --------------------
3748    -- String_Operand --
3749    --------------------
3750 
3751    function String_Operand (P : Pointer) return Pointer is
3752    begin
3753       return P + 4;
3754    end String_Operand;
3755 
3756 end System.Regpat;