File : sinput.ads


   1 ------------------------------------------------------------------------------
   2 --                                                                          --
   3 --                         GNAT COMPILER COMPONENTS                         --
   4 --                                                                          --
   5 --                               S I N P U T                                --
   6 --                                                                          --
   7 --                                 S p e c                                  --
   8 --                                                                          --
   9 --          Copyright (C) 1992-2016, Free Software Foundation, Inc.         --
  10 --                                                                          --
  11 -- GNAT is free software;  you can  redistribute it  and/or modify it under --
  12 -- terms of the  GNU General Public License as published  by the Free Soft- --
  13 -- ware  Foundation;  either version 3,  or (at your option) any later ver- --
  14 -- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
  15 -- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
  16 -- or FITNESS FOR A PARTICULAR PURPOSE.                                     --
  17 --                                                                          --
  18 --                                                                          --
  19 --                                                                          --
  20 --                                                                          --
  21 --                                                                          --
  22 -- You should have received a copy of the GNU General Public License and    --
  23 -- a copy of the GCC Runtime Library Exception along with this program;     --
  24 -- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
  25 -- <http://www.gnu.org/licenses/>.                                          --
  26 --                                                                          --
  27 -- GNAT was originally developed  by the GNAT team at  New York University. --
  28 -- Extensive contributions were provided by Ada Core Technologies Inc.      --
  29 --                                                                          --
  30 ------------------------------------------------------------------------------
  31 
  32 --  This package contains the input routines used for reading the
  33 --  input source file. The actual I/O routines are in OS_Interface,
  34 --  with this module containing only the system independent processing.
  35 
  36 --  General Note: throughout the compiler, we use the term line or source
  37 --  line to refer to a physical line in the source, terminated by the end of
  38 --  physical line sequence.
  39 
  40 --  There are two distinct concepts of line terminator in GNAT:
  41 
  42 --    A logical line terminator is what corresponds to the "end of a line" as
  43 --    described in RM 2.2 (13). Any of the characters FF, LF, CR or VT or any
  44 --    wide character that is a Line or Paragraph Separator acts as an end of
  45 --    logical line in this sense, and it is essentially irrelevant whether one
  46 --    or more appears in sequence (since if a sequence of such characters is
  47 --    regarded as separate ends of line, then the intervening logical lines
  48 --    are null in any case).
  49 
  50 --    A physical line terminator is a sequence of format effectors that is
  51 --    treated as ending a physical line. Physical lines have no Ada semantic
  52 --    significance, but they are significant for error reporting purposes,
  53 --    since errors are identified by line and column location.
  54 
  55 --  In GNAT, a physical line is ended by any of the sequences LF, CR/LF, or
  56 --  CR. LF is used in typical Unix systems, CR/LF in DOS systems, and CR
  57 --  alone in System 7. In addition, we recognize any of these sequences in
  58 --  any of the operating systems, for better behavior in treating foreign
  59 --  files (e.g. a Unix file with LF terminators transferred to a DOS system).
  60 --  Finally, wide character codes in categories Separator, Line and Separator,
  61 --  Paragraph are considered to be physical line terminators.
  62 
  63 with Alloc;
  64 with Casing; use Casing;
  65 with Namet;  use Namet;
  66 with Table;
  67 with Types;  use Types;
  68 
  69 package Sinput is
  70 
  71    type Type_Of_File is (
  72    --  Indicates type of file being read
  73 
  74       Src,
  75       --  Normal Ada source file
  76 
  77       Config,
  78       --  Configuration pragma file
  79 
  80       Def,
  81       --  Preprocessing definition file
  82 
  83       Preproc);
  84       --  Source file with preprocessing commands to be preprocessed
  85 
  86    type Instance_Id is new Nat;
  87    No_Instance_Id : constant Instance_Id;
  88 
  89    ----------------------------
  90    -- Source License Control --
  91    ----------------------------
  92 
  93    --  The following type indicates the license state of a source if it
  94    --  is known.
  95 
  96    type License_Type is
  97      (Unknown,
  98       --  Licensing status of this source unit is unknown
  99 
 100       Restricted,
 101       --  This is a non-GPL'ed unit that is restricted from depending
 102       --  on GPL'ed units (e.g. proprietary code is in this category)
 103 
 104       GPL,
 105       --  This file is licensed under the unmodified GPL. It is not allowed
 106       --  to depend on Non_GPL units, and Non_GPL units may not depend on
 107       --  this source unit.
 108 
 109       Modified_GPL,
 110       --  This file is licensed under the GNAT modified GPL (see header of
 111       --  this file for wording of the modification). It may depend on other
 112       --  Modified_GPL units or on unrestricted units.
 113 
 114       Unrestricted);
 115       --  A file with this license is permitted to depend on any other
 116       --  units, or have other units depend on it, without violating the
 117       --  license of this unit. Examples are public domain units, and
 118       --  units defined in the RM.
 119 
 120    --  The above license status is checked when the appropriate check is
 121    --  activated and one source depends on another, and the licensing state
 122    --  of both files is known:
 123 
 124    --  The prohibited combinations are:
 125 
 126    --    Restricted file may not depend on GPL file
 127 
 128    --    GPL file may not depend on Restricted file
 129 
 130    --    Modified GPL file may not depend on Restricted file
 131    --    Modified_GPL file may not depend on GPL file
 132 
 133    --  The reason for the last restriction here is that a client depending
 134    --  on a modified GPL file must be sure that the license condition is
 135    --  correct considered transitively.
 136 
 137    --  The licensing status is determined either by the presence of a
 138    --  specific pragma License, or by scanning the header for a predefined
 139    --  statement, or any file if compiling in -gnatg mode.
 140 
 141    -----------------------
 142    -- Source File Table --
 143    -----------------------
 144 
 145    --  The source file table has an entry for each source file read in for
 146    --  this run of the compiler. This table is (default) initialized when
 147    --  the compiler is loaded, and simply accumulates entries as compilation
 148    --  proceeds and various routines in Sinput and its child packages are
 149    --  called to load required source files.
 150 
 151    --  Virtual entries are also created for generic templates when they are
 152    --  instantiated, as described in a separate section later on.
 153 
 154    --  In the case where there are multiple main units (e.g. in the case of
 155    --  the cross-reference tool), this table is not reset between these units,
 156    --  so that a given source file is only read once if it is used by two
 157    --  separate main units.
 158 
 159    --  The entries in the table are accessed using a Source_File_Index that
 160    --  ranges from 1 to Last_Source_File. Each entry has the following fields.
 161 
 162    --  Note: fields marked read-only are set by Sinput or one of its child
 163    --  packages when a source file table entry is created, and cannot be
 164    --  subsequently modified, or alternatively are set only by very special
 165    --  circumstances, documented in the comments.
 166 
 167    --  File_Name : File_Name_Type (read-only)
 168    --    Name of the source file (simple name with no directory information)
 169 
 170    --  Full_File_Name : File_Name_Type (read-only)
 171    --    Full file name (full name with directory info), used for generation
 172    --    of error messages, etc.
 173 
 174    --  File_Type : Type_Of_File (read-only)
 175    --    Indicates type of file (source file, configuration pragmas file,
 176    --    preprocessor definition file, preprocessor input file).
 177 
 178    --  Reference_Name : File_Name_Type (read-only)
 179    --    Name to be used for source file references in error messages where
 180    --    only the simple name of the file is required. Identical to File_Name
 181    --    unless pragma Source_Reference is used to change it. Only processing
 182    --    for the Source_Reference pragma circuit may set this field.
 183 
 184    --  Full_Ref_Name : File_Name_Type (read-only)
 185    --    Name to be used for source file references in error messages where
 186    --    the full name of the file is required. Identical to Full_File_Name
 187    --    unless pragma Source_Reference is used to change it. Only processing
 188    --    for the Source_Reference pragma may set this field.
 189 
 190    --  Debug_Source_Name : File_Name_Type (read-only)
 191    --    Name to be used for source file references in debugging information
 192    --    where only the simple name of the file is required. Identical to
 193    --    Reference_Name unless the -gnatD (debug source file) switch is used.
 194    --    Only processing in Sprint that generates this file is permitted to
 195    --    set this field.
 196 
 197    --  Full_Debug_Name : File_Name_Type (read-only)
 198    --    Name to be used for source file references in debugging information
 199    --    where the full name of the file is required. This is identical to
 200    --    Full_Ref_Name unless the -gnatD (debug source file) switch is used.
 201    --    Only processing in Sprint that generates this file is permitted to
 202    --    set this field.
 203 
 204    --  Instance : Instance_Id (read-only)
 205    --    For entries corresponding to a generic instantiation, unique
 206    --    identifier denoting the full chain of nested instantiations. Set to
 207    --    No_Instance_Id for the case of a normal, non-instantiation entry.
 208    --    See below for details on the handling of generic instantiations.
 209 
 210    --  License : License_Type;
 211    --    License status of source file
 212 
 213    --  Num_SRef_Pragmas : Nat;
 214    --    Number of source reference pragmas present in source file
 215 
 216    --  First_Mapped_Line : Logical_Line_Number;
 217    --    This field stores logical line number of the first line in the
 218    --    file that is not a Source_Reference pragma. If no source reference
 219    --    pragmas are used, then the value is set to No_Line_Number.
 220 
 221    --  Source_Text : Source_Buffer_Ptr (read-only)
 222    --    Text of source file. Note that every source file has a distinct set
 223    --    of non-overlapping logical bounds, so it is possible to determine
 224    --    which file is referenced from a given subscript (Source_Ptr) value.
 225 
 226    --  Source_First : Source_Ptr; (read-only)
 227    --    Subscript of first character in Source_Text. Note that this cannot
 228    --    be obtained as Source_Text'First, because we use virtual origin
 229    --    addressing.
 230 
 231    --  Source_Last : Source_Ptr; (read-only)
 232    --    Subscript of last character in Source_Text. Note that this cannot
 233    --    be obtained as Source_Text'Last, because we use virtual origin
 234    --    addressing, so Source_Text'Last is always Source_Ptr'Last.
 235 
 236    --  Time_Stamp : Time_Stamp_Type; (read-only)
 237    --    Time stamp of the source file
 238 
 239    --  Source_Checksum : Word;
 240    --    Computed checksum for contents of source file. See separate section
 241    --    later on in this spec for a description of the checksum algorithm.
 242 
 243    --  Last_Source_Line : Physical_Line_Number;
 244    --    Physical line number of last source line. While a file is being
 245    --    read, this refers to the last line scanned. Once a file has been
 246    --    completely scanned, it is the number of the last line in the file,
 247    --    and hence also gives the number of source lines in the file.
 248 
 249    --  Keyword_Casing : Casing_Type;
 250    --    Casing style used in file for keyword casing. This is initialized
 251    --    to Unknown, and then set from the first occurrence of a keyword.
 252    --    This value is used only for formatting of error messages.
 253 
 254    --  Identifier_Casing : Casing_Type;
 255    --    Casing style used in file for identifier casing. This is initialized
 256    --    to Unknown, and then set from an identifier in the program as soon as
 257    --    one is found whose casing is sufficiently clear to make a decision.
 258    --    This value is used for formatting of error messages, and also is used
 259    --    in the detection of keywords misused as identifiers.
 260 
 261    --  Inlined_Call : Source_Ptr;
 262    --    Source file location of the subprogram call if this source file entry
 263    --    represents an inlined body or an inherited pragma. Set to No_Location
 264    --    otherwise. This field is read-only for clients.
 265 
 266    --  Inlined_Body : Boolean;
 267    --    This can only be set True if Instantiation has a value other than
 268    --    No_Location. If true it indicates that the instantiation is actually
 269    --    an instance of an inlined body.
 270 
 271    --  Inherited_Pragma : Boolean;
 272    --    This can only be set True if Instantiation has a value other than
 273    --    No_Location. If true it indicates that the instantiation is actually
 274    --    an inherited class-wide pre- or postcondition.
 275 
 276    --  Template : Source_File_Index; (read-only)
 277    --    Source file index of the source file containing the template if this
 278    --    is a generic instantiation. Set to No_Source_File for the normal case
 279    --    of a non-instantiation entry. See Sinput-L for details.
 280 
 281    --  Unit : Unit_Number_Type;
 282    --    Identifies the unit contained in this source file. Set by
 283    --    Initialize_Scanner, must not be subsequently altered.
 284 
 285    --  The source file table is accessed by clients using the following
 286    --  subprogram interface:
 287 
 288    subtype SFI is Source_File_Index;
 289 
 290    System_Source_File_Index : SFI;
 291    --  The file system.ads is always read by the compiler to determine the
 292    --  settings of the target parameters in the private part of System. This
 293    --  variable records the source file index of system.ads. Typically this
 294    --  will be 1 since system.ads is read first.
 295 
 296    function Debug_Source_Name (S : SFI) return File_Name_Type;
 297    function File_Name         (S : SFI) return File_Name_Type;
 298    function File_Type         (S : SFI) return Type_Of_File;
 299    function First_Mapped_Line (S : SFI) return Logical_Line_Number;
 300    function Full_Debug_Name   (S : SFI) return File_Name_Type;
 301    function Full_File_Name    (S : SFI) return File_Name_Type;
 302    function Full_Ref_Name     (S : SFI) return File_Name_Type;
 303    function Identifier_Casing (S : SFI) return Casing_Type;
 304    function Inlined_Body      (S : SFI) return Boolean;
 305    function Inherited_Pragma  (S : SFI) return Boolean;
 306    function Inlined_Call      (S : SFI) return Source_Ptr;
 307    function Instance          (S : SFI) return Instance_Id;
 308    function Keyword_Casing    (S : SFI) return Casing_Type;
 309    function Last_Source_Line  (S : SFI) return Physical_Line_Number;
 310    function License           (S : SFI) return License_Type;
 311    function Num_SRef_Pragmas  (S : SFI) return Nat;
 312    function Reference_Name    (S : SFI) return File_Name_Type;
 313    function Source_Checksum   (S : SFI) return Word;
 314    function Source_First      (S : SFI) return Source_Ptr;
 315    function Source_Last       (S : SFI) return Source_Ptr;
 316    function Source_Text       (S : SFI) return Source_Buffer_Ptr;
 317    function Template          (S : SFI) return Source_File_Index;
 318    function Unit              (S : SFI) return Unit_Number_Type;
 319    function Time_Stamp        (S : SFI) return Time_Stamp_Type;
 320 
 321    procedure Set_Keyword_Casing    (S : SFI; C : Casing_Type);
 322    procedure Set_Identifier_Casing (S : SFI; C : Casing_Type);
 323    procedure Set_License           (S : SFI; L : License_Type);
 324    procedure Set_Unit              (S : SFI; U : Unit_Number_Type);
 325 
 326    function Last_Source_File return Source_File_Index;
 327    --  Index of last source file table entry
 328 
 329    function Num_Source_Files return Nat;
 330    --  Number of source file table entries
 331 
 332    procedure Initialize;
 333    --  Initialize internal tables
 334 
 335    procedure Lock;
 336    --  Lock internal tables
 337 
 338    procedure Unlock;
 339    --  Unlock internal tables
 340 
 341    Main_Source_File : Source_File_Index := No_Source_File;
 342    --  This is set to the source file index of the main unit
 343 
 344    -----------------------------
 345    -- Source_File_Index_Table --
 346    -----------------------------
 347 
 348    --  The Get_Source_File_Index function is called very frequently. Earlier
 349    --  versions cached a single entry, but then reverted to a serial search,
 350    --  and this proved to be a significant source of inefficiency. We then
 351    --  switched to using a table with a start point followed by a serial
 352    --  search. Now we make sure source buffers are on a reasonable boundary
 353    --  (see Types.Source_Align), and we can just use a direct look up in the
 354    --  following table.
 355 
 356    --  Note that this array is pretty large, but in most operating systems
 357    --  it will not be allocated in physical memory unless it is actually used.
 358 
 359    Source_File_Index_Table :
 360      array (Int range 0 .. 1 + (Int'Last / Source_Align)) of Source_File_Index;
 361 
 362    procedure Set_Source_File_Index_Table (Xnew : Source_File_Index);
 363    --  Sets entries in the Source_File_Index_Table for the newly created
 364    --  Source_File table entry whose index is Xnew. The Source_First and
 365    --  Source_Last fields of this entry must be set before the call.
 366 
 367    -----------------------
 368    -- Checksum Handling --
 369    -----------------------
 370 
 371    --  As a source file is scanned, a checksum is computed by taking all the
 372    --  non-blank characters in the file, excluding comment characters, the
 373    --  minus-minus sequence starting a comment, and all control characters
 374    --  except ESC.
 375 
 376    --  The checksum algorithm used is the standard CRC-32 algorithm, as
 377    --  implemented by System.CRC32, except that we do not bother with the
 378    --  final XOR with all 1 bits.
 379 
 380    --  This algorithm ensures that the checksum includes all semantically
 381    --  significant aspects of the program represented by the source file,
 382    --  but is insensitive to layout, presence or contents of comments, wide
 383    --  character representation method, or casing conventions outside strings.
 384 
 385    --  Scans.Checksum is initialized appropriately at the start of scanning
 386    --  a file, and copied into the Source_Checksum field of the file table
 387    --  entry when the end of file is encountered.
 388 
 389    -------------------------------------
 390    -- Handling Generic Instantiations --
 391    -------------------------------------
 392 
 393    --  As described in Sem_Ch12, a generic instantiation involves making a
 394    --  copy of the tree of the generic template. The source locations in
 395    --  this tree directly reference the source of the template. However, it
 396    --  is also possible to find the location of the instantiation.
 397 
 398    --  This is achieved as follows. When an instantiation occurs, a new entry
 399    --  is made in the source file table. This entry points to the same source
 400    --  text, i.e. the file that contains the instantiation, but has a distinct
 401    --  set of Source_Ptr index values. The separate range of Sloc values avoids
 402    --  confusion, and means that the Sloc values can still be used to uniquely
 403    --  identify the source file table entry. It is possible for both entries
 404    --  to point to the same text, because of the virtual origin pointers used
 405    --  in the source table.
 406 
 407    --  The Instance field of this source file table entry, which is set to
 408    --  No_Instance_Id for normal entries, instead contains a value that
 409    --  uniquely identifies a particular instantiation, and the associated
 410    --  entry in the Instances table. The source location of the instantiation
 411    --  can be retrieved using function Instantiation below. In the case of
 412    --  nested instantiations, the Instances table can be used to trace the
 413    --  complete chain of nested instantiations.
 414 
 415    --  Two routines are used to build the special instance entries in the
 416    --  source file table. Create_Instantiation_Source is first called to build
 417    --  the virtual source table entry for the instantiation, and then the
 418    --  Sloc values in the copy are adjusted using Adjust_Instantiation_Sloc.
 419    --  See child unit Sinput.L for details on these two routines.
 420 
 421    generic
 422       with procedure Process (Id : Instance_Id; Inst_Sloc : Source_Ptr);
 423    procedure Iterate_On_Instances;
 424    --  Execute Process for each entry in the instance table
 425 
 426    function Instantiation (S : SFI) return Source_Ptr;
 427    --  For a source file entry that represents an inlined body, source location
 428    --  of the inlined call. For a source file entry that represents an
 429    --  inherited pragma, source location of the declaration to which the
 430    --  overriding subprogram for the inherited pragma is attached. Otherwise,
 431    --  for a source file entry that represents a generic instantiation, source
 432    --  location of the instantiation. Returns No_Location in all other cases.
 433 
 434    -----------------
 435    -- Global Data --
 436    -----------------
 437 
 438    Current_Source_File : Source_File_Index := No_Source_File;
 439    --  Source_File table index of source file currently being scanned.
 440    --  Initialized so that some tools (such as gprbuild) can be built with
 441    --  -gnatVa and pragma Initialize_Scalars without problems.
 442 
 443    Current_Source_Unit : Unit_Number_Type;
 444    --  Unit number of source file currently being scanned. The special value
 445    --  of No_Unit indicates that the configuration pragma file is currently
 446    --  being scanned (this has no entry in the unit table).
 447 
 448    Source_gnat_adc : Source_File_Index := No_Source_File;
 449    --  This is set if a gnat.adc file is present to reference this file
 450 
 451    Source : Source_Buffer_Ptr;
 452    --  Current source (copy of Source_Text (Current_Source_File))
 453 
 454    Internal_Source : aliased Source_Buffer (1 .. 81);
 455    --  This buffer is used internally in the compiler when the lexical analyzer
 456    --  is used to scan a string from within the compiler. The procedure is to
 457    --  establish Internal_Source_Ptr as the value of Source, set the string to
 458    --  be scanned, appropriately terminated, in this buffer, and set Scan_Ptr
 459    --  to point to the start of the buffer. It is a fatal error if the scanner
 460    --  signals an error while scanning a token in this internal buffer.
 461 
 462    Internal_Source_Ptr : constant Source_Buffer_Ptr :=
 463                            Internal_Source'Unrestricted_Access;
 464    --  Pointer to internal source buffer
 465 
 466    -----------------------------------------
 467    -- Handling of Source Line Terminators --
 468    -----------------------------------------
 469 
 470    --  In this section we discuss in detail the issue of terminators used to
 471    --  terminate source lines. The RM says that one or more format effectors
 472    --  (other than horizontal tab) end a source line, and defines the set of
 473    --  such format effectors, but does not talk about exactly how they are
 474    --  represented in the source program (since in general the RM is not in
 475    --  the business of specifying source program formats).
 476 
 477    --  The type Types.Line_Terminator is defined as a subtype of Character
 478    --  that includes CR/LF/VT/FF. The most common line enders in practice
 479    --  are CR (some MAC systems), LF (Unix systems), and CR/LF (DOS/Windows
 480    --  systems). Any of these sequences is recognized as ending a physical
 481    --  source line, and if multiple such terminators appear (e.g. LF/LF),
 482    --  then we consider we have an extra blank line.
 483 
 484    --  VT and FF are recognized as terminating source lines, but they are
 485    --  considered to end a logical line instead of a physical line, so that
 486    --  the line numbering ignores such terminators. The use of VT and FF is
 487    --  mandated by the standard, and correctly handled in a conforming manner
 488    --  by GNAT, but their use is not recommended.
 489 
 490    --  In addition to the set of characters defined by the type in Types, in
 491    --  wide character encoding, then the codes returning True for a call to
 492    --  System.UTF_32.Is_UTF_32_Line_Terminator are also recognized as ending a
 493    --  source line. This includes the standard codes defined above in addition
 494    --  to NEL (NEXT LINE), LINE SEPARATOR and PARAGRAPH SEPARATOR. Again, as in
 495    --  the case of VT and FF, the standard requires we recognize these as line
 496    --  terminators, but we consider them to be logical line terminators. The
 497    --  only physical line terminators recognized are the standard ones (CR,
 498    --  LF, or CR/LF).
 499 
 500    --  However, we do not recognize the NEL (16#85#) character as having the
 501    --  significance of an end of line character when operating in normal 8-bit
 502    --  Latin-n input mode for the compiler. Instead the rule in this mode is
 503    --  that all upper half control codes (16#80# .. 16#9F#) are illegal if they
 504    --  occur in program text, and are ignored if they appear in comments.
 505 
 506    --  First, note that this behavior is fully conforming with the standard.
 507    --  The standard has nothing whatever to say about source representation
 508    --  and implementations are completely free to make their own rules. In
 509    --  this case, in 8-bit mode, GNAT decides that the 16#0085# character is
 510    --  not a representation of the NEL character, even though it looks like it.
 511    --  If you have NEL's in your program, which you expect to be treated as
 512    --  end of line characters, you must use a wide character encoding such as
 513    --  UTF-8 for this code to be recognized.
 514 
 515    --  Second, an explanation of why we take this slightly surprising choice.
 516    --  We have never encountered anyone actually using the NEL character to
 517    --  end lines. One user raised the issue as a result of some experiments,
 518    --  but no one has ever submitted a program encoded this way, in any of
 519    --  the possible encodings. It seems that even when using wide character
 520    --  codes extensively, the normal approach is to use standard line enders
 521    --  (LF or CR/LF). So the failure to recognize NEL in this mode seems to
 522    --  have no practical downside.
 523 
 524    --  Moreover, what we have seen in a significant number of programs from
 525    --  multiple sources is the practice of writing all program text in lower
 526    --  half (ASCII) form, but using UTF-8 encoded wide characters freely in
 527    --  comments, where the comments are terminated by normal line endings
 528    --  (LF or CR/LF). The comments do not contain NEL codes, but they can and
 529    --  do contain other UTF-8 encoding sequences where one of the bytes is the
 530    --  NEL code. Now such programs can of course be compiled in UTF-8 mode,
 531    --  but in practice they also compile fine in standard 8-bit mode without
 532    --  specifying a character encoding. Since this is common practice, it would
 533    --  be a significant upwards incompatibility to recognize NEL in 8-bit mode.
 534 
 535    -----------------
 536    -- Subprograms --
 537    -----------------
 538 
 539    procedure Backup_Line (P : in out Source_Ptr);
 540    --  Back up the argument pointer to the start of the previous line. On
 541    --  entry, P points to the start of a physical line in the source buffer.
 542    --  On return, P is updated to point to the start of the previous line.
 543    --  The caller has checked that a Line_Terminator character precedes P so
 544    --  that there definitely is a previous line in the source buffer.
 545 
 546    procedure Build_Location_String
 547      (Buf : in out Bounded_String;
 548       Loc : Source_Ptr);
 549    --  This procedure builds a string literal of the form "name:line", where
 550    --  name is the file name corresponding to Loc, and line is the line number.
 551    --  If instantiations are involved, additional suffixes of the same form are
 552    --  appended after the separating string " instantiated at ". The resulting
 553    --  string is appended to Buf.
 554 
 555    function Build_Location_String (Loc : Source_Ptr) return String;
 556    --  Functional form returning a String
 557 
 558    procedure Check_For_BOM;
 559    --  Check if the current source starts with a BOM. Scan_Ptr needs to be at
 560    --  the start of the current source. If the current source starts with a
 561    --  recognized BOM, then some flags such as Wide_Character_Encoding_Method
 562    --  are set accordingly, and the Scan_Ptr on return points past this BOM.
 563    --  An error message is output and Unrecoverable_Error raised if a non-
 564    --  recognized BOM is detected. The call has no effect if no BOM is found.
 565 
 566    function Get_Column_Number (P : Source_Ptr) return Column_Number;
 567    --  The ones-origin column number of the specified Source_Ptr value is
 568    --  determined and returned. Tab characters if present are assumed to
 569    --  represent the standard 1,9,17.. spacing pattern.
 570 
 571    function Get_Logical_Line_Number
 572      (P : Source_Ptr) return Logical_Line_Number;
 573    --  The line number of the specified source position is obtained by
 574    --  doing a binary search on the source positions in the lines table
 575    --  for the unit containing the given source position. The returned
 576    --  value is the logical line number, already adjusted for the effect
 577    --  of source reference pragmas. If P refers to the line of a source
 578    --  reference pragma itself, then No_Line_Number is returned. If no
 579    --  source reference pragmas have been encountered, the value returned
 580    --  is the same as the physical line number.
 581 
 582    function Get_Logical_Line_Number_Img
 583      (P : Source_Ptr) return String;
 584    --  Same as Get_Logical_Line_Number, but returns the line number as a
 585    --  string of decimal digits, with no leading space. Destroys Name_Buffer.
 586 
 587    function Get_Physical_Line_Number
 588      (P : Source_Ptr) return Physical_Line_Number;
 589    --  The line number of the specified source position is obtained by
 590    --  doing a binary search on the source positions in the lines table
 591    --  for the unit containing the given source position. The returned
 592    --  value is the physical line number in the source being compiled.
 593 
 594    function Get_Source_File_Index (S : Source_Ptr) return Source_File_Index;
 595    pragma Inline (Get_Source_File_Index);
 596    --  Return file table index of file identified by given source pointer
 597    --  value. This call must always succeed, since any valid source pointer
 598    --  value belongs to some previously loaded source file.
 599 
 600    function Instantiation_Depth (S : Source_Ptr) return Nat;
 601    --  Determine instantiation depth for given Sloc value. A value of
 602    --  zero means that the given Sloc is not in an instantiation.
 603 
 604    function Line_Start (P : Source_Ptr) return Source_Ptr;
 605    --  Finds the source position of the start of the line containing the
 606    --  given source location.
 607 
 608    function Line_Start
 609      (L : Physical_Line_Number;
 610       S : Source_File_Index) return Source_Ptr;
 611    --  Finds the source position of the start of the given line in the
 612    --  given source file, using a physical line number to identify the line.
 613 
 614    function Num_Source_Lines (S : Source_File_Index) return Nat;
 615    --  Returns the number of source lines (this is equivalent to reading
 616    --  the value of Last_Source_Line, but returns Nat rather than a
 617    --  physical line number).
 618 
 619    procedure Register_Source_Ref_Pragma
 620      (File_Name          : File_Name_Type;
 621       Stripped_File_Name : File_Name_Type;
 622       Mapped_Line        : Nat;
 623       Line_After_Pragma  : Physical_Line_Number);
 624    --  Register a source reference pragma. The parameter File_Name is the
 625    --  file name from the pragma, and Stripped_File_Name is this name with
 626    --  the directory information stripped. Both these parameters are set
 627    --  to No_Name if no file name parameter was given in the pragma
 628    --  (which can only happen for the second and subsequent pragmas).
 629    --  Mapped_Line is the line number parameter from the pragma, and
 630    --  Line_After_Pragma is the physical line number of the line that
 631    --  follows the line containing the Source_Reference pragma.
 632 
 633    function Original_Location (S : Source_Ptr) return Source_Ptr;
 634    --  Given a source pointer S, returns the corresponding source pointer
 635    --  value ignoring instantiation copies. For locations that do not
 636    --  correspond to instantiation copies of templates, the argument is
 637    --  returned unchanged. For locations that do correspond to copies of
 638    --  templates from instantiations, the location within the original
 639    --  template is returned. This is useful in canonicalizing locations.
 640 
 641    function Instantiation_Location (S : Source_Ptr) return Source_Ptr;
 642    pragma Inline (Instantiation_Location);
 643    --  Given a source pointer S, returns the corresponding source pointer
 644    --  value of the instantiation if this location is within an instance.
 645    --  If S is not within an instance, then this returns No_Location.
 646 
 647    function Comes_From_Inlined_Body (S : Source_Ptr) return Boolean;
 648    pragma Inline (Comes_From_Inlined_Body);
 649    --  Given a source pointer S, returns whether it comes from an inlined body.
 650    --  This allows distinguishing these source pointers from those that come
 651    --  from instantiation of generics, since Instantiation_Location returns a
 652    --  valid location in both cases.
 653 
 654    function Comes_From_Inherited_Pragma (S : Source_Ptr) return Boolean;
 655    pragma Inline (Comes_From_Inherited_Pragma);
 656    --  Given a source pointer S, returns whether it comes from an inherited
 657    --  pragma. This allows distinguishing these source pointers from those
 658    --  that come from instantiation of generics, since Instantiation_Location
 659    --  returns a valid location in both cases.
 660 
 661    function Top_Level_Location (S : Source_Ptr) return Source_Ptr;
 662    --  Given a source pointer S, returns the argument unchanged if it is
 663    --  not in an instantiation. If S is in an instantiation, then it returns
 664    --  the location of the top level instantiation, i.e. the outer level
 665    --  instantiation in the nested case.
 666 
 667    function Physical_To_Logical
 668      (Line : Physical_Line_Number;
 669       S    : Source_File_Index) return Logical_Line_Number;
 670    --  Given a physical line number in the source file whose source index is
 671    --  S, return the corresponding logical line number. If the physical line
 672    --  number is one containing a Source_Reference pragma, the result will
 673    --  be No_Line_Number.
 674 
 675    procedure Skip_Line_Terminators
 676      (P        : in out Source_Ptr;
 677       Physical : out Boolean);
 678    --  On entry, P points to a line terminator that has been encountered,
 679    --  which is one of FF,LF,VT,CR or a wide character sequence whose value is
 680    --  in category Separator,Line or Separator,Paragraph. P points just past
 681    --  the character that was scanned. The purpose of this routine is to
 682    --  distinguish physical and logical line endings. A physical line ending
 683    --  is one of:
 684    --
 685    --     CR on its own (MAC System 7)
 686    --     LF on its own (Unix and unix-like systems)
 687    --     CR/LF (DOS, Windows)
 688    --     Wide character in Separator,Line or Separator,Paragraph category
 689    --
 690    --     Note: we no longer recognize LF/CR (which we did in some earlier
 691    --     versions of GNAT). The reason for this is that this sequence is not
 692    --     used and recognizing it generated confusion. For example given the
 693    --     sequence LF/CR/LF we were interpreting that as (LF/CR) ending the
 694    --     first line and a blank line ending with CR following, but it is
 695    --     clearly better to interpret this as LF, with a blank line terminated
 696    --     by CR/LF, given that LF and CR/LF are both in common use, but no
 697    --     system we know of uses LF/CR.
 698    --
 699    --  A logical line ending (that is not a physical line ending) is one of:
 700    --
 701    --     VT on its own
 702    --     FF on its own
 703    --
 704    --  On return, P is bumped past the line ending sequence (one of the above
 705    --  six possibilities). Physical is set to True to indicate that a
 706    --  physical end of line was encountered, in which case this routine also
 707    --  makes sure that the lines table for the current source file has an
 708    --  appropriate entry for the start of the new physical line.
 709 
 710    procedure Sloc_Range (N : Node_Id; Min, Max : out Source_Ptr);
 711    --  Given a node, returns the minimum and maximum source locations of any
 712    --  node in the syntactic subtree for the node. This is not quite the same
 713    --  as the locations of the first and last token in the node construct
 714    --  because parentheses at the outer level do not have a recorded Sloc.
 715    --
 716    --  Note: At each step of the tree traversal, we make sure to go back to
 717    --  the Original_Node, since this procedure is concerned with original
 718    --  (source) locations.
 719    --
 720    --  Note: if the tree for the expression contains no "real" Sloc values,
 721    --  i.e. values > No_Location, then both Min and Max are set to
 722    --  Sloc (Original_Node (N)).
 723 
 724    function Source_Offset (S : Source_Ptr) return Nat;
 725    --  Returns the zero-origin offset of the given source location from the
 726    --  start of its corresponding unit. This is used for creating canonical
 727    --  names in some situations.
 728 
 729    procedure Write_Location (P : Source_Ptr);
 730    --  Writes out a string of the form fff:nn:cc, where fff, nn, cc are the
 731    --  file name, line number and column corresponding to the given source
 732    --  location. No_Location and Standard_Location appear as the strings
 733    --  <no location> and <standard location>. If the location is within an
 734    --  instantiation, then the instance location is appended, enclosed in
 735    --  square brackets (which can nest if necessary). Note that this routine
 736    --  is used only for internal compiler debugging output purposes (which
 737    --  is why the somewhat cryptic use of brackets is acceptable).
 738 
 739    procedure wl (P : Source_Ptr);
 740    pragma Export (Ada, wl);
 741    --  Equivalent to Write_Location (P); Write_Eol; for calls from GDB
 742 
 743    procedure Write_Time_Stamp (S : Source_File_Index);
 744    --  Writes time stamp of specified file in YY-MM-DD HH:MM.SS format
 745 
 746    procedure Tree_Read;
 747    --  Initializes internal tables from current tree file using the relevant
 748    --  Table.Tree_Read routines.
 749 
 750    procedure Tree_Write;
 751    --  Writes out internal tables to current tree file using the relevant
 752    --  Table.Tree_Write routines.
 753 
 754 private
 755    pragma Inline (File_Name);
 756    pragma Inline (Full_File_Name);
 757    pragma Inline (File_Type);
 758    pragma Inline (Reference_Name);
 759    pragma Inline (Full_Ref_Name);
 760    pragma Inline (Debug_Source_Name);
 761    pragma Inline (Full_Debug_Name);
 762    pragma Inline (Instance);
 763    pragma Inline (License);
 764    pragma Inline (Num_SRef_Pragmas);
 765    pragma Inline (First_Mapped_Line);
 766    pragma Inline (Source_Text);
 767    pragma Inline (Source_First);
 768    pragma Inline (Source_Last);
 769    pragma Inline (Time_Stamp);
 770    pragma Inline (Source_Checksum);
 771    pragma Inline (Last_Source_Line);
 772    pragma Inline (Keyword_Casing);
 773    pragma Inline (Identifier_Casing);
 774    pragma Inline (Inlined_Call);
 775    pragma Inline (Inlined_Body);
 776    pragma Inline (Inherited_Pragma);
 777    pragma Inline (Template);
 778    pragma Inline (Unit);
 779 
 780    pragma Inline (Set_Keyword_Casing);
 781    pragma Inline (Set_Identifier_Casing);
 782 
 783    pragma Inline (Last_Source_File);
 784    pragma Inline (Num_Source_Files);
 785    pragma Inline (Num_Source_Lines);
 786 
 787    No_Instance_Id : constant Instance_Id := 0;
 788 
 789    -------------------------
 790    -- Source_Lines Tables --
 791    -------------------------
 792 
 793    type Lines_Table_Type is
 794      array (Physical_Line_Number) of Source_Ptr;
 795    --  Type used for lines table. The entries are indexed by physical line
 796    --  numbers. The values are the starting Source_Ptr values for the start
 797    --  of the corresponding physical line. Note that we make this a bogus
 798    --  big array, sized as required, so that we avoid the use of fat pointers.
 799 
 800    type Lines_Table_Ptr is access all Lines_Table_Type;
 801    --  Type used for pointers to line tables
 802 
 803    type Logical_Lines_Table_Type is
 804      array (Physical_Line_Number) of Logical_Line_Number;
 805    --  Type used for logical lines table. This table is used if a source
 806    --  reference pragma is present. It is indexed by physical line numbers,
 807    --  and contains the corresponding logical line numbers. An entry that
 808    --  corresponds to a source reference pragma is set to No_Line_Number.
 809    --  Note that we make this a bogus big array, sized as required, so that
 810    --  we avoid the use of fat pointers.
 811 
 812    type Logical_Lines_Table_Ptr is access all Logical_Lines_Table_Type;
 813    --  Type used for pointers to logical line tables
 814 
 815    -----------------------
 816    -- Source_File Table --
 817    -----------------------
 818 
 819    --  See earlier descriptions for meanings of public fields
 820 
 821    type Source_File_Record is record
 822       File_Name         : File_Name_Type;
 823       Reference_Name    : File_Name_Type;
 824       Debug_Source_Name : File_Name_Type;
 825       Full_Debug_Name   : File_Name_Type;
 826       Full_File_Name    : File_Name_Type;
 827       Full_Ref_Name     : File_Name_Type;
 828       Instance          : Instance_Id;
 829       Num_SRef_Pragmas  : Nat;
 830       First_Mapped_Line : Logical_Line_Number;
 831       Source_Text       : Source_Buffer_Ptr;
 832       Source_First      : Source_Ptr;
 833       Source_Last       : Source_Ptr;
 834       Source_Checksum   : Word;
 835       Last_Source_Line  : Physical_Line_Number;
 836       Template          : Source_File_Index;
 837       Unit              : Unit_Number_Type;
 838       Time_Stamp        : Time_Stamp_Type;
 839       File_Type         : Type_Of_File;
 840       Inlined_Call      : Source_Ptr;
 841       Inlined_Body      : Boolean;
 842       Inherited_Pragma  : Boolean;
 843       License           : License_Type;
 844       Keyword_Casing    : Casing_Type;
 845       Identifier_Casing : Casing_Type;
 846 
 847       --  The following fields are for internal use only (i.e. only in the
 848       --  body of Sinput or its children, with no direct access by clients).
 849 
 850       Sloc_Adjust : Source_Ptr;
 851       --  A value to be added to Sloc values for this file to reference the
 852       --  corresponding lines table. This is zero for the non-instantiation
 853       --  case, and set so that the addition references the ultimate template
 854       --  for the instantiation case. See Sinput-L for further details.
 855 
 856       Lines_Table : Lines_Table_Ptr;
 857       --  Pointer to lines table for this source. Updated as additional
 858       --  lines are accessed using the Skip_Line_Terminators procedure.
 859       --  Note: the lines table for an instantiation entry refers to the
 860       --  original line numbers of the template; see Sinput-L for details.
 861 
 862       Logical_Lines_Table : Logical_Lines_Table_Ptr;
 863       --  Pointer to logical lines table for this source. Non-null only if
 864       --  a source reference pragma has been processed. Updated as lines
 865       --  are accessed using the Skip_Line_Terminators procedure.
 866 
 867       Lines_Table_Max : Physical_Line_Number;
 868       --  Maximum subscript value for currently allocated Lines_Table
 869       --  and (if present) the allocated Logical_Lines_Table. The value
 870       --  Last_Source_Line gives the maximum used value, this gives the
 871       --  maximum allocated value.
 872 
 873    end record;
 874 
 875    --  The following representation clause is meant to leave no holes in the
 876    --  above record, so that Tree_Gen never writes uninitialized values to the
 877    --  file. NOTE(review): bits 2 .. 7 of byte 75 appear unassigned; confirm.
 878 
 879    AS : constant Pos := Standard'Address_Size;
 880 
 881    for Source_File_Record use record
 882       File_Name           at  0 range 0 .. 31;
 883       Reference_Name      at  4 range 0 .. 31;
 884       Debug_Source_Name   at  8 range 0 .. 31;
 885       Full_Debug_Name     at 12 range 0 .. 31;
 886       Full_File_Name      at 16 range 0 .. 31;
 887       Full_Ref_Name       at 20 range 0 .. 31;
 888       Instance            at 48 range 0 .. 31;
 889       Num_SRef_Pragmas    at 24 range 0 .. 31;
 890       First_Mapped_Line   at 28 range 0 .. 31;
 891       Source_First        at 32 range 0 .. 31;
 892       Source_Last         at 36 range 0 .. 31;
 893       Source_Checksum     at 40 range 0 .. 31;
 894       Last_Source_Line    at 44 range 0 .. 31;
 895       Template            at 52 range 0 .. 31;
 896       Unit                at 56 range 0 .. 31;
 897       Time_Stamp          at 60 range 0 .. 8 * Time_Stamp_Length - 1;
 898       File_Type           at 74 range 0 .. 7;
 899       Inlined_Call        at 88 range 0 .. 31;
 900       Inlined_Body        at 75 range 0 .. 0;
 901       Inherited_Pragma    at 75 range 1 .. 1;
 902       License             at 76 range 0 .. 7;
 903       Keyword_Casing      at 77 range 0 .. 7;
 904       Identifier_Casing   at 78 range 0 .. 15;
 905       Sloc_Adjust         at 80 range 0 .. 31;
 906       Lines_Table_Max     at 84 range 0 .. 31;
 907 
 908       --  The following fields are pointers, so we have to specialize their
 909       --  lengths using pointer size, obtained above as Standard'Address_Size.
 910 
 911       Source_Text         at 92 range 0      .. AS - 1;
 912       Lines_Table         at 92 range AS     .. AS * 2 - 1;
 913       Logical_Lines_Table at 92 range AS * 2 .. AS * 3 - 1;
 914    end record;
 915 
 916    for Source_File_Record'Size use 92 * 8 + AS * 3;
 917    --  This ensures that we did not leave out any fields
 918 
 919    package Source_File is new Table.Table (
 920      Table_Component_Type => Source_File_Record,
 921      Table_Index_Type     => Source_File_Index,
 922      Table_Low_Bound      => 1,
 923      Table_Initial        => Alloc.Source_File_Initial,
 924      Table_Increment      => Alloc.Source_File_Increment,
 925      Table_Name           => "Source_File");
 926 
 927    --  Auxiliary table containing source location of instantiations. Index 0
 928    --  is used for code that does not come from an instance.
 929 
 930    package Instances is new Table.Table (
 931      Table_Component_Type => Source_Ptr,
 932      Table_Index_Type     => Instance_Id,
 933      Table_Low_Bound      => 0,
 934      Table_Initial        => Alloc.Source_File_Initial,
 935      Table_Increment      => Alloc.Source_File_Increment,
 936      Table_Name           => "Instances");
 937 
 938    -----------------
 939    -- Subprograms --
 940    -----------------
 941 
 942    procedure Alloc_Line_Tables
 943      (S       : in out Source_File_Record;
 944       New_Max : Nat);
 945    --  Allocate or reallocate the lines table for the given source file so
 946    --  that it can accommodate at least New_Max lines. Also allocates or
 947    --  reallocates logical lines table if source ref pragmas are present.
 948 
 949    procedure Add_Line_Tables_Entry
 950      (S : in out Source_File_Record;
 951       P : Source_Ptr);
 952    --  Increment line table size by one (reallocating the lines table if
 953    --  needed) and set the new entry to contain the value P. Also bumps
 954    --  the Last_Source_Line field. If source reference pragmas are
 955    --  present, also increments logical lines table size by one, and
 956    --  sets new entry.
 957 
 958    procedure Trim_Lines_Table (S : Source_File_Index);
 959    --  Set lines table size for entry S in the source file table to
 960    --  correspond to the current value of Num_Source_Lines, releasing
 961    --  any unused storage. This is used by Sinput.L and Sinput.D.
 962 
 963 end Sinput;