File : g-byorma.adb


   1 ------------------------------------------------------------------------------
   2 --                                                                          --
   3 --                         GNAT COMPILER COMPONENTS                         --
   4 --                                                                          --
   5 --                 G N A T . B Y T E _ O R D E R _ M A R K                  --
   6 --                                                                          --
   7 --                                 B o d y                                  --
   8 --                                                                          --
   9 --                     Copyright (C) 2006-2013, AdaCore                     --
  10 --                                                                          --
  11 -- GNAT is free software;  you can  redistribute it  and/or modify it under --
  12 -- terms of the  GNU General Public License as published  by the Free Soft- --
  13 -- ware  Foundation;  either version 3,  or (at your option) any later ver- --
  14 -- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
  15 -- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
  16 -- or FITNESS FOR A PARTICULAR PURPOSE.                                     --
  17 --                                                                          --
  18 --                                                                          --
  19 --                                                                          --
  20 --                                                                          --
  21 --                                                                          --
  22 -- You should have received a copy of the GNU General Public License and    --
  23 -- a copy of the GCC Runtime Library Exception along with this program;     --
  24 -- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
  25 -- <http://www.gnu.org/licenses/>.                                          --
  26 --                                                                          --
  27 -- GNAT was originally developed  by the GNAT team at  New York University. --
  28 -- Extensive contributions were provided by Ada Core Technologies Inc.      --
  29 --                                                                          --
  30 ------------------------------------------------------------------------------
  31 
  32 pragma Compiler_Unit_Warning;
  33 
  34 package body GNAT.Byte_Order_Mark is
  35 
  36    --------------
  37    -- Read_BOM --
  38    --------------
  39 
  40    procedure Read_BOM
  41      (Str         : String;
  42       Len         : out Natural;
  43       BOM         : out BOM_Kind;
  44       XML_Support : Boolean := False)
  45    is
  46    begin
  47       --  Note: the order of these tests is important, because in some cases
  48       --  one sequence is a prefix of a longer sequence, and we must test for
  49       --  the longer sequence first
  50 
  51       --  UTF-32 (big-endian)
  52 
  53       if Str'Length >= 4
  54         and then Str (Str'First)     = Character'Val (16#00#)
  55         and then Str (Str'First + 1) = Character'Val (16#00#)
  56         and then Str (Str'First + 2) = Character'Val (16#FE#)
  57         and then Str (Str'First + 3) = Character'Val (16#FF#)
  58       then
  59          Len := 4;
  60          BOM := UTF32_BE;
  61 
  62       --  UTF-32 (little-endian)
  63 
  64       elsif Str'Length >= 4
  65         and then Str (Str'First)     = Character'Val (16#FF#)
  66         and then Str (Str'First + 1) = Character'Val (16#FE#)
  67         and then Str (Str'First + 2) = Character'Val (16#00#)
  68         and then Str (Str'First + 3) = Character'Val (16#00#)
  69       then
  70          Len := 4;
  71          BOM := UTF32_LE;
  72 
  73       --  UTF-16 (big-endian)
  74 
  75       elsif Str'Length >= 2
  76         and then Str (Str'First) = Character'Val (16#FE#)
  77         and then Str (Str'First + 1) = Character'Val (16#FF#)
  78       then
  79          Len := 2;
  80          BOM := UTF16_BE;
  81 
  82       --  UTF-16 (little-endian)
  83 
  84       elsif Str'Length >= 2
  85         and then Str (Str'First) = Character'Val (16#FF#)
  86         and then Str (Str'First + 1) = Character'Val (16#FE#)
  87       then
  88          Len := 2;
  89          BOM := UTF16_LE;
  90 
  91       --  UTF-8 (endian-independent)
  92 
  93       elsif Str'Length >= 3
  94         and then Str (Str'First)     = Character'Val (16#EF#)
  95         and then Str (Str'First + 1) = Character'Val (16#BB#)
  96         and then Str (Str'First + 2) = Character'Val (16#BF#)
  97       then
  98          Len := 3;
  99          BOM := UTF8_All;
 100 
 101       --  UCS-4 (big-endian) XML only
 102 
 103       elsif XML_Support
 104         and then Str'Length >= 4
 105         and then Str (Str'First)     = Character'Val (16#00#)
 106         and then Str (Str'First + 1) = Character'Val (16#00#)
 107         and then Str (Str'First + 2) = Character'Val (16#00#)
 108         and then Str (Str'First + 3) = Character'Val (16#3C#)
 109       then
 110          Len := 0;
 111          BOM := UCS4_BE;
 112 
 113       --  UCS-4 (little-endian) XML case
 114 
 115       elsif XML_Support
 116         and then Str'Length >= 4
 117         and then Str (Str'First)     = Character'Val (16#3C#)
 118         and then Str (Str'First + 1) = Character'Val (16#00#)
 119         and then Str (Str'First + 2) = Character'Val (16#00#)
 120         and then Str (Str'First + 3) = Character'Val (16#00#)
 121       then
 122          Len := 0;
 123          BOM := UCS4_LE;
 124 
 125       --  UCS-4 (unusual byte order 2143) XML case
 126 
 127       elsif XML_Support
 128         and then Str'Length >= 4
 129         and then Str (Str'First)     = Character'Val (16#00#)
 130         and then Str (Str'First + 1) = Character'Val (16#00#)
 131         and then Str (Str'First + 2) = Character'Val (16#3C#)
 132         and then Str (Str'First + 3) = Character'Val (16#00#)
 133       then
 134          Len := 0;
 135          BOM := UCS4_2143;
 136 
 137       --  UCS-4 (unusual byte order 3412) XML case
 138 
 139       elsif XML_Support
 140         and then Str'Length >= 4
 141         and then Str (Str'First)     = Character'Val (16#00#)
 142         and then Str (Str'First + 1) = Character'Val (16#3C#)
 143         and then Str (Str'First + 2) = Character'Val (16#00#)
 144         and then Str (Str'First + 3) = Character'Val (16#00#)
 145       then
 146          Len := 0;
 147          BOM := UCS4_3412;
 148 
 149       --  UTF-16 (big-endian) XML case
 150 
 151       elsif XML_Support
 152         and then Str'Length >= 4
 153         and then Str (Str'First)     = Character'Val (16#00#)
 154         and then Str (Str'First + 1) = Character'Val (16#3C#)
 155         and then Str (Str'First + 2) = Character'Val (16#00#)
 156         and then Str (Str'First + 3) = Character'Val (16#3F#)
 157       then
 158          Len := 0;
 159          BOM := UTF16_BE;
 160 
 161       --  UTF-32 (little-endian) XML case
 162 
 163       elsif XML_Support
 164         and then Str'Length >= 4
 165         and then Str (Str'First)     = Character'Val (16#3C#)
 166         and then Str (Str'First + 1) = Character'Val (16#00#)
 167         and then Str (Str'First + 2) = Character'Val (16#3F#)
 168         and then Str (Str'First + 3) = Character'Val (16#00#)
 169       then
 170          Len := 0;
 171          BOM := UTF16_LE;
 172 
 173       --  Unrecognized special encodings XML only
 174 
 175       elsif XML_Support
 176         and then Str'Length >= 4
 177         and then Str (Str'First)     = Character'Val (16#3C#)
 178         and then Str (Str'First + 1) = Character'Val (16#3F#)
 179         and then Str (Str'First + 2) = Character'Val (16#78#)
 180         and then Str (Str'First + 3) = Character'Val (16#6D#)
 181       then
 182          --  UTF-8, ASCII, some part of ISO8859, Shift-JIS, EUC,...
 183 
 184          Len := 0;
 185          BOM := Unknown;
 186 
 187       --  No BOM recognized
 188 
 189       else
 190          Len := 0;
 191          BOM := Unknown;
 192       end if;
 193    end Read_BOM;
 194 
 195 end GNAT.Byte_Order_Mark;