File : a-stuten.adb


   1 ------------------------------------------------------------------------------
   2 --                                                                          --
   3 --                         GNAT RUN-TIME COMPONENTS                         --
   4 --                                                                          --
   5 --              A D A . S T R I N G S . U T F _ E N C O D I N G             --
   6 --                                                                          --
   7 --                                 B o d y                                  --
   8 --                                                                          --
   9 --             Copyright (C) 2010, Free Software Foundation, Inc.           --
  10 --                                                                          --
  11 -- GNAT is free software;  you can  redistribute it  and/or modify it under --
  12 -- terms of the  GNU General Public License as published  by the Free Soft- --
  13 -- ware  Foundation;  either version 3,  or (at your option) any later ver- --
  14 -- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
  15 -- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
  16 -- or FITNESS FOR A PARTICULAR PURPOSE.                                     --
  17 --                                                                          --
  18 --                                                                          --
  19 --                                                                          --
  20 --                                                                          --
  21 --                                                                          --
  22 -- You should have received a copy of the GNU General Public License and    --
  23 -- a copy of the GCC Runtime Library Exception along with this program;     --
  24 -- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
  25 -- <http://www.gnu.org/licenses/>.                                          --
  26 --                                                                          --
  27 -- GNAT was originally developed  by the GNAT team at  New York University. --
  28 -- Extensive contributions were provided by Ada Core Technologies Inc.      --
  29 --                                                                          --
  30 ------------------------------------------------------------------------------
  31 
  32 package body Ada.Strings.UTF_Encoding is
  33    use Interfaces;
  34 
  35    --------------
  36    -- Encoding --
  37    --------------
  38 
  39    function Encoding
  40      (Item    : UTF_String;
  41       Default : Encoding_Scheme := UTF_8) return Encoding_Scheme
  42    is
  43    begin
  44       if Item'Length >= 2 then
  45          if Item (Item'First .. Item'First + 1) = BOM_16BE then
  46             return UTF_16BE;
  47 
  48          elsif Item (Item'First .. Item'First + 1) = BOM_16LE then
  49             return UTF_16LE;
  50 
  51          elsif Item'Length >= 3
  52            and then Item (Item'First .. Item'First + 2) = BOM_8
  53          then
  54             return UTF_8;
  55          end if;
  56       end if;
  57 
  58       return Default;
  59    end Encoding;
  60 
  61    -----------------
  62    -- From_UTF_16 --
  63    -----------------
  64 
  65    function From_UTF_16
  66      (Item          : UTF_16_Wide_String;
  67       Output_Scheme : UTF_XE_Encoding;
  68       Output_BOM    : Boolean := False) return UTF_String
  69    is
  70       BSpace : constant Natural := 2 * Boolean'Pos (Output_BOM);
  71       Result : UTF_String (1 .. 2 * Item'Length + BSpace);
  72       Len    : Natural;
  73       C      : Unsigned_16;
  74       Iptr   : Natural;
  75 
  76    begin
  77       if Output_BOM then
  78          Result (1 .. 2) :=
  79            (if Output_Scheme = UTF_16BE then BOM_16BE else BOM_16LE);
  80          Len := 2;
  81       else
  82          Len := 0;
  83       end if;
  84 
  85       --  Skip input BOM
  86 
  87       Iptr := Item'First;
  88 
  89       if Iptr <= Item'Last and then Item (Iptr) = BOM_16 (1) then
  90          Iptr := Iptr + 1;
  91       end if;
  92 
  93       --  UTF-16BE case
  94 
  95       if Output_Scheme = UTF_16BE then
  96          while Iptr <= Item'Last loop
  97             C := To_Unsigned_16 (Item (Iptr));
  98             Result (Len + 1) := Character'Val (Shift_Right (C, 8));
  99             Result (Len + 2) := Character'Val (C and 16#00_FF#);
 100             Len := Len + 2;
 101             Iptr := Iptr + 1;
 102          end loop;
 103 
 104       --  UTF-16LE case
 105 
 106       else
 107          while Iptr <= Item'Last loop
 108             C := To_Unsigned_16 (Item (Iptr));
 109             Result (Len + 1) := Character'Val (C and 16#00_FF#);
 110             Result (Len + 2) := Character'Val (Shift_Right (C, 8));
 111             Len := Len + 2;
 112             Iptr := Iptr + 1;
 113          end loop;
 114       end if;
 115 
 116       return Result (1 .. Len);
 117    end From_UTF_16;
 118 
 119    --------------------------
 120    -- Raise_Encoding_Error --
 121    --------------------------
 122 
 123    procedure Raise_Encoding_Error (Index : Natural) is
 124       Val : constant String := Index'Img;
 125    begin
 126       raise Encoding_Error with
 127         "bad input at Item (" & Val (Val'First + 1 .. Val'Last) & ')';
 128    end Raise_Encoding_Error;
 129 
 130    ---------------
 131    -- To_UTF_16 --
 132    ---------------
 133 
 134    function To_UTF_16
 135      (Item         : UTF_String;
 136       Input_Scheme : UTF_XE_Encoding;
 137       Output_BOM   : Boolean := False) return UTF_16_Wide_String
 138    is
 139       Result : UTF_16_Wide_String (1 .. Item'Length / 2 + 1);
 140       Len    : Natural;
 141       Iptr   : Natural;
 142 
 143    begin
 144       if Item'Length mod 2 /= 0 then
 145          raise Encoding_Error with "UTF-16BE/LE string has odd length";
 146       end if;
 147 
 148       --  Deal with input BOM, skip if OK, error if bad BOM
 149 
 150       Iptr := Item'First;
 151 
 152       if Item'Length >= 2 then
 153          if Item (Iptr .. Iptr + 1) = BOM_16BE then
 154             if Input_Scheme = UTF_16BE then
 155                Iptr := Iptr + 2;
 156             else
 157                Raise_Encoding_Error (Iptr);
 158             end if;
 159 
 160          elsif Item (Iptr .. Iptr + 1) = BOM_16LE then
 161             if Input_Scheme = UTF_16LE then
 162                Iptr := Iptr + 2;
 163             else
 164                Raise_Encoding_Error (Iptr);
 165             end if;
 166 
 167          elsif Item'Length >= 3 and then Item (Iptr .. Iptr + 2) = BOM_8 then
 168             Raise_Encoding_Error (Iptr);
 169          end if;
 170       end if;
 171 
 172       --  Output BOM if specified
 173 
 174       if Output_BOM then
 175          Result (1) := BOM_16 (1);
 176          Len := 1;
 177       else
 178          Len := 0;
 179       end if;
 180 
 181       --  UTF-16BE case
 182 
 183       if Input_Scheme = UTF_16BE then
 184          while Iptr < Item'Last loop
 185             Len := Len + 1;
 186             Result (Len) :=
 187               Wide_Character'Val
 188                 (Character'Pos (Item (Iptr)) * 256 +
 189                    Character'Pos (Item (Iptr + 1)));
 190             Iptr := Iptr + 2;
 191          end loop;
 192 
 193       --  UTF-16LE case
 194 
 195       else
 196          while Iptr < Item'Last loop
 197             Len := Len + 1;
 198             Result (Len) :=
 199               Wide_Character'Val
 200                 (Character'Pos (Item (Iptr)) +
 201                  Character'Pos (Item (Iptr + 1)) * 256);
 202             Iptr := Iptr + 2;
 203          end loop;
 204       end if;
 205 
 206       return Result (1 .. Len);
 207    end To_UTF_16;
 208 
 209 end Ada.Strings.UTF_Encoding;