File : g-encstr.adb


   1 ------------------------------------------------------------------------------
   2 --                                                                          --
   3 --                         GNAT RUN-TIME COMPONENTS                         --
   4 --                                                                          --
   5 --                    G N A T . E N C O D E _ S T R I N G                   --
   6 --                                                                          --
   7 --                                 B o d y                                  --
   8 --                                                                          --
   9 --           Copyright (C) 2007-2010, Free Software Foundation, Inc.        --
  10 --                                                                          --
  11 -- GNAT is free software;  you can  redistribute it  and/or modify it under --
  12 -- terms of the  GNU General Public License as published  by the Free Soft- --
  13 -- ware  Foundation;  either version 3,  or (at your option) any later ver- --
  14 -- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
  15 -- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
  16 -- or FITNESS FOR A PARTICULAR PURPOSE.                                     --
  17 --                                                                          --
  18 --                                                                          --
  19 --                                                                          --
  20 --                                                                          --
  21 --                                                                          --
  22 -- You should have received a copy of the GNU General Public License and    --
  23 -- a copy of the GCC Runtime Library Exception along with this program;     --
  24 -- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
  25 -- <http://www.gnu.org/licenses/>.                                          --
  26 --                                                                          --
  27 -- GNAT was originally developed  by the GNAT team at  New York University. --
  28 -- Extensive contributions were provided by Ada Core Technologies Inc.      --
  29 --                                                                          --
  30 ------------------------------------------------------------------------------
  31 
  32 with Interfaces; use Interfaces;
  33 
  34 with System.WCh_Con; use System.WCh_Con;
  35 with System.WCh_Cnv; use System.WCh_Cnv;
  36 
  37 package body GNAT.Encode_String is
  38 
  39    -----------------------
  40    -- Local Subprograms --
  41    -----------------------
  42 
  43    procedure Bad;
  44    pragma No_Return (Bad);
  45    --  Raise error for bad character code
  46 
  47    procedure Past_End;
  48    pragma No_Return (Past_End);
  49    --  Raise error for off end of string
  50 
  51    ---------
  52    -- Bad --
  53    ---------
  54 
  55    procedure Bad is
  56    begin
  57       raise Constraint_Error with
  58         "character cannot be encoded with given Encoding_Method";
  59    end Bad;
  60 
  61    ------------------------
  62    -- Encode_Wide_String --
  63    ------------------------
  64 
  65    function Encode_Wide_String (S : Wide_String) return String is
  66       Long : constant Natural := WC_Longest_Sequences (Encoding_Method);
  67       Result : String (1 .. S'Length * Long);
  68       Length : Natural;
  69    begin
  70       Encode_Wide_String (S, Result, Length);
  71       return Result (1 .. Length);
  72    end Encode_Wide_String;
  73 
  74    procedure Encode_Wide_String
  75      (S      : Wide_String;
  76       Result : out String;
  77       Length : out Natural)
  78    is
  79       Ptr : Natural;
  80 
  81    begin
  82       Ptr := S'First;
  83       for J in S'Range loop
  84          Encode_Wide_Character (S (J), Result, Ptr);
  85       end loop;
  86 
  87       Length := Ptr - S'First;
  88    end Encode_Wide_String;
  89 
  90    -----------------------------
  91    -- Encode_Wide_Wide_String --
  92    -----------------------------
  93 
  94    function Encode_Wide_Wide_String (S : Wide_Wide_String) return String is
  95       Long : constant Natural := WC_Longest_Sequences (Encoding_Method);
  96       Result : String (1 .. S'Length * Long);
  97       Length : Natural;
  98    begin
  99       Encode_Wide_Wide_String (S, Result, Length);
 100       return Result (1 .. Length);
 101    end Encode_Wide_Wide_String;
 102 
 103    procedure Encode_Wide_Wide_String
 104      (S      : Wide_Wide_String;
 105       Result : out String;
 106       Length : out Natural)
 107    is
 108       Ptr : Natural;
 109 
 110    begin
 111       Ptr := S'First;
 112       for J in S'Range loop
 113          Encode_Wide_Wide_Character (S (J), Result, Ptr);
 114       end loop;
 115 
 116       Length := Ptr - S'First;
 117    end Encode_Wide_Wide_String;
 118 
 119    ---------------------------
 120    -- Encode_Wide_Character --
 121    ---------------------------
 122 
 123    procedure Encode_Wide_Character
 124      (Char   : Wide_Character;
 125       Result : in out String;
 126       Ptr    : in out Natural)
 127    is
 128    begin
 129       Encode_Wide_Wide_Character
 130         (Wide_Wide_Character'Val (Wide_Character'Pos (Char)), Result, Ptr);
 131 
 132    exception
 133       when Constraint_Error =>
 134          Bad;
 135    end Encode_Wide_Character;
 136 
 137    --------------------------------
 138    -- Encode_Wide_Wide_Character --
 139    --------------------------------
 140 
 141    procedure Encode_Wide_Wide_Character
 142      (Char   : Wide_Wide_Character;
 143       Result : in out String;
 144       Ptr    : in out Natural)
 145    is
 146       U : Unsigned_32;
 147 
 148       procedure Out_Char (C : Character);
 149       pragma Inline (Out_Char);
 150       --  Procedure to store one character for instantiation below
 151 
 152       --------------
 153       -- Out_Char --
 154       --------------
 155 
 156       procedure Out_Char (C : Character) is
 157       begin
 158          if Ptr > Result'Last then
 159             Past_End;
 160          else
 161             Result (Ptr) := C;
 162             Ptr := Ptr + 1;
 163          end if;
 164       end Out_Char;
 165 
 166    --  Start of processing for Encode_Wide_Wide_Character;
 167 
 168    begin
 169       --  Efficient code for UTF-8 case
 170 
 171       if Encoding_Method = WCEM_UTF8 then
 172 
 173          --  Note: for details of UTF8 encoding see RFC 3629
 174 
 175          U := Unsigned_32 (Wide_Wide_Character'Pos (Char));
 176 
 177          --  16#00_0000#-16#00_007F#: 0xxxxxxx
 178 
 179          if U <= 16#00_007F# then
 180             Out_Char (Character'Val (U));
 181 
 182          --  16#00_0080#-16#00_07FF#: 110xxxxx 10xxxxxx
 183 
 184          elsif U <= 16#00_07FF# then
 185             Out_Char (Character'Val (2#11000000# or Shift_Right (U, 6)));
 186             Out_Char (Character'Val (2#10000000# or (U and 2#00111111#)));
 187 
 188          --  16#00_0800#-16#00_FFFF#: 1110xxxx 10xxxxxx 10xxxxxx
 189 
 190          elsif U <= 16#00_FFFF# then
 191             Out_Char (Character'Val (2#11100000# or Shift_Right (U, 12)));
 192             Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 6)
 193                                                           and 2#00111111#)));
 194             Out_Char (Character'Val (2#10000000# or (U and 2#00111111#)));
 195 
 196          --  16#01_0000#-16#10_FFFF#: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 197 
 198          elsif U <= 16#10_FFFF# then
 199             Out_Char (Character'Val (2#11110000# or Shift_Right (U, 18)));
 200             Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 12)
 201                                                           and 2#00111111#)));
 202             Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 6)
 203                                                        and 2#00111111#)));
 204             Out_Char (Character'Val (2#10000000# or (U and 2#00111111#)));
 205 
 206          --  16#0020_0000#-16#03FF_FFFF#: 111110xx 10xxxxxx 10xxxxxx
 207          --                               10xxxxxx 10xxxxxx
 208 
 209          elsif U <= 16#03FF_FFFF# then
 210             Out_Char (Character'Val (2#11111000# or Shift_Right (U, 24)));
 211             Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 18)
 212                                                        and 2#00111111#)));
 213             Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 12)
 214                                                        and 2#00111111#)));
 215             Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 6)
 216                                                        and 2#00111111#)));
 217             Out_Char (Character'Val (2#10000000# or (U and 2#00111111#)));
 218 
 219          --  All other cases are invalid character codes, not this includes:
 220 
 221          --  16#0400_0000#-16#7FFF_FFFF#: 1111110x 10xxxxxx 10xxxxxx
 222          --                               10xxxxxx 10xxxxxx 10xxxxxx
 223 
 224          --  since Wide_Wide_Character values cannot exceed 16#3F_FFFF#
 225 
 226          else
 227             Bad;
 228          end if;
 229 
 230       --  All encoding methods other than UTF-8
 231 
 232       else
 233          Non_UTF8 : declare
 234             procedure UTF_32_To_String is
 235               new UTF_32_To_Char_Sequence (Out_Char);
 236             --  Instantiate conversion procedure with above Out_Char routine
 237 
 238          begin
 239             UTF_32_To_String
 240               (UTF_32_Code (Wide_Wide_Character'Pos (Char)), Encoding_Method);
 241 
 242          exception
 243             when Constraint_Error =>
 244                Bad;
 245          end Non_UTF8;
 246       end if;
 247    end Encode_Wide_Wide_Character;
 248 
 249    --------------
 250    -- Past_End --
 251    --------------
 252 
 253    procedure Past_End is
 254    begin
 255       raise Constraint_Error with "past end of string";
 256    end Past_End;
 257 
 258 end GNAT.Encode_String;