File : s-wchcnv.ads


   1 ------------------------------------------------------------------------------
   2 --                                                                          --
   3 --                         GNAT RUN-TIME COMPONENTS                         --
   4 --                                                                          --
   5 --                       S Y S T E M . W C H _ C N V                        --
   6 --                                                                          --
   7 --                                 S p e c                                  --
   8 --                                                                          --
   9 --          Copyright (C) 1992-2013, Free Software Foundation, Inc.         --
  10 --                                                                          --
  11 -- GNAT is free software;  you can  redistribute it  and/or modify it under --
  12 -- terms of the  GNU General Public License as published  by the Free Soft- --
  13 -- ware  Foundation;  either version 3,  or (at your option) any later ver- --
  14 -- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
  15 -- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
  16 -- or FITNESS FOR A PARTICULAR PURPOSE.                                     --
  17 --                                                                          --
  18 --                                                                          --
  19 --                                                                          --
  20 --                                                                          --
  21 --                                                                          --
  22 -- You should have received a copy of the GNU General Public License and    --
  23 -- a copy of the GCC Runtime Library Exception along with this program;     --
  24 -- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
  25 -- <http://www.gnu.org/licenses/>.                                          --
  26 --                                                                          --
  27 -- GNAT was originally developed  by the GNAT team at  New York University. --
  28 -- Extensive contributions were provided by Ada Core Technologies Inc.      --
  29 --                                                                          --
  30 ------------------------------------------------------------------------------
  31 
  32 --  This package contains generic subprograms used for converting between
  33 --  sequences of Character and Wide_Character. Wide_Wide_Character values
  34 --  are also handled, but represented using integer range types defined in
  35 --  this package, so that this package can be used from applications that
  36 --  are restricted to Ada 95 compatibility (such as the compiler itself).
  37 
  38 --  All the algorithms for encoding and decoding are isolated in this package
  39 --  and in System.WCh_JIS and should not be duplicated elsewhere. The only
  40 --  exception to this is that GNAT.Decode_String and GNAT.Encode_String have
  41 --  their own circuits for UTF-8 conversions, for improved efficiency.
  42 
  43 --  This unit may be used directly from an application program by providing
  44 --  an appropriate WITH, and the interface can be expected to remain stable.
  45 
  46 pragma Compiler_Unit_Warning;
  47 
  48 with System.WCh_Con;
  49 
  50 package System.WCh_Cnv is
  51    pragma Pure;
  52 
  53    type UTF_32_Code is range 0 .. 16#7FFF_FFFF#;
  54    for UTF_32_Code'Size use 32;
  55    --  Range of allowed UTF-32 encoding values
  56 
  57    type UTF_32_String is array (Positive range <>) of UTF_32_Code;
  58 
  59    generic
  60       with function In_Char return Character;
  61    function Char_Sequence_To_Wide_Char
  62      (C  : Character;
  63       EM : System.WCh_Con.WC_Encoding_Method) return Wide_Character;
  64    --  C is the first character of a sequence of one or more characters which
  65    --  represent a wide character sequence. Calling the function In_Char for
  66    --  additional characters as required, Char_Sequence_To_Wide_Char returns
  67    --  the corresponding wide character value. Constraint_Error is raised if
  68    --  the sequence of characters encountered is not a valid wide character
  69    --  sequence for the given encoding method.
  70    --
  71    --  Note on the use of brackets encoding (WCEM_Brackets). The brackets
  72    --  encoding method is ambiguous in the context of this function, since
  73    --  there is no way to tell if ["1234"] is eight unencoded characters or
  74    --  one encoded character. In the context of Ada sources, any sequence
  75    --  starting [" must be the start of an encoding (since that sequence is
  76    --  not valid in Ada source otherwise). The routines in this package use
  77    --  the same approach. If the input string contains the sequence [" then
  78    --  this is assumed to be the start of a brackets encoding sequence, and
  79    --  if it does not match the syntax, an error is raised.
  80 
  81    generic
  82       with function In_Char return Character;
  83    function Char_Sequence_To_UTF_32
  84      (C  : Character;
  85       EM : System.WCh_Con.WC_Encoding_Method) return UTF_32_Code;
  86    --  Similar to Char_Sequence_To_Wide_Char, but the function returns a code
  87    --  from the full UTF_32 code set, which covers the full range of possible
  88    --  values in Wide_Wide_Character. The result can be converted to
  89    --  Wide_Wide_Character form using Wide_Wide_Character'Val.
  90 
  91    generic
  92       with procedure Out_Char (C : Character);
  93    procedure Wide_Char_To_Char_Sequence
  94      (WC : Wide_Character;
  95       EM : System.WCh_Con.WC_Encoding_Method);
  96    --  Given a wide character, converts it into a sequence of one or
  97    --  more characters, calling the given Out_Char procedure for each.
  98    --  Constraint_Error is raised if the given wide character value is
  99    --  not a valid value for the given encoding method.
 100    --
 101    --  Note on brackets encoding (WCEM_Brackets). For the input routines above,
 102    --  upper half characters can be represented as ["hh"] but this procedure
 103    --  will only use brackets encodings for codes higher than 16#FF#, so upper
 104    --  half characters will be output as single Character values.
 105 
 106    generic
 107       with procedure Out_Char (C : Character);
 108    procedure UTF_32_To_Char_Sequence
 109      (Val : UTF_32_Code;
 110       EM  : System.WCh_Con.WC_Encoding_Method);
 111    --  Similar to Wide_Char_To_Char_Sequence, but the input value is a code
 112    --  from the full UTF_32 code set, which covers the full range of possible
 113    --  values in Wide_Wide_Character. To convert a Wide_Wide_Character value,
 114    --  the caller can use Wide_Wide_Character'Pos in the call.
 115 
 116 end System.WCh_Cnv;