File : a-chahan.adb


   1 ------------------------------------------------------------------------------
   2 --                                                                          --
   3 --                         GNAT RUN-TIME COMPONENTS                         --
   4 --                                                                          --
   5 --              A D A . C H A R A C T E R S . H A N D L I N G               --
   6 --                                                                          --
   7 --                                 B o d y                                  --
   8 --                                                                          --
   9 --          Copyright (C) 1992-2013, Free Software Foundation, Inc.         --
  10 --                                                                          --
  11 -- GNAT is free software;  you can  redistribute it  and/or modify it under --
  12 -- terms of the  GNU General Public License as published  by the Free Soft- --
  13 -- ware  Foundation;  either version 3,  or (at your option) any later ver- --
  14 -- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
  15 -- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
  16 -- or FITNESS FOR A PARTICULAR PURPOSE.                                     --
  17 --                                                                          --
  18 --                                                                          --
  19 --                                                                          --
  20 --                                                                          --
  21 --                                                                          --
  22 -- You should have received a copy of the GNU General Public License and    --
  23 -- a copy of the GCC Runtime Library Exception along with this program;     --
  24 -- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
  25 -- <http://www.gnu.org/licenses/>.                                          --
  26 --                                                                          --
  27 -- GNAT was originally developed  by the GNAT team at  New York University. --
  28 -- Extensive contributions were provided by Ada Core Technologies Inc.      --
  29 --                                                                          --
  30 ------------------------------------------------------------------------------
  31 
  32 with Ada.Characters.Latin_1;     use Ada.Characters.Latin_1;
  33 with Ada.Strings.Maps;           use Ada.Strings.Maps;
  34 with Ada.Strings.Maps.Constants; use Ada.Strings.Maps.Constants;
  35 
  36 package body Ada.Characters.Handling is
  37 
  38    ------------------------------------
  39    -- Character Classification Table --
  40    ------------------------------------
  41 
  42    type Character_Flags is mod 256;
  43    for Character_Flags'Size use 8;
  44 
  45    Control    : constant Character_Flags := 1;
  46    Lower      : constant Character_Flags := 2;
  47    Upper      : constant Character_Flags := 4;
  48    Basic      : constant Character_Flags := 8;
  49    Hex_Digit  : constant Character_Flags := 16;
  50    Digit      : constant Character_Flags := 32;
  51    Special    : constant Character_Flags := 64;
  52    Line_Term  : constant Character_Flags := 128;
  53 
  54    Letter     : constant Character_Flags := Lower or Upper;
  55    Alphanum   : constant Character_Flags := Letter or Digit;
  56    Graphic    : constant Character_Flags := Alphanum or Special;
  57 
  58    Char_Map : constant array (Character) of Character_Flags :=
  59    (
  60      NUL                         => Control,
  61      SOH                         => Control,
  62      STX                         => Control,
  63      ETX                         => Control,
  64      EOT                         => Control,
  65      ENQ                         => Control,
  66      ACK                         => Control,
  67      BEL                         => Control,
  68      BS                          => Control,
  69      HT                          => Control,
  70      LF                          => Control + Line_Term,
  71      VT                          => Control + Line_Term,
  72      FF                          => Control + Line_Term,
  73      CR                          => Control + Line_Term,
  74      SO                          => Control,
  75      SI                          => Control,
  76 
  77      DLE                         => Control,
  78      DC1                         => Control,
  79      DC2                         => Control,
  80      DC3                         => Control,
  81      DC4                         => Control,
  82      NAK                         => Control,
  83      SYN                         => Control,
  84      ETB                         => Control,
  85      CAN                         => Control,
  86      EM                          => Control,
  87      SUB                         => Control,
  88      ESC                         => Control,
  89      FS                          => Control,
  90      GS                          => Control,
  91      RS                          => Control,
  92      US                          => Control,
  93 
  94      Space                       => Special,
  95      Exclamation                 => Special,
  96      Quotation                   => Special,
  97      Number_Sign                 => Special,
  98      Dollar_Sign                 => Special,
  99      Percent_Sign                => Special,
 100      Ampersand                   => Special,
 101      Apostrophe                  => Special,
 102      Left_Parenthesis            => Special,
 103      Right_Parenthesis           => Special,
 104      Asterisk                    => Special,
 105      Plus_Sign                   => Special,
 106      Comma                       => Special,
 107      Hyphen                      => Special,
 108      Full_Stop                   => Special,
 109      Solidus                     => Special,
 110 
 111      '0' .. '9'                  => Digit + Hex_Digit,
 112 
 113      Colon                       => Special,
 114      Semicolon                   => Special,
 115      Less_Than_Sign              => Special,
 116      Equals_Sign                 => Special,
 117      Greater_Than_Sign           => Special,
 118      Question                    => Special,
 119      Commercial_At               => Special,
 120 
 121      'A' .. 'F'                  => Upper + Basic + Hex_Digit,
 122      'G' .. 'Z'                  => Upper + Basic,
 123 
 124      Left_Square_Bracket         => Special,
 125      Reverse_Solidus             => Special,
 126      Right_Square_Bracket        => Special,
 127      Circumflex                  => Special,
 128      Low_Line                    => Special,
 129      Grave                       => Special,
 130 
 131      'a' .. 'f'                  => Lower + Basic + Hex_Digit,
 132      'g' .. 'z'                  => Lower + Basic,
 133 
 134      Left_Curly_Bracket          => Special,
 135      Vertical_Line               => Special,
 136      Right_Curly_Bracket         => Special,
 137      Tilde                       => Special,
 138 
 139      DEL                         => Control,
 140      Reserved_128                => Control,
 141      Reserved_129                => Control,
 142      BPH                         => Control,
 143      NBH                         => Control,
 144      Reserved_132                => Control,
 145      NEL                         => Control + Line_Term,
 146      SSA                         => Control,
 147      ESA                         => Control,
 148      HTS                         => Control,
 149      HTJ                         => Control,
 150      VTS                         => Control,
 151      PLD                         => Control,
 152      PLU                         => Control,
 153      RI                          => Control,
 154      SS2                         => Control,
 155      SS3                         => Control,
 156 
 157      DCS                         => Control,
 158      PU1                         => Control,
 159      PU2                         => Control,
 160      STS                         => Control,
 161      CCH                         => Control,
 162      MW                          => Control,
 163      SPA                         => Control,
 164      EPA                         => Control,
 165 
 166      SOS                         => Control,
 167      Reserved_153                => Control,
 168      SCI                         => Control,
 169      CSI                         => Control,
 170      ST                          => Control,
 171      OSC                         => Control,
 172      PM                          => Control,
 173      APC                         => Control,
 174 
 175      No_Break_Space              => Special,
 176      Inverted_Exclamation        => Special,
 177      Cent_Sign                   => Special,
 178      Pound_Sign                  => Special,
 179      Currency_Sign               => Special,
 180      Yen_Sign                    => Special,
 181      Broken_Bar                  => Special,
 182      Section_Sign                => Special,
 183      Diaeresis                   => Special,
 184      Copyright_Sign              => Special,
 185      Feminine_Ordinal_Indicator  => Special,
 186      Left_Angle_Quotation        => Special,
 187      Not_Sign                    => Special,
 188      Soft_Hyphen                 => Special,
 189      Registered_Trade_Mark_Sign  => Special,
 190      Macron                      => Special,
 191      Degree_Sign                 => Special,
 192      Plus_Minus_Sign             => Special,
 193      Superscript_Two             => Special,
 194      Superscript_Three           => Special,
 195      Acute                       => Special,
 196      Micro_Sign                  => Special,
 197      Pilcrow_Sign                => Special,
 198      Middle_Dot                  => Special,
 199      Cedilla                     => Special,
 200      Superscript_One             => Special,
 201      Masculine_Ordinal_Indicator => Special,
 202      Right_Angle_Quotation       => Special,
 203      Fraction_One_Quarter        => Special,
 204      Fraction_One_Half           => Special,
 205      Fraction_Three_Quarters     => Special,
 206      Inverted_Question           => Special,
 207 
 208      UC_A_Grave                  => Upper,
 209      UC_A_Acute                  => Upper,
 210      UC_A_Circumflex             => Upper,
 211      UC_A_Tilde                  => Upper,
 212      UC_A_Diaeresis              => Upper,
 213      UC_A_Ring                   => Upper,
 214      UC_AE_Diphthong             => Upper + Basic,
 215      UC_C_Cedilla                => Upper,
 216      UC_E_Grave                  => Upper,
 217      UC_E_Acute                  => Upper,
 218      UC_E_Circumflex             => Upper,
 219      UC_E_Diaeresis              => Upper,
 220      UC_I_Grave                  => Upper,
 221      UC_I_Acute                  => Upper,
 222      UC_I_Circumflex             => Upper,
 223      UC_I_Diaeresis              => Upper,
 224      UC_Icelandic_Eth            => Upper + Basic,
 225      UC_N_Tilde                  => Upper,
 226      UC_O_Grave                  => Upper,
 227      UC_O_Acute                  => Upper,
 228      UC_O_Circumflex             => Upper,
 229      UC_O_Tilde                  => Upper,
 230      UC_O_Diaeresis              => Upper,
 231 
 232      Multiplication_Sign         => Special,
 233 
 234      UC_O_Oblique_Stroke         => Upper,
 235      UC_U_Grave                  => Upper,
 236      UC_U_Acute                  => Upper,
 237      UC_U_Circumflex             => Upper,
 238      UC_U_Diaeresis              => Upper,
 239      UC_Y_Acute                  => Upper,
 240      UC_Icelandic_Thorn          => Upper + Basic,
 241 
 242      LC_German_Sharp_S           => Lower + Basic,
 243      LC_A_Grave                  => Lower,
 244      LC_A_Acute                  => Lower,
 245      LC_A_Circumflex             => Lower,
 246      LC_A_Tilde                  => Lower,
 247      LC_A_Diaeresis              => Lower,
 248      LC_A_Ring                   => Lower,
 249      LC_AE_Diphthong             => Lower + Basic,
 250      LC_C_Cedilla                => Lower,
 251      LC_E_Grave                  => Lower,
 252      LC_E_Acute                  => Lower,
 253      LC_E_Circumflex             => Lower,
 254      LC_E_Diaeresis              => Lower,
 255      LC_I_Grave                  => Lower,
 256      LC_I_Acute                  => Lower,
 257      LC_I_Circumflex             => Lower,
 258      LC_I_Diaeresis              => Lower,
 259      LC_Icelandic_Eth            => Lower + Basic,
 260      LC_N_Tilde                  => Lower,
 261      LC_O_Grave                  => Lower,
 262      LC_O_Acute                  => Lower,
 263      LC_O_Circumflex             => Lower,
 264      LC_O_Tilde                  => Lower,
 265      LC_O_Diaeresis              => Lower,
 266 
 267      Division_Sign               => Special,
 268 
 269      LC_O_Oblique_Stroke         => Lower,
 270      LC_U_Grave                  => Lower,
 271      LC_U_Acute                  => Lower,
 272      LC_U_Circumflex             => Lower,
 273      LC_U_Diaeresis              => Lower,
 274      LC_Y_Acute                  => Lower,
 275      LC_Icelandic_Thorn          => Lower + Basic,
 276      LC_Y_Diaeresis              => Lower
 277    );
 278 
 279    ---------------------
 280    -- Is_Alphanumeric --
 281    ---------------------
 282 
 283    function Is_Alphanumeric (Item : Character) return Boolean is
 284    begin
 285       return (Char_Map (Item) and Alphanum) /= 0;
 286    end Is_Alphanumeric;
 287 
 288    --------------
 289    -- Is_Basic --
 290    --------------
 291 
 292    function Is_Basic (Item : Character) return Boolean is
 293    begin
 294       return (Char_Map (Item) and Basic) /= 0;
 295    end Is_Basic;
 296 
 297    ------------------
 298    -- Is_Character --
 299    ------------------
 300 
 301    function Is_Character (Item : Wide_Character) return Boolean is
 302    begin
 303       return Wide_Character'Pos (Item) < 256;
 304    end Is_Character;
 305 
 306    ----------------
 307    -- Is_Control --
 308    ----------------
 309 
 310    function Is_Control (Item : Character) return Boolean is
 311    begin
 312       return (Char_Map (Item) and Control) /= 0;
 313    end Is_Control;
 314 
 315    --------------
 316    -- Is_Digit --
 317    --------------
 318 
 319    function Is_Digit (Item : Character) return Boolean is
 320    begin
 321       return Item in '0' .. '9';
 322    end Is_Digit;
 323 
 324    ----------------
 325    -- Is_Graphic --
 326    ----------------
 327 
 328    function Is_Graphic (Item : Character) return Boolean is
 329    begin
 330       return (Char_Map (Item) and Graphic) /= 0;
 331    end Is_Graphic;
 332 
 333    --------------------------
 334    -- Is_Hexadecimal_Digit --
 335    --------------------------
 336 
 337    function Is_Hexadecimal_Digit (Item : Character) return Boolean is
 338    begin
 339       return (Char_Map (Item) and Hex_Digit) /= 0;
 340    end Is_Hexadecimal_Digit;
 341 
 342    ----------------
 343    -- Is_ISO_646 --
 344    ----------------
 345 
 346    function Is_ISO_646 (Item : Character) return Boolean is
 347    begin
 348       return Item in ISO_646;
 349    end Is_ISO_646;
 350 
 351    --  Note: much more efficient coding of the following function is possible
 352    --  by testing several 16#80# bits in a complete word in a single operation
 353 
 354    function Is_ISO_646 (Item : String) return Boolean is
 355    begin
 356       for J in Item'Range loop
 357          if Item (J) not in ISO_646 then
 358             return False;
 359          end if;
 360       end loop;
 361 
 362       return True;
 363    end Is_ISO_646;
 364 
 365    ---------------
 366    -- Is_Letter --
 367    ---------------
 368 
 369    function Is_Letter (Item : Character) return Boolean is
 370    begin
 371       return (Char_Map (Item) and Letter) /= 0;
 372    end Is_Letter;
 373 
 374    ------------------------
 375    -- Is_Line_Terminator --
 376    ------------------------
 377 
 378    function Is_Line_Terminator (Item : Character) return Boolean is
 379    begin
 380       return (Char_Map (Item) and Line_Term) /= 0;
 381    end Is_Line_Terminator;
 382 
 383    --------------
 384    -- Is_Lower --
 385    --------------
 386 
 387    function Is_Lower (Item : Character) return Boolean is
 388    begin
 389       return (Char_Map (Item) and Lower) /= 0;
 390    end Is_Lower;
 391 
 392    -------------
 393    -- Is_Mark --
 394    -------------
 395 
 396    function Is_Mark (Item : Character) return Boolean is
 397       pragma Unreferenced (Item);
 398    begin
 399       return False;
 400    end Is_Mark;
 401 
 402    ---------------------
 403    -- Is_Other_Format --
 404    ---------------------
 405 
 406    function Is_Other_Format (Item : Character) return Boolean is
 407    begin
 408       return Item = Soft_Hyphen;
 409    end Is_Other_Format;
 410 
 411    ------------------------------
 412    -- Is_Punctuation_Connector --
 413    ------------------------------
 414 
 415    function Is_Punctuation_Connector (Item : Character) return Boolean is
 416    begin
 417       return Item = '_';
 418    end Is_Punctuation_Connector;
 419 
 420    --------------
 421    -- Is_Space --
 422    --------------
 423 
 424    function Is_Space (Item : Character) return Boolean is
 425    begin
 426       return Item = ' ' or else Item = No_Break_Space;
 427    end Is_Space;
 428 
 429    ----------------
 430    -- Is_Special --
 431    ----------------
 432 
 433    function Is_Special (Item : Character) return Boolean is
 434    begin
 435       return (Char_Map (Item) and Special) /= 0;
 436    end Is_Special;
 437 
 438    ---------------
 439    -- Is_String --
 440    ---------------
 441 
 442    function Is_String (Item : Wide_String) return Boolean is
 443    begin
 444       for J in Item'Range loop
 445          if Wide_Character'Pos (Item (J)) >= 256 then
 446             return False;
 447          end if;
 448       end loop;
 449 
 450       return True;
 451    end Is_String;
 452 
 453    --------------
 454    -- Is_Upper --
 455    --------------
 456 
 457    function Is_Upper (Item : Character) return Boolean is
 458    begin
 459       return (Char_Map (Item) and Upper) /= 0;
 460    end Is_Upper;
 461 
 462    --------------
 463    -- To_Basic --
 464    --------------
 465 
 466    function To_Basic (Item : Character) return Character is
 467    begin
 468       return Value (Basic_Map, Item);
 469    end To_Basic;
 470 
 471    function To_Basic (Item : String) return String is
 472    begin
 473       return Result : String (1 .. Item'Length) do
 474          for J in Item'Range loop
 475             Result (J - (Item'First - 1)) := Value (Basic_Map, Item (J));
 476          end loop;
 477       end return;
 478    end To_Basic;
 479 
 480    ------------------
 481    -- To_Character --
 482    ------------------
 483 
 484    function To_Character
 485      (Item       : Wide_Character;
 486       Substitute : Character := ' ') return Character
 487    is
 488    begin
 489       if Is_Character (Item) then
 490          return Character'Val (Wide_Character'Pos (Item));
 491       else
 492          return Substitute;
 493       end if;
 494    end To_Character;
 495 
 496    ----------------
 497    -- To_ISO_646 --
 498    ----------------
 499 
 500    function To_ISO_646
 501      (Item       : Character;
 502       Substitute : ISO_646 := ' ') return ISO_646
 503    is
 504    begin
 505       return (if Item in ISO_646 then Item else Substitute);
 506    end To_ISO_646;
 507 
 508    function To_ISO_646
 509      (Item       : String;
 510       Substitute : ISO_646 := ' ') return String
 511    is
 512       Result : String (1 .. Item'Length);
 513 
 514    begin
 515       for J in Item'Range loop
 516          Result (J - (Item'First - 1)) :=
 517            (if Item (J) in ISO_646 then Item (J) else Substitute);
 518       end loop;
 519 
 520       return Result;
 521    end To_ISO_646;
 522 
 523    --------------
 524    -- To_Lower --
 525    --------------
 526 
 527    function To_Lower (Item : Character) return Character is
 528    begin
 529       return Value (Lower_Case_Map, Item);
 530    end To_Lower;
 531 
 532    function To_Lower (Item : String) return String is
 533    begin
 534       return Result : String (1 .. Item'Length) do
 535          for J in Item'Range loop
 536             Result (J - (Item'First - 1)) := Value (Lower_Case_Map, Item (J));
 537          end loop;
 538       end return;
 539    end To_Lower;
 540 
 541    ---------------
 542    -- To_String --
 543    ---------------
 544 
 545    function To_String
 546      (Item       : Wide_String;
 547       Substitute : Character := ' ') return String
 548    is
 549       Result : String (1 .. Item'Length);
 550 
 551    begin
 552       for J in Item'Range loop
 553          Result (J - (Item'First - 1)) := To_Character (Item (J), Substitute);
 554       end loop;
 555 
 556       return Result;
 557    end To_String;
 558 
 559    --------------
 560    -- To_Upper --
 561    --------------
 562 
 563    function To_Upper
 564      (Item : Character) return Character
 565    is
 566    begin
 567       return Value (Upper_Case_Map, Item);
 568    end To_Upper;
 569 
 570    function To_Upper
 571      (Item : String) return String
 572    is
 573    begin
 574       return Result : String (1 .. Item'Length) do
 575          for J in Item'Range loop
 576             Result (J - (Item'First - 1)) := Value (Upper_Case_Map, Item (J));
 577          end loop;
 578       end return;
 579    end To_Upper;
 580 
 581    -----------------------
 582    -- To_Wide_Character --
 583    -----------------------
 584 
 585    function To_Wide_Character
 586      (Item : Character) return Wide_Character
 587    is
 588    begin
 589       return Wide_Character'Val (Character'Pos (Item));
 590    end To_Wide_Character;
 591 
 592    --------------------
 593    -- To_Wide_String --
 594    --------------------
 595 
 596    function To_Wide_String
 597      (Item : String) return Wide_String
 598    is
 599       Result : Wide_String (1 .. Item'Length);
 600 
 601    begin
 602       for J in Item'Range loop
 603          Result (J - (Item'First - 1)) := To_Wide_Character (Item (J));
 604       end loop;
 605 
 606       return Result;
 607    end To_Wide_String;
 608 
 609 end Ada.Characters.Handling;