Fuzion Logo
flang.dev — The Fuzion Language Portal
JavaScript seems to be disabled. Functionality is limited.

codepoint.fz


# This file is part of the Fuzion language implementation.
#
# The Fuzion language implementation is free software: you can redistribute it
# and/or modify it under the terms of the GNU General Public License as published
# by the Free Software Foundation, version 3 of the License.
#
# The Fuzion language implementation is distributed in the hope that it will be
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
# License for more details.
#
# You should have received a copy of the GNU General Public License along with The
# Fuzion language implementation.  If not, see <https://www.gnu.org/licenses/>.


# -----------------------------------------------------------------------
#
#  Tokiwa Software GmbH, Germany
#
#  Source code of Fuzion standard library feature codepoint
#
#  Author: Fridtjof Siebert (siebert@tokiwa.software)
#
# -----------------------------------------------------------------------

# codepoint represents a single Unicode code point
#
codepoint(val u32) : string, codepoints
  pre
    debug: range.contains(val),
    debug: !utf16Surrogate.contains val
is

  # the utf8 encoded bytes for the string representation
  # of this codepoint
  #
  redef utf8 Sequence<u8> is
    if      utf8EncodedInOneByte   .contains val then [ val.low8bits ]
    else if utf8EncodedInTwoBytes  .contains val then [ (u32 0xc0 | (val >>  6) & 0x1f).low8bits,
                                                        (u32 0x80 |  val        & 0x3f).low8bits ]
    else if utf8EncodedInThreeBytes.contains val then [ (u32 0xe0 | (val >> 12) & 0x1f).low8bits,
                                                        (u32 0x80 | (val >>  6) & 0x3f).low8bits,
                                                        (u32 0x80 |  val        & 0x3f).low8bits ]
    else if utf8EncodedInFourBytes .contains val then [ (u32 0xf0 | (val >> 18) & 0x07).low8bits,
                                                        (u32 0x80 | (val >> 12) & 0x3f).low8bits,
                                                        (u32 0x80 | (val >>  6) & 0x3f).low8bits,
                                                        (u32 0x80 |  val        & 0x3f).low8bits ]
    else
      fuzion.std.panic "failed to encode code point {codepoint.this}"


  # is this an ASCII code encoded in one byte
  #
  isAscii => ascii.contains val