Fuzion • APIs • Standard APIs • String.fz
String.fz


# This file is part of the Fuzion language implementation.
#
# The Fuzion language implementation is free software: you can redistribute it
# and/or modify it under the terms of the GNU General Public License as published
# by the Free Software Foundation, version 3 of the License.
#
# The Fuzion language implementation is distributed in the hope that it will be
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
# License for more details.
#
# You should have received a copy of the GNU General Public License along with The
# Fuzion language implementation.  If not, see <https://www.gnu.org/licenses/>.


# -----------------------------------------------------------------------
#
#  Tokiwa Software GmbH, Germany
#
#  Source code of Fuzion standard library feature string
#
#  Author: Fridtjof Siebert (siebert@tokiwa.software)
#
# -----------------------------------------------------------------------

# string -- immutable sequences of utf8 encoded unicode characters
#
public String ref : property.equatable, property.hashable, property.orderable is

  # converting a string to a string is just returning string.this
  public redef as_string String => String.this

  # the bytes of this string
  #
  # this is abstract: any concrete string must implement utf8
  public utf8 Sequence u8 => abstract

  # is this string empty?
  public is_empty => utf8.is_empty

  # returns true if string is empty or contains whitespace only
  public is_blank => utf8 ∀ u -> u.is_ascii_white_space

  # returns true if string contains whitespace
  public contains_whitespace => utf8 ∃ u -> u.is_ascii_white_space

  # length of this string in bytes
  public byte_length => utf8.count

  # length of this string in codepoints
  public codepoint_length => as_codepoints.count

  # concatenate string with string representation of another object
  public infix + (other Any) String =>
    String.concat String.this other.as_string



  # repeat string given number of times
  public infix * (n i32) ref : String
  pre
    n ≥ 0
    # NYI String.this.utf8.finite, Strings where utf8 is a list are currently always infinite.
  is
    redef utf8 Sequence u8 =>
      bytes := String.this.utf8
      bytes.cycle.take (bytes.count * n)


  # equality: compare two strings byte-by-byte
  #
  # result is true iff the strings have the same number of utf8 bytes and those
  # bytes are equal.
  #
  public fixed type.equality(a, b String) =>
    ((a.utf8.zip b.utf8 aa,bb->aa=bb) ∀ x->x)
      & a.utf8.count = b.utf8.count


  # is `a` less than or equal to `b` when comparing their utf8 bytes?
  #
  # This defines a total order over strings that is unrelated to alphabetic order.
  #
  # The bytes of `a` and `b` are paired up position by position, pairs of equal
  # bytes are dropped, and each remaining pair `(c, d)` is mapped to `c ≤ d`.
  # The value for the first differing pair is the result.  The argument passed
  # to `first` is the comparison of the byte counts, which is presumably used
  # when no differing pair exists (see the comment inside the call chain).
  #
  fixed type.lteq(a, b String) =>
    a.utf8
      .zip b.utf8 (c,d)->(c,d)
      .filter (x ->
        (c, d) := x
        c != d)
      .map (x ->
        (c, d) := x
        c ≤ d)
      # if all bytes are equal lengths of strings might still differ
      .first a.utf8.count≤b.utf8.count


  # create hash code from a string
  #
  # The hash starts at 0.  For every utf8 byte `b` of `a`, the current hash is
  # shifted left by 13 bits and or-ed with itself shifted right by 51 bits
  # (13 + 51 = 64, so this is a rotation of the 64-bit value), and then
  # xor-ed with `b`.  The loop runs until the bytes are exhausted; the `else`
  # branch then yields the accumulated hash.
  #
  public type.hash_code(a String.this) u64 =>
    # shift distances, together they cover all 64 bits of the hash
    sh_l := u64 13
    sh_r := u64 51
    for
      h u64 := 0, ((h << sh_l) | (h >> sh_r)) ^ b.as_u64;
      b in a.utf8
    while true
    else
      h


  # internal helper to create error for failed parsing
  #
  private parse_error(msg String) => error "failed to parse '{String.this}': $msg"


  # parse this string as a signed 32-bit integer value
  #
  public parse_i32        => parse_i32 10
  public parse_i32_binary => parse_i32 2
  public parse_i32_octal  => parse_i32 8
  public parse_i32_hex    => parse_i32 16
  public parse_i32 (base i32) outcome i32
    pre 1 < base ≤ 36
  =>
    parse_integer i32 base


  # parse this string as an unsigned 32-bit integer value
  #
  public parse_u32        => parse_u32 10
  public parse_u32_binary => parse_u32 2
  public parse_u32_octal  => parse_u32 8
  public parse_u32_hex    => parse_u32 16
  public parse_u32 (base u32) outcome u32
    pre u32 1 < base ≤ (u32 36)
  =>
    parse_integer u32 base


  # parse this string as a signed 64-bit integer value
  #
  public parse_i64        => parse_i64 10
  public parse_i64_binary => parse_i64 2
  public parse_i64_octal  => parse_i64 8
  public parse_i64_hex    => parse_i64 16
  public parse_i64 (base i64) outcome i64
    pre i64 1 < base ≤ (i64 36)
  =>
    parse_integer i64 base


  # parse this string as an unsigned 64-bit integer value
  #
  public parse_u64        => parse_u64 10
  public parse_u64_binary => parse_u64 2
  public parse_u64_octal  => parse_u64 8
  public parse_u64_hex    => parse_u64 16
  public parse_u64 (base u64) outcome u64
    pre u64 1 < base ≤ (u64 36)
  =>
    parse_integer u64 base


  # parse this string as an int value of arbitrary size
  #
  public parse_int        => parse_int int(10)
  public parse_int_binary => parse_int int(2)
  public parse_int_octal  => parse_int int(8)
  public parse_int_hex    => parse_int int(16)
  public parse_int (base int) outcome int
    pre (int 1) < base ≤ int 36
  =>
    parse_integer int base


  # parse this string as a integer value given as type parameter
  #
  public parse_integer(
    # the integer type
    T type : integer,

    # base gives the base of the integer, must be between 2 and 36, inclusive.
    base T

    ) outcome T

    pre T.one < base ≤ T.from_u32 36

  =>
    match utf8.as_list
      nil => parse_error "empty string"
      c Cons =>
        negate := c.head = String.minus_char
        d := if (negate || c.head = String.plus_char) String.zero_char else c.head
        parse_integer T base negate T.zero d c.tail


  # recursive helper for parse_integer T
  #
  # Handles one character `c` per call: `c` is converted to its digit value,
  # the value parsed so far is multiplied by `base` and the digit is added
  # (or subtracted when parsing a negative number).  The helper then recurses
  # on the remaining characters `s`.  The arithmetic uses the `*?`, `+?` and
  # `-?` operators on `num_option`; a `nil` result at the end of the input is
  # reported as numerical overflow.
  #
  private parse_integer(
    # the integer type
    T type : integer,

    # base gives the base, between 2 and 36
    base T,

    # do we parse a negative number?
    neg bool,

    # the value of the highest digits already parsed
    hi num_option T,

    # the current character to be parsed
    c u8,

    # the remaining characters to be parsed
    s list u8

    ) outcome T

  =>

    # digit value of c: '0'..'9' map to 0..9, 'a'..'z' and 'A'..'Z' map to
    # 10..35, anything else is an error
    d outcome u8 :=
      if      (String.zero_char  ≤ c ≤ String.nine_char ) c - String.zero_char
      else if (String.a_char     ≤ c ≤ String.z_char    ) c - String.a_char    + 10
      else if (String.cap_a_char ≤ c ≤ String.cap_z_char) c - String.cap_a_char + 10
      else parse_error "non-digit found"

    d.bind T b->
      t := parse_integer.this.T.from_u32 b.as_u32  # digit value b converted to T
      if t ≥ base
        parse_error "invalid integer digit for base $base"
      else
        # shift the digits parsed so far by one position in the given base
        hi := hi *? base;
        # negative numbers are accumulated by subtracting the digits
        v := if (neg) hi -? t
              else     hi +? t
        match s
          c Cons =>
            parse_integer T base neg v c.head c.tail
          nil =>
            v ? nil => parse_error "numerical overflow"
              | u T => u

  # convert this string into an array of codepoints.
  #
  codepoint_array => as_codepoints.as_array


  # convert this string into a Sequence of codepoint and errors for encoding problems
  # found in the underlying utf8 bytes
  #
  public as_codepoints Sequence codepoint =>
    codepoints_and_errors
      .map (x ->
        match x
          c codepoint => c
          e error     => codepoint 0xFFFD # 'REPLACEMENT CHARACTER' (U+FFFD)
      )


  # replaces invalid UTF-8 byte sequences in this string with the Unicode
  # replacement character (U+FFFD).
  to_valid_utf8 String =>
    to_valid_utf8 (codepoint 0xFFFD) # 'REPLACEMENT CHARACTER'


  # replaces invalid UTF-8 byte sequences in this string with the given
  # string.
  to_valid_utf8(replacement String) String =>
    codepoints_and_errors
      .reduce "" r,x->
        match x
          c codepoint => r + c
          e error => r + replacement


  # convert this string into a list of codepoint and errors for encoding problems
  # found in the underlying utf8 bytes
  #
  public codepoints_and_errors list (outcome codepoint) =>
    codepoints_and_errors utf8.as_list


  # the list instance returned by as_codepoints
  #
  private codepoints_and_errors(l list u8) list (outcome codepoint) =>
    match l
      nil => nil
      c1 Cons =>
        # return list of c and rest
        ret(c outcome codepoint, rest list u8) list (outcome codepoint) =>
          ref : Cons (outcome codepoint) (list (outcome codepoint))
            head => c
            tail => codepoints_and_errors rest

        p := codepoint.type
        e(msg String) => error "Bad UTF8 encoding found: cannot decode $msg"

        b1 := c1.head
        e1(msg String) => ret (e "$b1: $msg") c1.tail
        # UTF-8 definition taken from https://en.wikipedia.org/wiki/UTF-8
        if b1.as_u32 ∈ p.utf8_encoded_in_one_byte           # ASCII
          ret (codepoint b1.as_u32) c1.tail
        else if 0xc0 ≤ b1 ≤ 0xf4
          match c1.tail
            nil => e1 "end of String, expected continuation byte"
            c2 Cons =>
              b2 := c2.head
              e2(msg String) => ret (e "$b1, $b2: $msg") c2.tail
              if (b2 & 0xc0) != 0x80
                e2 "expected continuation byte in the range 0x80..0xbf."
              else if 0xc0 ≤ b1 ≤ 0xdf   # 0x0080..0x7ff encoded in 2 bytes
                res := (b1.as_u32 & 0x1f) << 6 | (b2.as_u32 & 0x3f)
                if res ∉ p.utf8_encoded_in_two_bytes
                  e2 "codepoint $res uses overlong 2-byte encoding, allowed for range {p.utf8_encoded_in_two_bytes}."
                else
                  ret (codepoint res) c2.tail
              else if u8 0xe0 ≤ b1
                match c2.tail
                  nil => e2 "end of String, expected continuation byte"
                  c3 Cons =>
                    b3 := c3.head
                    e3(msg String) => ret (e "$b1, $b2, $b3: $msg") c3.tail
                    if (b3 & 0xc0) != 0x80
                      e3 "expected two continuation bytes in the range 0x80..0xbf."
                    else if b1 ≤ 0xef       # 0x0800..0xffff encoded in 3 bytes
                      res := (((b1.as_u32 & 0x0f) << 12) |
                              ((b2.as_u32 & 0x3f) <<  6) |
                              ((b3.as_u32 & 0x3f)      )   )
                      if res ∉ p.utf8_encoded_in_three_bytes
                        e3 "codepoint $res uses overlong 3-byte encoding, allowed for range {p.utf8_encoded_in_two_bytes}."
                      else if res ∈ p.utf16_surrogate
                        e3 "codepoint $res is invalid, values in the range {p.utf16_surrogate} are reserved for UTF-16 surrogate halves."
                      else if res ∈ p.not_a_character
                        e3 "codepoint $res is not a valid unicode character {p.not_a_character}."
                      else
                        ret (codepoint res) c3.tail
                    else                     # 0x010000..0x10ffff encoded in 4 bytes
                      match c3.tail
                        nil => e3 "end of String, expected continuation byte"
                        c4 Cons =>
                          b4 := c4.head
                          e4(msg String) => ret (e "$b1, $b2, $b3, $b4: $msg") c4.tail
                          if (b4 & 0xc0) != 0x80
                            e4 "expected three continuation bytes in the range 0x80..0xbf."
                          else
                            res := (((b1.as_u32 & 0x07) << 18) |
                                    ((b2.as_u32 & 0x3f) << 12) |
                                    ((b3.as_u32 & 0x3f) <<  6) |
                                    ((b4.as_u32 & 0x3f)      )   )
                            if res ∉ p.utf8_encoded_in_four_bytes
                              e4 "codepoint $res uses overlong 4-byte encoding."
                            else if res > (u32 0x10ffff)
                              e4 "codepoint $res is outside of the allowed range for codepoints 0x000000..0x10ffff."
                            else
                              ret (codepoint res) c4.tail
              else fuzion.std.panic "String.codepoints_and_errors: missing case for $b1"
        else if 0x80 ≤ b1 ≤ 0xbf then e1 "stray continuation byte without preceding leading byte."
        else if 0xf5 ≤ b1 ≤ 0xfd then e1 "codes 0xf5..0xfd are undefined."
        else if 0xfe ≤ b1 ≤ 0xff then e1 "codes 0xfe and 0xff are undefined, used for endianess checking."
        else
          fuzion.std.panic "String.codepoints_and_errors: missing case for $b1"


  # create substring of this string consisting of bytes from (inclusive) .. to (exclusive).
  #
  public substring(from, to i32) String
    pre
      debug: 0 ≤ from ≤ to ≤ String.this.byte_length
    # NYI check if from/to are valid start/end bytes?
  =>
    String.from_bytes (String.this.utf8.slice from to)


  # create substring of this string consisting of bytes from (inclusive) .. byte_length (exclusive).
  #
  public substring(from i32) String
    pre
      debug: 0 ≤ from ≤ byte_length
  =>
    substring from byte_length


  # create substring of this string consisting of codepoints from (inclusive) .. to (exclusive).
  #
  public substring_codepoint(from, to i32) String
    pre
      debug: 0 ≤ from ≤ to ≤ String.this.codepoint_length
  =>
    codepoint_array
      .slice from to
      .map String c->c  # NYI: this should maybe not be needed since codepoint is a string
      .fold String.concat


  # create substring of this string consisting of codepoints from (inclusive) .. codepoint_length (exclusive).
  #
  public substring_codepoint(from i32) String
    pre
      debug: 0 ≤ from ≤ codepoint_length
  =>
    substring from codepoint_length


  # check if this string starts with given prefix
  #
  public starts_with(prefx String) =>
    (container.searchable_sequence utf8).starts_with prefx.utf8


  # check if this string ends with given suffix
  #
  public ends_with(suffix String) =>
    l  := byte_length
    sl := suffix.byte_length
    end := utf8.drop l-sl
    (container.searchable_sequence end).starts_with suffix.utf8


  # find (utf8-byte-) index of 'substring' witin this string.
  #
  public find(substring String) =>
    (container.searchable_sequence utf8).find substring.utf8


  # find (utf8-byte-) index of 'substring' witin this string.
  #
  public find(substring String,
              # start search at this index
              from i32)
  =>
    (container.searchable_sequence (utf8.drop from)).find substring.utf8


  # find (utf8-byte-) index of last occurrence of 'substring'
  # within this string.
  public find_last(substring String) option i32 =>
    find_last substring nil


  # find (utf8-byte-) index of last occurrence of 'substring'
  # within this string.
  #
  # Recursive helper: `found` is nil as long as no occurrence was seen.
  # Otherwise it holds the sum of the `skip` values of all previous steps,
  # i.e., the byte index just behind the most recent occurrence, counted from
  # the start of the string the search began with.
  #
  # Each step searches this string for the first occurrence, then continues
  # on the bytes behind that occurrence.  Since a whole occurrence is skipped,
  # occurrences that overlap a previous one are not considered.
  #
  private find_last(substring String, found option i32) option i32 =>
    match find substring
      # no further occurrence: go back from the end of the last one to its start
      nil => found >>= (pos -> pos - substring.byte_length)
      idx i32 =>
        # number of bytes of this string up to the end of the occurrence
        skip := idx + substring.byte_length
        s := String.from_bytes (utf8.drop skip)

        s.find_last substring (skip + (found.get 0))


  # replace all occurrences of old by new
  #
  public replace (old, new String) => String.from_bytes ((container.searchable_sequence utf8).replace old.utf8 new.utf8)


  # replace the first n occurrences of old by new
  public replace(old, new String, n u64) => String.from_bytes ((container.searchable_sequence utf8).replace old.utf8 new.utf8 n)


  # does this string contain the given 'substring'
  #
  public contains (substring String) => find(substring).exists


  # count number of occurrences of given 'substring' in this string
  #
  public count (substring String) =>
    (container.searchable_sequence utf8).count_matches substring.utf8


  # Split string separated by (ASCII) white space
  #
  # Leading and trailing white space is ignored, repeated white space is treated
  # like a single white space
  #
  # The result is a, possibly empty, list of separate non-empty strings.
  #
  public split list String =>
    l := utf8.as_list.drop_while (c -> c.is_ascii_white_space)
    if l.is_empty
      nil
    else
      h :=  String.from_bytes (l.take_while (c -> !c.is_ascii_white_space)).as_array
      t := (String.from_bytes (l.drop_while (c -> !c.is_ascii_white_space))).split
      ref : Cons String (list String)
        head => h
        tail => t


  # split string at s
  #
  public split(s String) list String
    pre
      !s.is_empty
    =>
      split0 s nil false


  # split string after s, that is do the same thing as split but
  # include the separator s in the resulting strings
  #
  public split_after(s String) list String
    pre
      !s.is_empty
    =>
      split0 s nil true


  # split string at s, for at most n occurrences of s
  #
  # if s occurs in the string less than n times, the resulting list will have
  # less than n elements
  #
  public split_n(s String, n u32) list String
    pre
      debug: !s.is_empty
      debug: n > (u32 0)
    =>
      split0 s n false


  # split string after s, for at most n occurrences of s
  #
  # if s occurs in the string less than n times, the resulting list will have
  # less than n elements
  #
  public split_after_n(s String, n u32) list String
    pre
      !s.is_empty
      n > (u32 0)
    =>
      split0 s n true


  # split string at s, if there is no limit, otherwise if limit is an integer n,
  # for at most n occurrences of s
  #
  # if split_after is true, all but the last element of the resulting list include
  # the separator
  #
  # helper feature which unifies the code of the different split features in one
  #
  private split0(s String, limit option u32, split_after bool) list String
    pre
      debug: !s.is_empty
      debug: match limit
               nil => true
               n u32 => n > (u32 0)
    =>
      (container.searchable_sequence utf8).split0 s.utf8 limit split_after
          .map_to_list x->(String.from_bytes x)


  # remove leading and trailing white space from this string
  #
  public trim String =>
    s0 := utf8

    s1 := (s0.drop_while c->c.is_ascii_white_space).reverse
    s2 := (s1.drop_while c->c.is_ascii_white_space).reverse

    String.from_bytes s2


  # remove leading white space from this string
  #
  trim_start =>
    String.from_bytes (utf8.drop_while c->c.is_ascii_white_space)


  # remove trailing white space from this string
  #
  trim_end =>
    String.from_bytes (utf8.as_list.reverse.drop_while c->c.is_ascii_white_space).reverse


  # pad this string at the end with spaces such that its `codepoint_length` is at least `n`.
  #
  public pad(n i32)
  pre
    debug: n >= 0
  post
    debug: result.codepoint_length >= n
    debug: ((result.codepoint_length != n): result = String.this)
  =>
    pad " " n


  # pad this string at the beginning with spaces such that its `codepoint_length` is at least `n`.
  #
  public pad_left(n i32)
  pre
    debug: n >= 0
  post
    debug: result.codepoint_length >= n
    debug: ((result.codepoint_length != n): result = String.this)
  =>
    pad_left " " n


  # pad this string at the beginning and at the end with spaces such that its `codepoint_length` is at least `n`.
  #
  # In case the required number of codepoints to add is odd, the padding at the end will be longer.
  #
  public pad_center(n i32)
  pre
    debug: n >= 0
  post
    debug: result.codepoint_length >= n
    debug: ((result.codepoint_length != n): result = String.this)
  =>
    pad_center " " n


  # Helper for pad, pad_left and pad_right to determine minimum number of copies
  # of `p` needed to pad String.this to length >= n.
  #
  private pad_count(p String, n i32) =>
    pl := p.codepoint_length
    add_cps := max 0 n-codepoint_length
    (add_cps + pl - 1) / pl


  # pad this string at the end with `p` such that its `codepoint_length` is at least `n`.
  #
  public pad(p String, n i32)
  pre
    safety: p.codepoint_length > 0
    debug: n >= 0
  post
    debug: result.codepoint_length >= n
    debug: ((result.codepoint_length >= n+p.codepoint_length): result = String.this)
  =>
    String.this + p * (pad_count p n)


  # pad this string at the beginning with `p` such that its `codepoint_length` is at least `n`.
  #
  public pad_left(p String, n i32)
  pre
    safety: p.codepoint_length > 0
    debug: n >= 0
  post
    debug: result.codepoint_length >= n
    debug: ((result.codepoint_length >= n+p.codepoint_length): result = String.this)
  =>
    p * (pad_count p n) + String.this


  # pad this string at the beginning and at the end with `p` such that its `codepoint_length` is at least `n`.
  #
  # In case the required number of copies of `p` is odd, the padding at the end will be longer.
  #
  public pad_center(p String, n i32)
  pre
    safety: p.codepoint_length > 0
    debug: n >= 0
  post
    debug: result.codepoint_length >= n
    debug: ((result.codepoint_length >= n+p.codepoint_length): result = String.this)
  =>
    pn := pad_count p n
    l := pn/2
    r := pn-l
    p*l + String.this + p*r


  # is this part of given set
  #
  element_of(s container.Set String) => s.contains String.this
  public infix ∈ (s container.Set String) => String.this.element_of s


  # is this not part of given set
  #
  not_element_of(s container.Set String) => !element_of s
  public infix ∉ (s container.Set String) => String.this.not_element_of s


  # return string of at least length l by
  # padding codepoint s to start of string
  #
  public pad_codepoint_start(l i32, s String) String
  pre s.codepoint_length = 1
  =>
    missing := l - codepoint_length
    if missing ≤ 0
      String.this
    else
      (s * missing) + String.this


  # Splits this string at codepoints where p is true and returns the result as a
  # list of strings. In case multiple, neighboring codepoints in the string are
  # evaluated to be true by p, this does not cause empty strings to be added to
  # the result list, rather this case is being treated as being one big separator.
  #
  public fields_func(p codepoint -> bool) list String =>
    state_wrapper(l list String, in_run bool, start_pos, current_pos i32) is

    i := state_wrapper (list String).empty false 0 0
    last_state := as_codepoints.reduce state_wrapper i ((r, c) ->
      match p c
        TRUE =>
          match r.in_run
            TRUE => state_wrapper r.l true r.current_pos (r.current_pos + 1)
            FALSE => state_wrapper (r.l ++ [(substring_codepoint r.start_pos (r.current_pos))].as_list) true r.current_pos (r.current_pos + 1)
        FALSE =>
          match r.in_run
            TRUE => state_wrapper r.l false r.current_pos (r.current_pos + 1)
            FALSE => state_wrapper r.l false r.start_pos (r.current_pos + 1))

    match last_state.in_run
      TRUE => last_state.l
      FALSE => last_state.l ++ [(substring_codepoint last_state.start_pos)]


  # Cuts out the first appearance of the string sep from this string, in other words,
  # returns a tuple of two strings and a bool, the first string is the substring before
  # the first appreance of sep, the second string is the substring after the first
  # appearance of sep. The bool result is true iff sep appears in this string.
  #
  # If sep does not appear in this string at all, return this string as the first string,
  # the empty string as the second, and false as the bool.
  #
  public cut(sep String) tuple String String bool =>
    match find sep
      nil => (String.this.as_string, "", false)
      i i32 =>
        l := byte_length
        sepl := sep.byte_length

        before := String.from_bytes (utf8.slice 0 i)
        after := String.from_bytes (utf8.slice (i + sepl) l)
        (before, after, true)


  # convert this string to upper case
  public upper_case String =>
    String.from_codepoints (as_codepoints.map (c -> character_encodings.unicode.mappings.upper_case[c.val].get c))


  # convert this string to lower case
  public lower_case String =>
    String.from_codepoints (as_codepoints.map (c -> character_encodings.unicode.mappings.lower_case[c.val].get c))


  # Does this String consist of nothing but ascii codepoints?
  #
  public is_ascii =>
    as_codepoints ∀ cp -> cp.is_ascii


  # monoid of strings with infix + operation.
  #
  public type.concat : Monoid String is
    redef infix ∙ (a, b String) => a + b
    redef e => ""


  # monoid of strings with infix '+ sep +' operation, i.e., concatenate with
  # given separator
  #
  public type.concat(sep String) : Monoid String is
    redef infix ∙ (a, b String) String => if (a.is_empty || b.is_empty) a + b else a + sep + b
    redef e => ""


  # concat strings a and b by
  # concatenating their byte sequences.
  #
  public type.concat(a, b String) String =>
    ref : String
      utf8 Sequence u8 => a.utf8 ++ b.utf8


  # Takes a sequence of strings and concatenates its elements, while adding the separator
  # sep in between its elements. In case an empty sequence is given, returns the empty string.
  #
  public type.join(elems Sequence String, sep String) String =>
    (elems.as_list.intersperse sep).fold concat


  # Takes a sequence of strings and concatenates its elements.
  # In case an empty sequence is given, returns the empty string.
  #
  public type.join(elems Sequence String) String =>
    elems.fold concat


  # create string by concatenating the results of $a[a.indices].
  #
  # This uses a growing array if further strings are appended using 'infix +',
  # so it avoids quadratic runtime caused if each 'infix +' would create its
  # own concatenation-string.
  #
  # The performance of creating a string a0+a1+a2+...+a<n> is in O(n) since the
  # backing array is shared and doubled in size when full (so the final array size
  # is less than 2n in size and the sum of all arrays is less than 4n = 2n + n +
  # n/2+n/4+...).
  #
  # The performance of iterating the utf8 bytes of a string is O(l+n) for an
  # array of length l created by concatenating n sub-strings.
  #
  public type.from_mutable_array(a Mutable_Array Any) ref : String is  // NYI: Remove?

    redef utf8 Sequence u8 =>
      a.flat_map u8 ai->ai.as_string.utf8


    redef infix + (other Any) String =>
      a.add other
      from_mutable_array a


  # create string from the given utf8 bytes
  #
  # the bytes are not checked here, they are stored in the `utf8` field of the
  # new string after conversion via `as_array_backed`.
  #
  public type.from_bytes(utf8_bytes Sequence u8) String =>
    ref : String
      redef utf8 := utf8_bytes.as_array_backed


  # create string from the given codepoints
  #
  public type.from_codepoints(codepoints Sequence codepoint) String =>
    ref : String
      utf8 Sequence u8 =>
        codepoints
          .flat_map u8 x->x.utf8
          .as_array_backed


  # NYI: remove the convenience functions when Fuzion supports char literals
  #
  # each of these is the first utf8 byte of the given one-character string
  # literal; they are used by parse_integer to detect signs and digits.
  #
  public type.minus_char  => "-".utf8.first
  public type.plus_char   => "+".utf8.first
  public type.zero_char   => "0".utf8.first
  public type.nine_char   => "9".utf8.first
  public type.a_char      => "a".utf8.first
  public type.z_char      => "z".utf8.first
  public type.cap_a_char  => "A".utf8.first
  public type.cap_z_char  => "Z".utf8.first
next: Type.fz