(**************************************************************************)
(*                                                                        *)
(*                                 OCaml                                  *)
(*                                                                        *)
(*                           Daniel C. Buenzli                            *)
(*                                                                        *)
(*   Copyright 2014 Institut National de Recherche en Informatique et     *)
(*     en Automatique.                                                    *)
(*                                                                        *)
(*   All rights reserved.  This file is distributed under the terms of    *)
(*   the GNU Lesser General Public License version 2.1, with the          *)
(*   special exception on linking described in the file LICENSE.          *)
(*                                                                        *)
(**************************************************************************)

(** Unicode characters.

    @since 4.03 *)

type t
[@@immediate]
(** The type for Unicode characters.

    A value of this type represents a Unicode
    {{:http://unicode.org/glossary/#unicode_scalar_value}scalar
    value}, which is an integer in the ranges [0x0000]...[0xD7FF] or
    [0xE000]...[0x10FFFF].

    The type is abstract: values are created with {!of_int} or
    {!of_char} and converted back to integers with {!to_int}. *)

val min : t
(** [min] is U+0000, the smallest Unicode scalar value. *)

val max : t
(** [max] is U+10FFFF, the largest Unicode scalar value. *)

val bom : t
(** [bom] is U+FEFF, the Unicode
    {{:http://unicode.org/glossary/#byte_order_mark}byte order mark} (BOM)
    character.

    @since 4.06 *)

val rep : t
(** [rep] is U+FFFD, the
    {{:http://unicode.org/glossary/#replacement_character}replacement}
    character.

    This is the character reported by {!utf_decode_uchar} for invalid
    decodes.

    @since 4.06 *)

val succ : t -> t
(** [succ u] is the scalar value after [u] in the set of Unicode scalar
    values.

    Since the integers between [0xD7FF] and [0xE000] are not scalar
    values, the scalar value after U+D7FF is U+E000.

    @raise Invalid_argument if [u] is {!max}. *)

val pred : t -> t
(** [pred u] is the scalar value before [u] in the set of Unicode scalar
    values.

    Since the integers between [0xD7FF] and [0xE000] are not scalar
    values, the scalar value before U+E000 is U+D7FF.

    @raise Invalid_argument if [u] is {!min}. *)

val is_valid : int -> bool
(** [is_valid n] is [true] if and only if [n] is a Unicode scalar value
    (i.e. in the ranges [0x0000]...[0xD7FF] or [0xE000]...[0x10FFFF]). *)

val of_int : int -> t
(** [of_int i] is [i] as a Unicode character.

    Check [i] with {!is_valid} beforehand if the exception is to be
    avoided.

    @raise Invalid_argument if [i] does not satisfy {!is_valid}. *)

(**/**)
val unsafe_of_int : int -> t
(* NOTE(review): hidden from the generated documentation by the
   surrounding stop comments. Presumably this is of_int without the
   is_valid check -- confirm against the implementation before use. *)
(**/**)

val to_int : t -> int
(** [to_int u] is [u] as an integer. The result is a Unicode scalar
    value, hence it satisfies {!is_valid}. *)

val is_char : t -> bool
(** [is_char u] is [true] if and only if [u] is a latin1 OCaml character,
    that is if and only if {!to_char} can be applied to [u] without
    raising. *)

val of_char : char -> t
(** [of_char c] is the latin1 OCaml character [c] as a Unicode
    character. *)

val to_char : t -> char
(** [to_char u] is [u] as a latin1 OCaml character.

    Check [u] with {!is_char} beforehand if the exception is to be
    avoided.

    @raise Invalid_argument if [u] does not satisfy {!is_char}. *)

(**/**)
val unsafe_to_char : t -> char
(* NOTE(review): hidden from the generated documentation by the
   surrounding stop comments. Presumably this is to_char without the
   is_char check -- confirm against the implementation before use. *)
(**/**)

val equal : t -> t -> bool
(** [equal u u'] is [u = u'], that is [true] if and only if [u] and [u']
    are the same Unicode character. *)

val compare : t -> t -> int
(** [compare u u'] is [Stdlib.compare u u'].

    Together with the type {!t}, this function allows this module to be
    passed as an argument to the functors {!Set.Make} and {!Map.Make}. *)

val seeded_hash : int -> t -> int
(** [seeded_hash seed u] is a seeded hash of [u], with the same output
    value as {!Hashtbl.seeded_hash}. This function allows this module to be
    passed as an argument to the functor {!Hashtbl.MakeSeeded}.

    @since 5.3 *)

val hash : t -> int
(** [hash u] is an unseeded hash of [u], with the same output value as
    {!Hashtbl.hash}. This function allows this module to be passed as an
    argument to the functor {!Hashtbl.Make}.

    @before 5.3 The hashing algorithm was different. Use
    {!Hashtbl.rebuild} for stored tables which used this hashing
    function. *)

(** {1:utf UTF codec tools}

    @since 4.14 *)

type utf_decode [@@immediate]
(** The type for UTF decode results. Values of this type represent
    the result of a Unicode Transformation Format decoding attempt.

    Such a value is either a valid decode, built by the function
    [utf_decode], or an invalid one, built by {!utf_decode_invalid}.
    It is inspected with {!utf_decode_is_valid}, {!utf_decode_uchar}
    and {!utf_decode_length}. *)

val utf_decode_is_valid : utf_decode -> bool
(** [utf_decode_is_valid d] is [true] if and only if [d] holds a valid
    decode. When it is [false], {!utf_decode_uchar} reports {!rep} for
    [d]. *)

val utf_decode_uchar : utf_decode -> t
(** [utf_decode_uchar d] is the Unicode character decoded by [d] if
    [utf_decode_is_valid d] is [true] and {!rep} otherwise.

    Since a valid decode may itself hold {!rep}, use
    {!utf_decode_is_valid} to detect decoding errors rather than
    comparing the result with {!rep}. *)

val utf_decode_length : utf_decode -> int
(** [utf_decode_length d] is the number of elements from the source
    that were consumed by the decode [d]. This is always strictly
    positive and smaller than or equal to [4]. The kind of source
    elements depends on the actual decoder; for the decoders of the
    standard library this function always returns a length in bytes. *)

val utf_decode : int -> t -> utf_decode
(** [utf_decode n u] is a valid UTF decode for [u] that consumed [n]
    elements from the source for decoding. [n] must be strictly
    positive and smaller than or equal to [4] (this is not checked by
    the module). *)

val utf_decode_invalid : int -> utf_decode
(** [utf_decode_invalid n] is an invalid UTF decode for which [n]
    elements of the source were consumed. [n] must be strictly
    positive and smaller than or equal to [4] (this is not checked by
    the module). The resulting decode has {!rep} as the decoded Unicode
    character. *)

val utf_8_decode_length_of_byte : char -> int
(** [utf_8_decode_length_of_byte byte] is the number of bytes, from [1]
    to {!max_utf_8_decode_length}, that a valid UTF-8 decode starting
    with byte [byte] would consume, or [0] if [byte] cannot start a
    valid decode.

    @since 5.4 *)

val max_utf_8_decode_length : int
(** [max_utf_8_decode_length] is [4], the maximal number of bytes
    that a UTF-8 decode, whether valid or invalid, can consume.

    @since 5.4 *)

val utf_8_byte_length : t -> int
(** [utf_8_byte_length u] is the number of bytes needed to encode
    [u] in UTF-8. This is a number from [1] to [4]. *)

val utf_16_byte_length : t -> int
(** [utf_16_byte_length u] is the number of bytes needed to encode
    [u] in UTF-16. This is either [2] or [4]. *)