import type * as RdfJs from "@rdfjs/types";
import type { Writable } from "ts-essentials";
import { lexicalToValue } from "@triplydb/recognized-datatypes";
import type { DATA_TYPE_NAME_MAPPING } from "@triplydb/recognized-datatypes/utils/constants";
import { isNumericDatatype, SUB_TYPES } from "@triplydb/recognized-datatypes/utils/constants";

/**
 * Outcome of validating a term:
 * - "canonical": the lexical value was parsed and made canonical
 * - "unrecognized": the term or datatype is not recognized
 * - "disabled": validation was disabled completely
 */
export type ValidationStatus = "unrecognized" | "canonical" | "disabled";
/**
 * Properties that every term class in this file implements in addition to its RDF/JS interface.
 */
interface TermBase {
  /**
   * Each term has its own ID, making object comparison easier (e.g. when using terms in a JS Set or Map).
   * Includes all properties from the RDF spec, but may not include additional term properties like validationStatus.
   */
  id: string;

  /**
   * Each term has its own Triply-specific (non-standardized) string serialization. This is primarily used to
   * serialize terms before they are stored in HDT files. It is intended to be used with the corresponding `fromString`
   * function of this data-factory.
   *
   * Inspired by the rdf-string.js library (https://github.com/rubensworks/rdf-string.js/tree/master)
   */
  rdfString: string;

  /**
   * String to use for comparing the order of terms.
   * See https://issues.triply.cc/issues/7597.
   *
   * Different terms should sort like this:
   * > SPARQL also fixes an order between some kinds of RDF terms that would not otherwise be ordered:
   * >  1. (Lowest) no value assigned to the variable or expression in this solution.
   * >  2. Blank nodes
   * >  3. IRIs
   * >  4. RDF literals
   * from https://www.w3.org/TR/sparql11-query/#modOrderBy
   *
   * @DECISION We use lexical sorting, and the lexical representations used for sorting look like this:
   *
   *    cmpString    := iri | blank-node | literal | variable | default-graph | quad
   *    default-graph := ''
   *    blank-node    := 'b' delimiter blank-node-name
   *    iri           := 'i' delimiter iri-canonical
   *    literal       := 'l' delimiter literal-repr
   *    quad          := 'q' delimiter 'undefined'
   *    variable      := 'v' delimiter variable-name
   *
   *    delimiter    := \x01
   *
   * Because `b` sorts before `i`, which in turn sorts before `l`, we adhere to the
   * standard. Additionally, variables are sorted after quads, which sort after literals.
   * The default graph sorts first, and we don't define an ordering on quads (because it
   * would be tedious and we don't want to use it anyway).
   */
  cmpString: string;

  /**
   * Possible values:
   * - "canonical" when we managed to parse the lexical value and make it canonical
   * - "unrecognized" when the term or datatype is not recognized
   * - "disabled" when validation was disabled completely
   */
  validationStatus: ValidationStatus;
}
/** Union of every term class defined in this file. */
export type Term = BlankNode | Literal | NamedNode | DefaultGraph | Quad<BaseQuad> | Variable;

/**
 * Renders a number as a string of at least two characters, prefixing "0" when the rendering is shorter.
 * Used in date serialization for values that are bounded and small (e.g. hours, months, minutes).
 */
export function padLeftTwo(value: number): string {
  const rendered = String(value);
  return rendered.padStart(2, "0");
}

/**
 * Serializes an optional timezone component: an absent timezone becomes the empty string, a present one is
 * rendered through `padLeftTwo`.
 */
function normalizeTimeZone(timezone: number | undefined): string {
  if (timezone === undefined) {
    return "";
  }
  return padLeftTwo(timezone);
}

/** Maps each `termType` string to the term class in this file that carries it. */
export interface TermTypeToTerm {
  NamedNode: NamedNode;
  BlankNode: BlankNode;
  Variable: Variable;
  Quad: Quad<BaseQuad>;
  Literal: Literal;
  DefaultGraph: DefaultGraph;
}

/**
 * Given a table of term-type names per quad position (shaped like `BASE_QUAD_TERM_TYPES`), yields for each
 * position the union of the corresponding term classes.
 */
export interface TermTypesToTerms<T extends typeof BASE_QUAD_TERM_TYPES> {
  subject: TermTypeToTerm[T["subject"][number]];
  predicate: TermTypeToTerm[T["predicate"][number]];
  object: TermTypeToTerm[T["object"][number]];
  graph: TermTypeToTerm[T["graph"][number]];
}
/** The `termType` names known to this file (the keys of `TermTypeToTerm`). */
export type TermType = keyof TermTypeToTerm;

/**
 * For each quad position, the term types that a `BaseQuad` accepts at that position.
 * The `as const` on every element keeps the element types as string literals, which `TermTypesToTerms`
 * relies on to look up the matching term classes.
 */
export const BASE_QUAD_TERM_TYPES = {
  subject: ["NamedNode" as const, "BlankNode" as const, "Variable" as const, "Quad" as const],
  predicate: ["NamedNode" as const, "Variable" as const],
  object: ["NamedNode" as const, "BlankNode" as const, "Variable" as const, "Literal" as const, "Quad" as const],
  graph: ["NamedNode" as const, "BlankNode" as const, "Variable" as const, "DefaultGraph" as const],
};

/** The widest quad shape: every position may hold any term class listed for it in `BASE_QUAD_TERM_TYPES`. */
export type BaseQuad = TermTypesToTerms<typeof BASE_QUAD_TERM_TYPES>;

/** Name of one of the four positions in a quad. */
export type QuadPosition = "subject" | "predicate" | "object" | "graph";

/**
 * An IRI term. The IRI itself doubles as the term's `id` and `rdfString`.
 */
export class NamedNode<Iri extends string = string> implements RdfJs.NamedNode<Iri>, TermBase {
  public static readonly termType = "NamedNode" as const;
  public readonly termType = NamedNode.termType;
  public readonly value: Iri;
  public readonly validationStatus: ValidationStatus;

  /**
   * @param iri the IRI, stored as-is in `value`
   * @param validationStatus validation outcome to record on the term
   */
  public constructor(iri: Iri, validationStatus: ValidationStatus) {
    this.validationStatus = validationStatus;
    this.value = iri;
  }

  /**
   * The same instance, typed as writable.
   * Exposed as a getter so it does not show up as a circular reference in JSON stringification output
   * (e.g. from mocha).
   */
  get unsafeMutable() {
    return this as Writable<typeof this>;
  }

  /** The IRI. */
  public get id() {
    return this.value;
  }

  /** The IRI, without any surrounding brackets. */
  public get rdfString() {
    return this.value;
  }

  /** True when `other` is a NamedNode with the same IRI. */
  public equals(other?: RdfJs.Term): boolean {
    if (!other) return false;
    return other.termType === this.termType && other.value === this.value;
  }

  /** Sort key: 'i', the \x01 delimiter, then the IRI with every \x01 replaced by \x02. */
  public get cmpString() {
    const escapedIri = this.value.replace(/\x01/g, "\x02");
    return `i\x01${escapedIri}`;
  }
}

/**
 * Pads the decimal rendering of `num` with "0" characters on the given side until it is at least
 * `untilLength` characters long. Longer renderings are returned unchanged.
 */
function zeroPad(num: number, untilLength: number, sideToPad: "left" | "right") {
  const value = `${num}`;
  return sideToPad === "left" ? value.padStart(untilLength, "0") : value.padEnd(untilLength, "0");
}

/**
 * Replaces every decimal digit `d` in `digits` by `9 - d`, which reverses the lexical order of
 * equally long digit strings.
 */
function invert(digits: string) {
  return digits
    .split("")
    .map((digit) => 9 - Number.parseInt(digit, 10))
    .join("");
}

/**
 * Creates a lexicographically sortable representation of a number, with a descending left-to-right
 * order of importance.
 *
 * The result is `<sign of value><sign of exponent><4 exponent digits><20 mantissa digits>`, where the
 * sign characters are `+`, `,` and `-` (which sort in that order). Digits are inverted wherever a larger
 * magnitude must sort earlier, see
 * https://git.triply.cc/triply/hdt/-/tree/master/prolog/sparql#95-serialization-of-numbers
 *
 * The string is parsed with `Number()`, so the representation carries double precision only.
 *
 * @param numericString textual number, parsed with `Number()`
 * @returns the sortable representation
 * @throws Error when the string does not parse to a finite number (e.g. "NaN", "INF", "abc")
 */
export function lexSerializeNumericString(numericString: string) {
  const val = Number(numericString);
  if (!Number.isFinite(val)) {
    // toExponential() yields no usable mantissa/exponent for NaN and ±Infinity.
    throw new Error(`Cannot lexically serialize non-finite numeric value "${numericString}"`);
  }

  const [mantissa = "", exponentPart = ""] = val.toExponential().split("e");
  const exponent = Number.parseInt(exponentPart, 10);

  // Take the mantissa digits straight from the string. Converting them to a number first loses precision:
  // a double needs up to 17 significant digits, and a 17-digit integer can exceed 2^53.
  const mantissaLex = mantissa.replace(/[-.]/g, "").padEnd(20, "0");
  const exponentLex = zeroPad(Math.abs(exponent), 4, "left");

  if (val === 0) {
    return `,,${exponentLex}${mantissaLex}`;
  }

  if (val < 0) {
    // For negative values a larger mantissa means a smaller number, hence the inverted mantissa.
    const invertedMantissa = invert(mantissaLex);
    if (exponent > 0) return `++${invert(exponentLex)}${invertedMantissa}`;
    if (exponent === 0) return `+,${exponentLex}${invertedMantissa}`;
    return `+-${exponentLex}${invertedMantissa}`;
  }

  // Positive values: a more negative exponent means a smaller number, hence the inverted exponent.
  if (exponent < 0) return `-+${invert(exponentLex)}${mantissaLex}`;
  if (exponent === 0) return `-,${exponentLex}${mantissaLex}`;
  return `--${exponentLex}${mantissaLex}`;
}

/**
 * An RDF literal: a lexical value with a datatype IRI and an optional language tag.
 *
 * Besides the RDF/JS `Literal` members it provides the `TermBase` properties (`id`, `rdfString`, `cmpString`,
 * `validationStatus`).
 */
export class Literal<Iri extends string = string> implements RdfJs.Literal, TermBase {
  public static readonly termType = "Literal" as const;
  public readonly termType = Literal.termType;
  public readonly validationStatus: ValidationStatus;
  /** The lexical form, stored exactly as passed to the constructor. */
  public readonly value: string;

  get unsafeMutable() {
    // using a getter, as this will avoid this popping up as circular dependency in json stringification output,
    // such as mocha
    return this as Writable<typeof this>;
  }
  /** Language tag; the empty string when the literal has none. */
  public readonly language: string = "";
  public readonly datatype: NamedNode<Iri>;

  /**
   * @param value lexical form of the literal
   * @param validationStatus validation outcome to record on the term
   * @param datatype datatype IRI of the literal
   * @param language language tag; `undefined` or "" leaves `language` at its "" default
   */
  public constructor(
    value: string,
    validationStatus: ValidationStatus,
    datatype: NamedNode<Iri>,
    language: string | undefined,
  ) {
    this.validationStatus = validationStatus;
    this.value = value;
    this.datatype = datatype;
    if (language) this.language = language;
  }

  /**
   * `"value"@language` for language-tagged literals, otherwise `"value"^^datatype-id`.
   * The value is embedded verbatim; quotes inside it are not escaped.
   */
  public get id() {
    if (this.language) {
      return `"${this.value}"@${this.language}`;
    } else {
      return `"${this.value}"^^${this.datatype.id}`;
    }
  }

  /**
   * Triply-specific serialization: `"value"@language`, `"value"` for xsd:string, or `"value"^^datatype`.
   * The value is embedded verbatim; quotes inside it are not escaped.
   */
  public get rdfString() {
    if (this.language) {
      return `"${this.value}"@${this.language}`;
    } else if (this.datatype.equals(xsdString)) {
      // We've decided to store xsd:string type literals as simple literals, i.e. without a datatype, for the
      // following reasons:
      // - It's serialized this way in rdf-string.js, and all of our HDT's.
      // - It's terser, saving some HDT space (not sure how much though).
      return `"${this.value}"`;
    } else {
      return `"${this.value}"^^${this.datatype.rdfString}`;
    }
  }

  /** True when `other` is a Literal with the same lexical value, language tag and datatype IRI. */
  public equals(other?: RdfJs.Term): boolean {
    return (
      other?.termType === "Literal" &&
      this.value === other.value &&
      this.language === other.language &&
      this.datatype.value === other.datatype.value
    );
  }

  /**
   * @DECISION We use the following representation for sorting literals:
   *
   *   literal      := 'l' delimiter literal-repr
   *   literal-repr := numeric | other | string
   *
   *   numeric      := encoding       delimiter iri
   *   other        := lexical-form   delimiter iri
   *   string       := lexical-string delimiter string-iri
   *
   *   string-iri   := langstring | iri
   *   langstring   := 'http://www.w3.org/1999/02/22-rdf-syntax-ns#langString-' language-tag
   *
   *   delimiter    := \x01
   *
   * `lexical-string` is defined as the lexical form of the string, with all occurrences of `delimiter`
   * replaced by the character that is next in ascii (i.e. \x02).
   *
   * Motivation:
   * - Some types are sub-types of other types, and should therefore sort by value (and not by iri).
   *   Therefore the iris must come _after_ the lexical-form.
   * - The SPARQL spec states that the ordering of language-tagged literals is undefined. We'd like
   *   to define it for the case when both language-tags are equal. This is more intuitive, as it means
   *   we can compare "Foo"@en vs "Bar"@en. Because it makes life easier, the lexical representation
   *   even sorts strings no-matter whether they're language tagged or not.
   * - We chose the delimiter s.t. it's unlikely to occur in a string value, but still works with gnu-sort.
   *   Unfortunately, we couldn't find a character that both works with gnu-sort and is illegal in strings
   *   according to https://www.w3.org/tr/xml11/#nt-char, so instead we chose a character that is discouraged.
   * - Sorting between datatypes isn't defined, and we want to keep the representation as simple as possible,
   *   so we don't mind different datatypes to end up all mixed together (but still sorted by lexical form).
   *
   * Notes on the implementation below:
   * - The datatype checks run in a fixed order and the first match wins.
   *   NOTE(review): whether the SUB_TYPES lists overlap is not visible from this file, so keep the order as is.
   * - The date/time and duration branches build the key from the parsed value returned by `lexicalToValue`
   *   rather than from the lexical form.
   * - The finite-numeric branch appends `this.datatype.cmpString` (i.e. 'i' delimiter iri), whereas every other
   *   branch appends the bare datatype IRI. The "-INF", "INF" and "NaN" branches append no datatype at all.
   */
  public get cmpString() {
    if (SUB_TYPES.RDF_LANG_STRING.includes(this.datatype.value)) {
      return `l\x01${this.value.replace(/\x01/g, "\x02")}\x01${this.datatype.value.replace(/\x01/g, "\x02")}-${
        this.language
      }`;
    }

    if (SUB_TYPES.XSD_DATE_TIME.includes(this.datatype.value)) {
      /**
       *    Formats a time in ISO 8601 with up to nanosecond precision and trailing zeros. The format is precisely:
       *    YYYY-MM-DDTHH:mm:ss.sssssssssZ
       *
       *    2023-05-22T12:12:52.231251235Z
       *
       *    The sort key concatenates, without separators: the lex-serialized year, the two-character month, day,
       *    hour, minute and second, the fraction, and the timezone.
       */

      const canonicalDate = lexicalToValue(
        this.value,
        this.datatype.value as (typeof DATA_TYPE_NAME_MAPPING)["XSD_DATE_TIME"],
      );

      // The year goes through the numeric serializer; the bounded fields are padded to two characters.
      const serializedYear = lexSerializeNumericString("" + canonicalDate.year);
      const serializedMonth = padLeftTwo(canonicalDate.month);
      const serializedDay = padLeftTwo(canonicalDate.day);
      const serializedHours = padLeftTwo(canonicalDate.hour);
      const serializedMinutes = padLeftTwo(canonicalDate.minute);
      const serializedSeconds = padLeftTwo(canonicalDate.second);

      const concatDate =
        serializedYear +
        serializedMonth +
        serializedDay +
        serializedHours +
        serializedMinutes +
        serializedSeconds +
        canonicalDate.fraction +
        normalizeTimeZone(canonicalDate.timezone);

      return `l\x01${concatDate}\x01${this.datatype.value}`;
    }

    if (SUB_TYPES.XSD_G_YEAR.includes(this.datatype.value)) {
      /**
       *    The format is:
       *    YYYY[TIMEZONE]OPTIONAL
       *
       *    2023Z
       *    2023+05:00
       *    2023-05:00
       *
       *    The sort key is the lex-serialized year followed by the timezone.
       */
      const canonicalDate = lexicalToValue(
        this.value,
        this.datatype.value as (typeof DATA_TYPE_NAME_MAPPING)["XSD_G_YEAR"],
      );
      const serializedYear = lexSerializeNumericString("" + canonicalDate.year);
      const concatgYear = serializedYear + normalizeTimeZone(canonicalDate.timezone);

      return `l\x01${concatgYear}\x01${this.datatype.value}`;
    }

    if (SUB_TYPES.XSD_G_YEAR_MONTH.includes(this.datatype.value)) {
      /**
       *    The format is:
       *    YYYY-MM[TIMEZONE]OPTIONAL
       *
       *    2023-01Z
       *    2023-05+05:00
       *    2023-05-05:00
       *
       *    The sort key is the lex-serialized year, the two-character month and the timezone.
       */
      const canonicalDate = lexicalToValue(
        this.value,
        this.datatype.value as (typeof DATA_TYPE_NAME_MAPPING)["XSD_G_YEAR_MONTH"],
      );

      const serializedYear = lexSerializeNumericString("" + canonicalDate.year);
      const serializedMonth = padLeftTwo(canonicalDate.month);
      const concatgYearMonth = serializedYear + serializedMonth + normalizeTimeZone(canonicalDate.timezone);

      return `l\x01${concatgYearMonth}\x01${this.datatype.value}`;
    }

    if (SUB_TYPES.XSD_DATE.includes(this.datatype.value)) {
      /**
       *    The format is:
       *    YYYY-MM-dd[TIMEZONE]OPTIONAL
       *
       *    2023-01-15Z
       *    2023-05-22+05:00
       *    2023-05-22-05:00
       *
       *    The sort key is the lex-serialized year, the two-character month and day, and the timezone.
       */
      const canonicalDate = lexicalToValue(
        this.value,
        this.datatype.value as (typeof DATA_TYPE_NAME_MAPPING)["XSD_DATE"],
      );

      const serializedYear = lexSerializeNumericString("" + canonicalDate.year);
      const serializedMonth = padLeftTwo(canonicalDate.month);
      const serializedDay = padLeftTwo(canonicalDate.day);

      const concatgYearMonthDay =
        serializedYear + serializedMonth + serializedDay + normalizeTimeZone(canonicalDate.timezone);

      return `l\x01${concatgYearMonthDay}\x01${this.datatype.value}`;
    }

    if (SUB_TYPES.XSD_DURATION.includes(this.datatype.value)) {
      /**
       * All encompassing serialization process for Duration and its subtypes.
       * Any Duration object that is parsed, is converted to a simplified MonthSeconds format.
       * See "durationToCanonical" ..packages/recognized-datatypes/src/xsd/dateTime.ts for more.
       */
      const canonicalDuration = lexicalToValue(
        this.value,
        this.datatype.value as (typeof DATA_TYPE_NAME_MAPPING)["XSD_DURATION"],
      );

      /**
       * @DECISION Converting Months to Seconds:
       * Since we need to compare xsd:Duration (and subtypes), and by design every Duration is converted to Months-Seconds,
       * to cover all possible cases we choose Seconds as a common denominator and further convert everything to that.
       *
       * Because we do not have information about which specific months we are converting (28/30/31 days?),
       * and we have to evaluate cases such as P1M < P32D or P4M < P123D, we use a conversion rate
       * that results in a number of seconds that is exactly in the middle.
       *
       * Therefore:
       * Seconds * Minutes * Hours in a day * Days in a month (by averaging days in a year(365.25) per month)
       * 60      * 60      * 24             * 30.4375   = 2629800
       *
       */

      const monthsToSeconds = canonicalDuration.months * 2629800;

      const totalTime = monthsToSeconds + canonicalDuration.seconds;
      // Builds "[-]<total seconds>.<fraction>" and runs it through the numeric serializer.
      const serializedSeconds = lexSerializeNumericString(
        `${canonicalDuration.isNegative ? "-" : ""}${totalTime}.${canonicalDuration.fraction}`,
      );

      return `l\x01${serializedSeconds}\x01${this.datatype.value}`;
    }

    // Everything that is not numeric sorts by its lexical form, with the delimiter escaped in both parts.
    if (!isNumericDatatype(this.datatype)) {
      return `l\x01${this.value.replace(/\x01/g, "\x02")}\x01${this.datatype.value.replace(/\x01/g, "\x02")}`;
    }

    // special cases for floats and doubles:
    // See https://www.w3.org/TR/2012/REC-xmlschema11-2-20120405/datatypes.html#f-specValCanMap for the canonical forms
    if (this.value === "-INF") {
      return "l\x01++/"; // '/' sorts before 0, so this should work
    } else if (this.value === "INF") {
      return "l\x01--\\"; // '\' sorts after 9, so this should work
    } else if (this.value === "NaN") {
      // @DECISION we're ignoring problems with lexical sorting here, but we'll
      //           work around them where feasible.
      //
      // > If either, or both, operands are NaN, false is returned.
      // from: // https://www.w3.org/TR/xpath-functions/#comp.numeric
      //
      // This isn't possible with lexical sorting, so we can just do whatever.
      // So let's choose a human friendly representation while we're at it.
      return "l\x01NaN";
    }

    // numeric. requires complex transformation to get a lexically-sortable representation
    return `l\x01${lexSerializeNumericString(this.value)}\x01${this.datatype.cmpString}`;
  }
}

/**
 * A blank node, identified by its label (without the `_:` prefix).
 * Blank nodes are always constructed with validation status "canonical".
 */
export class BlankNode implements RdfJs.BlankNode, TermBase {
  public static readonly termType = "BlankNode" as const;
  public readonly termType = BlankNode.termType;
  /** The blank node label, without the `_:` prefix. */
  public readonly value: string;

  get unsafeMutable() {
    // using a getter, as this will avoid this popping up as circular dependency in json stringification output,
    // such as mocha
    return this as Writable<typeof this>;
  }
  public readonly validationStatus: ValidationStatus = "canonical";

  /**
   * @param name the blank node label, without the `_:` prefix
   */
  public constructor(name: string) {
    this.value = name;
  }

  /** The label prefixed with `_:`. */
  public get id() {
    return `_:${this.value}`;
  }

  /** The label prefixed with `_:`. */
  public get rdfString() {
    return `_:${this.value}`;
  }

  /** True when `other` is a BlankNode with the same label. */
  public equals(other?: RdfJs.Term): boolean {
    return this.termType === other?.termType && this.value === other.value;
  }

  /**
   * Sort key: 'b', the \x01 delimiter, then the label.
   * Occurrences of the delimiter inside the label are replaced by \x02, as the other term classes do, so the
   * key never contains more delimiters than its grammar allows.
   */
  public get cmpString() {
    return "b\x01" + this.value.replace(/\x01/g, "\x02");
  }
}

/**
 * A query variable, identified by its name (without the `?` prefix).
 * Variables are always constructed with validation status "canonical".
 */
export class Variable implements RdfJs.Variable, TermBase {
  public static readonly termType = "Variable" as const;
  public readonly termType = Variable.termType;
  /** The variable name, without the `?` prefix. */
  public readonly value: string;
  public readonly validationStatus: ValidationStatus = "canonical";

  /**
   * @param name the variable name, without the `?` prefix
   */
  public constructor(name: string) {
    this.value = name;
  }

  /**
   * The same instance, typed as writable.
   * Exposed as a getter so it does not show up as a circular reference in JSON stringification output
   * (e.g. from mocha).
   */
  get unsafeMutable() {
    return this as Writable<typeof this>;
  }

  /** The name prefixed with `?`. */
  public get id() {
    return "?" + this.value;
  }

  /** The name prefixed with `?`. */
  public get rdfString() {
    return "?" + this.value;
  }

  /** True when `other` is a Variable with the same name. */
  public equals(other?: RdfJs.Term): boolean {
    if (!other) return false;
    return other.termType === this.termType && other.value === this.value;
  }

  /** Sort key: 'v', the \x01 delimiter, then the name with every \x01 replaced by \x02. */
  public get cmpString() {
    const escapedName = this.value.replace(/\x01/g, "\x02");
    return `v\x01${escapedName}`;
  }
}

/**
 * The default graph. It carries no data of its own: `value`, `id`, `rdfString` and `cmpString` are all the
 * empty string, and its validation status is always "canonical".
 */
export class DefaultGraph implements RdfJs.DefaultGraph, TermBase {
  public static readonly termType = "DefaultGraph" as const;
  public static readonly id = "";
  public readonly termType = DefaultGraph.termType;
  public readonly value = "" as const;
  public readonly id = DefaultGraph.id;
  public readonly rdfString = this.value;
  /** The empty sort key, which sorts before every other term's key. */
  public readonly cmpString = "";
  public readonly validationStatus: ValidationStatus = "canonical";

  /**
   * The same instance, typed as writable.
   * Exposed as a getter so it does not show up as a circular reference in JSON stringification output
   * (e.g. from mocha).
   */
  get unsafeMutable() {
    return this as Writable<typeof this>;
  }

  /** True when `other` is a default graph term, judged by `termType` alone. */
  public equals(other?: RdfJs.Term): boolean {
    if (other == null) return false;
    return other.termType === this.termType;
  }
}

/**
 * A quad (subject, predicate, object, graph) that is itself a term.
 *
 * Its validation status is "canonical" only when all four component terms are "canonical"; in every other
 * case it is "unrecognized".
 */
export class Quad<Q extends BaseQuad = BaseQuad> implements RdfJs.Quad, TermBase {
  public static readonly termType = "Quad" as const;
  public readonly termType = Quad.termType;
  /** This package does not concern itself with ordering quads, so every quad has the same sort key. */
  public readonly cmpString: string = "q\x01undefined";

  public readonly value = "" as const;
  public readonly subject: Q["subject"];
  public readonly predicate: Q["predicate"];
  public readonly object: Q["object"];
  public readonly graph: Q["graph"];
  public readonly validationStatus: ValidationStatus;

  /**
   * @param subject term in subject position
   * @param predicate term in predicate position
   * @param object term in object position
   * @param graph term in graph position; a `DefaultGraph` is accepted as well
   */
  public constructor(
    subject: Q["subject"],
    predicate: Q["predicate"],
    object: Q["object"],
    graph: Q["graph"] | DefaultGraph,
  ) {
    this.subject = subject;
    this.predicate = predicate;
    this.object = object;
    this.graph = graph;

    const hasNonCanonicalTerm = [this.subject, this.predicate, this.object, this.graph].some(
      (term) => term.validationStatus !== "canonical",
    );
    this.validationStatus = hasNonCanonicalTerm ? "unrecognized" : "canonical";
  }

  /**
   * The same instance, typed as writable.
   * Exposed as a getter so it does not show up as a circular reference in JSON stringification output
   * (e.g. from mocha).
   */
  get unsafeMutable() {
    return this as Writable<typeof this>;
  }

  /** The four component ids separated by single spaces, followed by " .". */
  public get id(): string {
    const { subject, predicate, object, graph } = this;
    return `${subject.id} ${predicate.id} ${object.id} ${graph.id} .`;
  }

  /**
   * `<subject predicate object graph>` built from the components' `rdfString`s; the graph part (and the space
   * before it) is left out when the graph equals the default graph.
   */
  public get rdfString(): string {
    const triple = `${this.subject.rdfString} ${this.predicate.rdfString} ${this.object.rdfString}`;
    const graphSuffix = this.graph.equals(new DefaultGraph()) ? "" : " " + this.graph.rdfString;
    return `<${triple}${graphSuffix}>`;
  }

  /** Returns whether this object represents the same quad as the other, comparing all four components. */
  equals(other?: RdfJs.Term): boolean {
    if (!other || other.termType !== "Quad") return false;
    return (
      this.subject.equals(other.subject) &&
      this.predicate.equals(other.predicate) &&
      this.object.equals(other.object) &&
      this.graph.equals(other.graph)
    );
  }
}

// The xsd:string datatype IRI. `Literal.rdfString` compares a literal's datatype against it to decide whether
// the datatype suffix can be omitted. It is constructed with validation status "disabled".
const xsdString = new NamedNode("http://www.w3.org/2001/XMLSchema#string", "disabled");

/**
 * Determines the term type of a serialized term (`rdfString`) by looking at its first character only:
 * the empty string is the default graph, `"` starts a literal, `_` a blank node, `?` a variable and `<` a quad.
 * Anything else is taken to be a named node, whose serialization is the bare IRI.
 */
export function termTypeOfRdfString(rdfString: string): Term["termType"] {
  const firstChar = rdfString[0];
  if (firstChar === undefined) return "DefaultGraph";
  if (firstChar === '"') return "Literal";
  if (firstChar === "_") return "BlankNode";
  if (firstChar === "?") return "Variable";
  if (firstChar === "<") return "Quad";
  return "NamedNode";
}
