import type * as RdfJs from "@rdfjs/types";
import { parseLanguageTag } from "@sozialhelden/ietf-language-tags";
import { memoize } from "lodash-es";
import memoizee from "memoizee";
import LruCache from "mnemonist/lru-cache.js";
// We're not using the built-in `randomUUID` from `crypto`, because that function is not yet polyfilled by
// crypto-browserify (https://github.com/crypto-browserify/crypto-browserify).
// As a result, the browser playground would not run properly.
// We can start using the native `crypto` again when crypto-browserify supports `randomUUID`.
import { v4 as randomUUID } from "uuid";
import { validate } from "@triplydb/iri";
import { isRecognized, lexicalToCanonical } from "@triplydb/recognized-datatypes";
import { BaseParseError } from "@triplydb/recognized-datatypes/Errors";
import type { DATA_TYPE_NAME_MAPPING } from "@triplydb/recognized-datatypes/utils/constants";
import { assertIsValidQuad, isValidTerm } from "./assertionUtils.ts";
import type { BaseQuad, Term, TermType, TermTypeToTerm, ValidationStatus } from "./Terms.ts";
import { BASE_QUAD_TERM_TYPES, BlankNode, DefaultGraph, Literal, NamedNode, Quad, Variable } from "./Terms.ts";

/**
 * A quad whose four positions are given as strings.
 * `fromRdfStringQuad` parses every position with `fromString` and builds a `Quad` from the results.
 */
export interface RdfStringQuad {
  /** Literal tag identifying this shape */
  type: "QuadRdfStrings";
  subject: string;
  predicate: string;
  object: string;
  /** An empty string is parsed by `fromString` as the default graph */
  graph: string;
}

// We treat the `BaseParseError` of recognized-datatypes as the main parse error that we extend from when validating
// terms and their properties (including e.g. language tags)
export const ParseError = BaseParseError;
/** Thrown by the factory's named node constructor when IRI validation fails and no replacement is supplied */
export class IriParseError extends ParseError {}
/** Thrown by `fromString` when the parsed term type is not one of the allowed term types */
export class IllegalTermType extends BaseParseError {
  constructor(message: string) {
    super({ message });
  }
}

/**
 * Thrown when a literal is created with a language tag that does not pass `isValidLanguageTag`.
 */
export class LanguageTagParseError extends ParseError {
  /** The offending language tag, exactly as it was passed in */
  public languageTag: string;
  constructor(opts: { lexicalValue: string; message: string; languageTag: string }) {
    super(opts);
    this.languageTag = opts.languageTag;
  }
}

/**
 * Options accepted by `getFactory`. The four type parameters are the term types that are allowed in the
 * subject, predicate, object and graph position of the quads created by the factory.
 */
export interface FactoryOpts<
  S extends BaseQuad["subject"]["termType"],
  P extends BaseQuad["predicate"]["termType"],
  O extends BaseQuad["object"]["termType"],
  G extends BaseQuad["graph"]["termType"],
> {
  /**
   * When set, IRIs, language tags and literal values are not validated. Terms that are created without an
   * explicit validation status then get the status "disabled".
   */
  skipValidation?: boolean;
  /** Prefix for generated blank node labels. By default the first 4 characters of a random UUID */
  bnodePrefix?: string;
  /**
   * By default, named node construction is memoized. This is often a performance improvement, but it
   * may also result in a performance penalty in some cases.
   * This flag disables the memoization
   */
  disableMemoization?: boolean;
  /**
   * This callback fires when we have an invalid IRI. If replaceWith is returned, we will return that instead.
   * Otherwise an `IriParseError` is thrown.
   */
  onInvalidIri?: (opts: { error: Error; value: string }) => { replaceWith: NamedNode<string> };

  /**
   * This callback fires when creating a literal failed. We may either substitute the literal (returning the
   * replacement as `replaceWith`), or we may want to re-throw the error from within the callback.
   * `language` is set when the literal was requested with a language tag, `datatype` otherwise.
   */
  onInvalidLiteral?: (opts: {
    error: Error;
    value: string | number | boolean;
    datatype?: RdfJs.NamedNode;
    language?: string;
  }) => { replaceWith: Literal };
  /**
   * This callback fires when we don't have validation/canonicalization for a term.
   * E.g., a literal with datatype `https://something.org/datatype`
   */
  onUnsupportedTerm?: (term: Term) => void;
  /**
   * Specify the supported quad term types. Passing term types other than these will result in assertion errors
   * By default, the `BaseQuad` term types are used
   */
  quadTerms?: {
    subject: Array<S>;
    predicate: Array<P>;
    object: Array<O>;
    graph: Array<G>;
  };
}
/**
 * Tells whether `languageTag` can be parsed as a language tag.
 *
 * The result is memoized (keyed on the tag string, at most 100 entries): the language-tags lib does not seem
 * very performant, and we don't expect datasets to contain many different languages.
 */
export const isValidLanguageTag = memoizee(
  (languageTag: string) => {
    // 2nd argument `true`: make the lib return undefined for an invalid tag
    // 3rd argument `null`: don't print anything to the console
    const parsed = parseLanguageTag(languageTag, true, null);
    return Boolean(parsed);
  },
  { max: 100, primitive: true },
);

export function getFactory<
  S extends BaseQuad["subject"]["termType"],
  P extends BaseQuad["predicate"]["termType"],
  O extends BaseQuad["object"]["termType"],
  G extends BaseQuad["graph"]["termType"],
>(factoryOpts?: FactoryOpts<S, P, O, G>) {
  // Term types that are accepted per quad position; falls back to the `BaseQuad` term types
  const allowedQuadPositions = factoryOpts?.quadTerms || BASE_QUAD_TERM_TYPES;
  // Bounded cache for named node memoization: keeps the 1,000 most recently processed iris
  const lruCache = new (LruCache as any)(1_000);
  // Generated blank node labels are `<prefix><counter>`. Without a configured prefix we take the first
  // 4 characters of a random UUID.
  const bnodePrefix = factoryOpts?.bnodePrefix || randomUUID().slice(0, 4);
  let blankNodeCounter = 0;
  // The concrete term classes that belong to the term types configured for each quad position
  type QuadTerms = {
    subject: TermTypeToTerm[S];
    predicate: TermTypeToTerm[P];
    object: TermTypeToTerm[O];
    graph: TermTypeToTerm[G];
  };

  /**
   * Create a named node (un-memoized implementation).
   *
   * @param iri The IRI of the node
   * @param opts.validationStatus When given, the IRI is not validated and this status is stored on the node
   * @throws IriParseError when the IRI is invalid and `onInvalidIri` does not supply a replacement
   */
  function _namedNode<Iri extends string = string>(iri: Iri, opts?: { validationStatus?: ValidationStatus }) {
    // An explicit status wins; otherwise `skipValidation` maps to "disabled". In both cases we skip validation.
    const presetStatus: ValidationStatus | undefined =
      opts?.validationStatus || (factoryOpts?.skipValidation ? "disabled" : undefined);
    if (presetStatus) return new NamedNode<Iri>(iri, presetStatus);

    try {
      validate(iri);
    } catch (error: any) {
      // Give the caller a chance to substitute the invalid IRI before we give up
      const substitute = factoryOpts?.onInvalidIri?.({ error, value: iri })?.replaceWith;
      if (substitute) return substitute as NamedNode<Iri>;
      throw new IriParseError({
        message: error.message,
        lexicalValue: iri,
        cause: error,
      });
    }
    return new NamedNode<Iri>(iri, "canonical");
  }

  // `namedNode` is the constructor that the factory exposes: either the plain `_namedNode`, or a memoized
  // wrapper around it.
  let namedNode = _namedNode;
  if (!factoryOpts?.disableMemoization) {
    // The cache key combines the IRI with the requested validation status, so the same IRI requested with
    // different statuses ends up in separate cache entries
    namedNode = memoize(_namedNode, (iri, opts) => `${iri}|${opts?.validationStatus}`);
    // Replace lodash's default cache with the bounded LRU cache
    (namedNode as any).cache = lruCache;
  }

  // Datatypes that `createLiteral` assigns to plain literals and language-tagged literals respectively
  const xsdString = namedNode("http://www.w3.org/2001/XMLSchema#string");
  const langTagString = namedNode("http://www.w3.org/1999/02/22-rdf-syntax-ns#langString");

  /**
   * Create a blank node.
   * @param name Label to use. When omitted (or empty), a label is generated from the blank node prefix and an
   * incrementing counter.
   */
  function blankNode(name?: string) {
    const label = name || `${bnodePrefix}${blankNodeCounter++}`;
    return new BlankNode(label);
  }

  /**
   * Internal function that does the actual work of creating a literal. The public `literal` function wraps this
   * one, so that it can offer a fallback when the requested literal turns out to be invalid.
   *
   * @param value Lexical value; numbers and booleans are stringified first
   * @param languageOrDataType A string is treated as a language tag, a named node as the datatype. When omitted
   * the literal gets datatype xsd:string.
   * @param opts.validationStatus When given, validation and canonicalization are skipped and this status is used
   * @throws LanguageTagParseError when validating and the language tag is invalid
   */
  function createLiteral(
    value: string | number | boolean,
    languageOrDataType: string | RdfJs.NamedNode | undefined,
    opts?: { validationStatus?: ValidationStatus },
  ): Literal {
    // An explicit status wins; otherwise `skipValidation` maps to "disabled". Only when neither applies do we
    // validate the language tag and canonicalize the value.
    const presetStatus: ValidationStatus | undefined =
      opts?.validationStatus || (factoryOpts?.skipValidation ? "disabled" : undefined);

    if (typeof value !== "string") value = `${value}`;

    let language: string | undefined;
    let datatype: NamedNode;
    if (typeof languageOrDataType !== "string") {
      datatype = languageOrDataType ? fromTerm(languageOrDataType) : xsdString;
    } else {
      if (!presetStatus && !isValidLanguageTag(languageOrDataType)) {
        throw new LanguageTagParseError({
          languageTag: languageOrDataType,
          lexicalValue: value,
          message: `Invalid language tag '${languageOrDataType}'`,
        });
      }
      // @DECISION We're treating language tags as lower-case, even though the SPARQL spec doesn't mandate this. It
      // simplifies comparisons in the engine, and gets closer to a "canonical" form.
      language = languageOrDataType.toLowerCase();
      datatype = langTagString;
    }

    let validationStatus: ValidationStatus | undefined = presetStatus;
    if (!validationStatus) {
      if (isRecognized(datatype)) {
        value = lexicalToCanonical(value, datatype);
        validationStatus = "canonical";
      } else {
        validationStatus = "unrecognized";
      }
    }

    const created = new Literal(value, validationStatus, datatype, language);
    // Let the caller know that we could not validate/canonicalize this literal
    if (validationStatus === "unrecognized") factoryOpts?.onUnsupportedTerm?.(created);
    return created;
  }

  /**
   * Create a literal.
   *
   * @param value Lexical value; numbers and booleans are stringified
   * @param languageOrDataType A string is treated as a language tag, a named node as the datatype. When omitted
   * the literal gets datatype xsd:string.
   * @param opts.validationStatus When given, validation and canonicalization are skipped and this status is used
   * @returns The created literal, or the replacement supplied by `onInvalidLiteral` when creation failed
   * @throws Whatever creating the literal throws, when `onInvalidLiteral` is not configured or does not supply a
   * replacement
   */
  function literal<D extends string>(
    value: string | number | boolean,
    datatype: RdfJs.NamedNode<D>,
    opts?: { validationStatus?: ValidationStatus },
  ): Literal<D>;
  function literal<D extends string>(
    value: string | number | boolean,
    language: string,
    opts?: { validationStatus?: ValidationStatus },
  ): Literal<typeof DATA_TYPE_NAME_MAPPING.RDF_LANG_STRING>;
  function literal(value: string): Literal<typeof DATA_TYPE_NAME_MAPPING.XSD_STRING>;
  function literal(
    value: string,
    languageOrDataType?: string | RdfJs.NamedNode,
    opts?: { validationStatus?: ValidationStatus },
  ): Literal;
  function literal(
    value: number | boolean,
    languageTagOrDatatype: string | RdfJs.NamedNode,
    opts?: { validationStatus?: ValidationStatus },
  ): Literal;
  function literal(
    value: string | number | boolean,
    languageOrDataType?: string | RdfJs.NamedNode,
    opts?: { validationStatus?: ValidationStatus },
  ) {
    // Without a fallback handler there is nothing to catch for: let errors propagate as-is
    if (!factoryOpts?.onInvalidLiteral) return createLiteral(value, languageOrDataType, opts);
    try {
      return createLiteral(value, languageOrDataType, opts);
    } catch (e: any) {
      const replacement = factoryOpts.onInvalidLiteral({
        error: e,
        value,
        language: typeof languageOrDataType === "string" ? languageOrDataType : undefined,
        datatype: typeof languageOrDataType === "string" ? undefined : languageOrDataType,
      })?.replaceWith;
      if (replacement) return replacement;
      // The handler did not supply a replacement. Surface the original error, just like we do for invalid IRIs,
      // instead of returning undefined or failing on the missing return value.
      throw e;
    }
  }

  /** Create a variable with the given name. */
  function variable(name: string) {
    return new Variable(name);
  }
  /** Create a variable whose name is a random UUID with the dashes removed. */
  function randomVariable() {
    const name = randomUUID().split("-").join("");
    return variable(name);
  }

  // A single default graph instance per factory; `defaultGraph()` and `quad()` both hand out this same object
  const DEFAULT_GRAPH = new DefaultGraph();
  /** Return this factory's default graph term. */
  function defaultGraph() {
    return DEFAULT_GRAPH;
  }

  /**
   * Convert any RDF/JS term into one of our own terms.
   *
   * Terms that already are one of ours are returned unchanged. Note that `opts` is only forwarded to named nodes
   * and to the datatype of a literal: the literal itself and quads are created without it.
   *
   * @param term The term to convert
   * @param opts.validationStatus Validation status to pass on when creating named nodes
   */
  function fromTerm<T extends RdfJs.Term>(
    term: T,
    opts?: { validationStatus?: ValidationStatus },
  ): TermTypeToTerm[T["termType"]];
  function fromTerm(term: RdfJs.Term | Term, opts?: { validationStatus?: ValidationStatus }): Term {
    // We assume that a term with a `validationStatus` field is one of our terms.
    // We deliberately avoid instanceOf checks here: these may give false negatives when several versions of the
    // datafactory dependency are loaded side by side.
    if ("validationStatus" in term) return term;
    switch (term.termType) {
      case "NamedNode":
        return namedNode(term.value, opts);
      case "Literal": {
        // A non-empty language means a language-tagged literal; otherwise we go by the datatype
        const languageOrDatatype = term.language || fromTerm(term.datatype, opts);
        return literal(term.value, languageOrDatatype);
      }
      case "BlankNode":
        return blankNode(term.value);
      case "Variable":
        return variable(term.value);
      case "DefaultGraph":
        return defaultGraph();
      case "Quad":
        return fromQuad(term as RdfJs.Quad);
    }
  }

  /**
   * Convert an RDF/JS quad into one of our quads. A quad that already is an instance of our `Quad` class is
   * returned unchanged; otherwise each position is converted with `fromTerm` and passed to `quad`.
   */
  function fromQuad(term: RdfJs.Quad): Quad<QuadTerms> {
    // Any casts, as `quad` validates the term types of each position
    return term instanceof Quad
      ? term
      : quad(
          fromTerm(term.subject) as any,
          fromTerm(term.predicate) as any,
          fromTerm(term.object) as any,
          fromTerm(term.graph) as any,
        );
  }
  /**
   * Guard used by `fromString`: make sure the term type we are about to create is allowed.
   * @param actualTermType The term type that was detected
   * @param allowedTermTypes The permitted term types; when undefined, every term type is permitted
   * @throws IllegalTermType when `actualTermType` is not in `allowedTermTypes`
   */
  function assertCorrectFromStringTermType(actualTermType: TermType, allowedTermTypes: Set<TermType> | undefined) {
    if (!allowedTermTypes || allowedTermTypes.has(actualTermType)) return;
    throw new IllegalTermType(`Failed to create term from string: term type ${actualTermType} is not allowed`);
  }
  /**
   * Build a quad from a quad whose positions are given as strings. Each position is parsed with `fromString`,
   * in the order subject, predicate, object, graph.
   */
  function fromRdfStringQuad(rdfStringQuad: RdfStringQuad) {
    const subject = fromString(rdfStringQuad.subject);
    const predicate = fromString(rdfStringQuad.predicate);
    const object = fromString(rdfStringQuad.object);
    const graph = fromString(rdfStringQuad.graph);
    // Any casts, as quad validates the term types
    return quad(subject as any, predicate as any, object as any, graph as any);
  }
  /**
   * Parse a term from its string form. The first character decides the term type:
   *   - empty string: the default graph
   *   - `_`: a blank node; the label is everything after the first two characters
   *   - `?`: a variable; the name is everything after the `?`
   *   - `"`: a literal, optionally followed by `@<language>` or `^^<datatype iri>` after the closing quote
   *   - `<`: a quad, written as subject, predicate, object and an optional graph, separated by single spaces
   *   - anything else: a named node whose IRI is the full string
   *
   * NOTE: the positions of a quad string are located by looking for the first three spaces, so positions that
   * contain a space themselves (e.g. a literal with whitespace) are not split correctly.
   *
   * @param value The string to parse
   * @param opts.termTypes When given, only these term types may be returned (applies to the outermost term only)
   * @throws IllegalTermType when the detected term type is not in `opts.termTypes`
   */
  function fromString<T extends TermType>(value: string, opts: { termTypes: Set<T> }): TermTypeToTerm[T];
  function fromString(value: string): Term;
  function fromString(value: string, opts?: { termTypes: Set<TermType> }): Term {
    const allowedTermTypes = opts?.termTypes;
    if (!value) {
      assertCorrectFromStringTermType("DefaultGraph", allowedTermTypes);
      return defaultGraph();
    }
    switch (value[0]) {
      case "_": {
        assertCorrectFromStringTermType("BlankNode", allowedTermTypes);
        return blankNode(value.slice(2));
      }
      case "?": {
        assertCorrectFromStringTermType("Variable", allowedTermTypes);
        return variable(value.slice(1));
      }
      case '"': {
        assertCorrectFromStringTermType("Literal", allowedTermTypes);
        // For faster parsing, we look for the closing quote instead of using a regex. This is only possible
        // since we know quotation marks won't appear in language tags or datatype IRI's. See syntax sections
        // in the following standards for assurance:
        //   - BCP47   : https://www.rfc-editor.org/bcp/bcp47.txt
        //   - RFC3987 : https://www.ietf.org/rfc/rfc3987.txt
        const closingQuote = value.lastIndexOf('"');
        const lexicalValue = value.slice(1, closingQuote);
        const marker = value[closingQuote + 1];
        // `"..."@<language>`
        if (marker === "@") return literal(lexicalValue, value.slice(closingQuote + 2));
        // `"..."^^<datatype iri>`: skip both carets
        if (marker === "^") return literal(lexicalValue, namedNode(value.slice(closingQuote + 3)));
        return literal(lexicalValue);
      }
      case "<": {
        assertCorrectFromStringTermType("Quad", allowedTermTypes);
        const firstSpace = value.indexOf(" ");
        const secondSpace = value.indexOf(" ", firstSpace + 1);
        const thirdSpace = value.indexOf(" ", secondSpace + 1);

        const subject = fromString(value.slice(1, firstSpace)) as TermTypeToTerm[S];
        const predicate = fromString(value.slice(firstSpace + 1, secondSpace)) as TermTypeToTerm[P];
        // Without a graph there is no third space, and `thirdSpace` is -1: the slice then ends right before the
        // closing bracket, which is exactly the end of the object
        const object = fromString(value.slice(secondSpace + 1, thirdSpace)) as TermTypeToTerm[O];
        const graph =
          thirdSpace > -1 ? (fromString(value.slice(thirdSpace + 1, -1)) as TermTypeToTerm[G]) : undefined;

        return quad(subject, predicate, object, graph);
      }
      default: {
        assertCorrectFromStringTermType("NamedNode", allowedTermTypes);
        return namedNode(value);
      }
    }
  }

  // Type guards that check a term against the term types this factory allows for each quad position
  // (`quadTerms` from the factory options, or the `BaseQuad` term types when not configured).

  /** Whether `term` is allowed in the subject position. */
  function isValidSubject(term: Term): term is TermTypeToTerm[S] {
    return isValidTerm(term, allowedQuadPositions.subject);
  }
  /** Whether `term` is allowed in the predicate position. */
  function isValidPredicate(term: Term): term is TermTypeToTerm[P] {
    return isValidTerm(term, allowedQuadPositions.predicate);
  }
  /** Whether `term` is allowed in the object position. */
  function isValidObject(term: Term): term is TermTypeToTerm[O] {
    return isValidTerm(term, allowedQuadPositions.object);
  }
  /** Whether `term` is allowed in the graph position. */
  function isValidGraph(term: Term): term is TermTypeToTerm[G] {
    return isValidTerm(term, allowedQuadPositions.graph);
  }

  /**
   * Create a quad.
   *
   * The terms are first checked with `assertIsValidQuad` against the `quadTerms` factory option.
   *
   * @param graph Optional; when omitted the factory's default graph is used
   */
  function quad<Su extends TermTypeToTerm[S], Pr extends TermTypeToTerm[P], Ob extends TermTypeToTerm[O]>(
    subject: Su,
    predicate: Pr,
    object: Ob,
  ): Quad<{ subject: Su; predicate: Pr; object: Ob; graph: DefaultGraph & TermTypeToTerm[G] }>;
  function quad<
    Su extends TermTypeToTerm[S],
    Pr extends TermTypeToTerm[P],
    Ob extends TermTypeToTerm[O],
    Gr extends TermTypeToTerm[G],
  >(subject: Su, predicate: Pr, object: Ob, graph: Gr): Quad<{ subject: Su; predicate: Pr; object: Ob; graph: Gr }>;
  function quad(
    subject: TermTypeToTerm[S],
    predicate: TermTypeToTerm[P],
    object: TermTypeToTerm[O],
    graph?: TermTypeToTerm[G],
  ): Quad<QuadTerms>;
  function quad(
    subject: TermTypeToTerm[S],
    predicate: TermTypeToTerm[P],
    object: TermTypeToTerm[O],
    graph?: TermTypeToTerm[G],
  ) {
    assertIsValidQuad<S, P, O, G>(factoryOpts?.quadTerms, subject, predicate, object, graph);
    // Quads without an explicit graph live in the default graph
    const resolvedGraph = graph || DEFAULT_GRAPH;
    return new Quad<QuadTerms>(subject, predicate, object, resolvedGraph);
  }
  /**
   * Restart the counter used for generated blank node labels at 0. Labels generated afterwards repeat the ones
   * generated before the reset, as the prefix stays the same.
   */
  function resetBnodeCounter() {
    blankNodeCounter = 0;
  }

  /**
   * Create a function that builds named nodes by appending a local name to `base`.
   * @param base The prefix, either as a string or as a named node (in which case its `value` is used)
   * @returns A function that takes the local name and returns the named node for `<base><local>`
   */
  function prefixer<Prefix extends string>(base: NamedNode<Prefix> | Prefix) {
    const baseString = typeof base === "string" ? base : base.value;
    return function prefix<Suffix extends string>(local: Suffix) {
      // Created through `namedNode`, so the factory's validation and memoization settings apply
      return namedNode(`${baseString}${local}`);
    };
  }

  // The part of the factory that is checked against the RDF/JS `DataFactory` interface. `satisfies` performs
  // that check without widening the types of our own functions.
  const baseFactory = {
    literal,
    namedNode,
    blankNode,
    defaultGraph,
    quad,
    variable,
  } satisfies RdfJs.DataFactory;
  return {
    ...baseFactory,
    // Additional functionality we expose, on top of the DataFactory spec
    fromTerm,
    fromString,
    fromQuad,
    fromRdfStringQuad,
    randomVariable,
    resetBnodeCounter,
    prefixer,
    isValidSubject,
    isValidPredicate,
    isValidObject,
    isValidGraph,
  };
}

/**
 * Ready-made factory instances.
 */
export const factories = {
  /**
   * Default factory instance: validates terms and memoizes named nodes
   */
  compliant: getFactory(),
  /**
   * A datafactory that assumes all terms are valid
   *
   */
  lenient: getFactory({
    skipValidation: true,
    // Given that most of the overhead is in validation, memoizing may actually slow things down
    // when validation is skipped. So, disable it for the lenient factory.
    disableMemoization: true,
  }),
};

/**
 * We sometimes want a package to consume (as argument) any kind of factory, as long as it's _our_ datafactory.
 * This is the return type of `getFactory`, with `any` for the term types of all four quad positions.
 */
export type AnyTdbDataFactory = ReturnType<typeof getFactory<any, any, any, any>>;
