import { Transform } from "stream";
import { omit, pickBy, sortBy } from "lodash-es";
import * as n3 from "n3";
import Pumpify from "pumpify";
import type { Duplex } from "stream";
import type { AnyTdbDataFactory } from "./DataFactory.ts";
import { factories } from "./DataFactory.ts";
import type { Quad } from "./Terms.ts";

/**
 * Options accepted by `parse` and `getStreamParser`.
 */
export interface ParseOptions {
  /** Serialization format of the input; forwarded to the N3 parser as its `format` option. */
  format?: "turtle" | "trig" | "n-triples" | "n-quads" | "n3";
  /** Data factory handed to the N3 parser. `factories.compliant` is used when this is omitted. */
  factory?: AnyTdbDataFactory;
  /** Base IRI; forwarded to the N3 parser as its `baseIRI` option. */
  baseIri?: string;
  /** Invoked each time the parser reports a prefix, with a single-entry `{ [prefix]: iri }` record. */
  onPrefix?: (prefixes: Record<string, string>) => void;
}

/**
 * Resets the blank node prefix that the N3 parser keeps internally. Intended for tests only.
 * Note that the data factory has its own, additional blank node counter, which is not touched here.
 */
export function resetParserBlanknodeCounter(): void {
  // `_resetBlankNodePrefix` is not part of the public N3 typings, hence the cast.
  const parserClass = n3.Parser as any;
  parserClass._resetBlankNodePrefix();
}

/**
 * Translates our `ParseOptions` into the option object expected by the N3 parser.
 * Falls back to `factories.compliant` when no factory is given, and renames `baseIri` to N3's `baseIRI`.
 * `onPrefix` is not part of the result; callers wire it up themselves.
 */
function parseOptionsToN3Options(opts?: ParseOptions) {
  const format = opts?.format;
  const factory = opts?.factory || factories.compliant;
  const baseIRI = opts?.baseIri;
  return { format, factory, baseIRI };
}

/**
 * Creates an N3 parser whose lexer accepts a wider range of characters inside IRIs than stock N3 does.
 */
function getParser(options?: n3.ParserOptions): n3.Parser {
  const parser = new n3.Parser(options);
  /**
   * We want to handle IRI validation errors ourselves (using the IRI parser),
   * instead of N3 throwing errors and aborting the parsing.
   * To achieve this we loosen the N3 lexer's `_iri` and `_unescapedIri` regexes so they accept more characters.
   * Some details about the two regular expressions:
   * - The `_iri` regex matches IRIs that contain unicode escape sequences (`\u....` / `\U........`)
   * - The `_unescapedIri` regex matches IRIs without any escape sequence (it rejects every backslash)
   *
   * Note 1: When changing these regular expressions, make sure to test them inside a clone of the N3 repo as well.
   * A few tests will fail (expected, as we decreased the strictness), but it is a good sanity check that
   * we did not break parsing completely.
   *
   * Note 2: Our approach is incomplete, as there is a third regex in the lexer called `illegalIriChars`.
   * N3 applies it as a sanity check on IRIs with unicode escape sequences. We cannot change that regex
   * because we cannot access its scope.
   * This means that some IRIs with 1) unicode escape sequences and 2) a different syntax error (e.g. a space)
   * still cause N3 to throw an error (instead of the error being handled by our IRI parser).
   * We accept this limitation for now.
   */
  // The lexer is a private member of the parser, hence the cast.
  const lexer = (parser as any)._lexer;
  // Original regex in N3: /^<((?:[^ <>{}\\]|\\[uU])+)>[ \t]*/ (IRI with escape sequences; needs sanity check after unescaping)
  lexer._iri = /^<((?:[^<>\\]|\\[uU])+)>[ \t]*/;
  // Original regex in N3: /^<([^\x00-\x20<>\\"\{\}\|\^\`]*)>[ \t]*/ (IRI without escape sequences; no unescaping)
  lexer._unescapedIri = /^<([^<>\\]*)>[ \t]*/;
  return parser;
}
/**
 * Parses a complete RDF document held in a string and returns the resulting quads.
 *
 * @param inputString The serialized RDF to parse.
 * @param opts Format, factory and base IRI for the parser; `opts.onPrefix` is called with a
 *   single-entry `{ [prefix]: iri }` record every time the parser reports a prefix.
 * @returns The parsed quads, typed as the quad type of the factory type parameter `F`.
 */
export function parse<F extends AnyTdbDataFactory>(inputString: string, opts?: ParseOptions) {
  const n3Options = parseOptionsToN3Options(opts);
  // Passing `null` as the quad callback makes N3 return the quads instead of streaming them to a callback.
  const quads = getParser(n3Options).parse(inputString, null, (name, node) => {
    opts?.onPrefix?.({ [name]: node.value });
  });
  return quads as unknown[] as Array<ReturnType<F["quad"]>>;
}

/**
 * Wrap a datafactory function so that it never throws.
 * - If one of the arguments is already an `Error` (i.e. an earlier wrapped call failed), that error is
 *   returned as-is and `func` is not called.
 * - If `func` throws, the thrown value is returned instead.
 * That way, we avoid throwing the error in the N3 stream (not supported), and we can throw it later in
 * a transform stream.
 *
 * @param func The datafactory function to wrap.
 * @returns A function with the same arguments that returns either the result of `func` or the error.
 */
function wrapDatafactoryFunction(func: Function): any {
  return (...args: any[]) => {
    for (const arg of args) {
      // Propagate the error itself, whichever position it is in (not blindly the first argument)
      if (arg instanceof Error) return arg;
    }
    try {
      return func(...args);
    } catch (e) {
      return e;
    }
  };
}

/**
 * Returns a copy of the given datafactory whose `literal`, `namedNode` and `quad` methods return
 * errors instead of throwing them.
 *
 * Why this is needed:
 * - Our datafactory may throw errors
 * - The N3 streaming parser does not handle such errors gracefully, see https://github.com/rdfjs/N3.js/issues/308
 *
 * The stream implementation checks for these returned errors and emits them as stream errors.
 */
function wrapDatafactory(datafactory: AnyTdbDataFactory): AnyTdbDataFactory {
  const safeQuad = (...quadArgs: any[]) => {
    // A term that failed to be created arrives here as an Error; hand that error on instead of building a quad
    const failure = quadArgs.find((candidate) => candidate instanceof Error);
    if (failure !== undefined) return failure;
    try {
      return (datafactory.quad as any)(...quadArgs);
    } catch (err) {
      return err;
    }
  };

  // Only the functions that may potentially throw are wrapped; the rest is copied over untouched
  return {
    ...datafactory,
    literal: wrapDatafactoryFunction(datafactory.literal),
    namedNode: wrapDatafactoryFunction(datafactory.namedNode),
    quad: safeQuad,
  };
}

/**
 * Context attached to a parsing error (see `N3Error`).
 */
export interface ParsingContext {
  /** Line number the error was reported on. */
  line: number;
  /**
   * Source lines surrounding the error, keyed by line number. Filled in by `StreamParser._processError`;
   * left undefined when the error line itself could not be included.
   */
  lines?: { [line: number]: string };
  /** NOTE(review): presumably the token at which the N3 parser failed — it is not set in this file, confirm. */
  token?: Token;
  /** NOTE(review): presumably the token preceding `token` — it is not set in this file, confirm. */
  previousToken?: Token;
}

/** Number of lines before and after the error line that are considered for the error context. */
const contextSize = 5;
/** Lines with this many characters or more are left out of the error context. */
const maxLineLength = 500;
/**
 * Use a separate interface for the stream parser, as we don't want pumpify types exposed, and because we want
 * to type the `data` event properly
 */
export interface StreamParser<Q extends Quad> extends Duplex {
  on(event: "data", cb: (quad: Q) => void): this;
  on(event: string, cb: Function): this;
}

/**
 * Shape of the errors that the N3 parser hands to the quad callback: a regular `Error` carrying
 * a `context` that describes where parsing failed.
 */
interface N3Error extends Error {
  context: ParsingContext;
}

/**
 * Token referenced from a `ParsingContext`.
 * NOTE(review): presumably mirrors the token objects produced by the N3 lexer; none of these fields
 * are read in this file, so confirm their meaning against N3.
 */
export interface Token {
  line: number;
  prefix: string;
  type: string;
  value: string;
}

/**
 * Transform stream that feeds incoming chunks to an N3 parser and pushes the parsed quads downstream.
 *
 * Events emitted besides the regular stream events:
 * - `prefix` (prefix: string, iri: string): every time the parser reports a prefix
 * - `error`: parse errors, enriched with the surrounding source lines (see `_processError`)
 */
export class StreamParser<Q extends Quad> extends Transform {
  parser: n3.Parser;
  /** `data` listener that the N3 parser registered on the dummy input stream. */
  onData?: (data: any) => void;
  /** `end` listener that the N3 parser registered on the dummy input stream. */
  onEnd?: () => void;
  /** Lexer line number recorded just before the current chunk was handed to the parser. */
  lineCount: number = 0;
  /** Lexer `_input` recorded just before the current chunk was handed to the parser. */
  remainderLastChunk: string = "";
  /** The chunk most recently passed to `_transform`. */
  currentChunk?: string;
  constructor(options?: n3.ParserOptions) {
    super({ decodeStrings: true, readableObjectMode: true });
    this.parser = getParser(options);

    this.parser.parse(
      // Pass dummy stream to obtain `data` and `end` callbacks
      {
        on: (event: string, cb: any) => {
          switch (event) {
            case "data":
              this.onData = cb;
              break;
            case "end":
              this.onEnd = cb;
              break;
            case "error":
            default:
              // Other listeners (including `error`) are deliberately not captured
              break;
          }
        },
      } as any,
      // Handle triples by pushing them down the pipeline
      (error, t) => {
        if (error) {
          this.emit("error", this._processError(error as any as N3Error));
        } else if (t) {
          this.push(t);
        }
      },
      (prefix, iri) => {
        this.emit("prefix", prefix, iri.value);
      },
    );
  }
  /**
   * Records the lexer state (needed to reconstruct error context later) and hands the chunk to the parser.
   */
  _transform(chunk: any, _encoding: any, done: Function) {
    //Reset line count to be the same as the lexer, just to be sure we're not out-of-sync
    this.lineCount = (this.parser as any)._lexer._line;
    this.remainderLastChunk = (this.parser as any)._lexer._input || "";
    this.currentChunk = chunk;
    this.onData?.(chunk);
    done();
  }
  /**
   * Adds the source lines surrounding the error line to `e.context.lines`.
   *
   * The lines are reconstructed from the lexer remainder plus the current chunk. Only lines within
   * `contextSize` of the error line and shorter than `maxLineLength` are considered, and only the
   * uninterrupted run of such lines around the error line is kept. When the error line itself is not
   * available (e.g. because it is too long), `e.context.lines` is set to `undefined`.
   *
   * @param e The error reported by the N3 parser.
   * @returns The same error object, with `context.lines` updated.
   */
  _processError(e: N3Error) {
    // `currentChunk` is still undefined when no chunk was received; avoid concatenating the text "undefined"
    const chunk = this.remainderLastChunk + (this.currentChunk ?? "");
    const lines = chunk.split("\n");
    if (this.lineCount > 1) {
      lines.shift(); //remove this one, as it is probably a partial line...
      this.lineCount++; //can modify linecount, as it gets reset in transform anyway
    }
    // Always remove the last line as well (as long as it's not the erroneous line). It might well be an incomplete line.
    // Downside: we always lose the very last line.
    if (e.context.line !== lines.length + this.lineCount - 1) lines.pop();
    const numberedSlice: ParsingContext["lines"] = {};
    lines.forEach((val, key) => {
      numberedSlice[key + this.lineCount] = val;
    });
    const lineNumbers = sortBy(Object.keys(numberedSlice).map((key) => +key));
    const start = lineNumbers[0];
    const end = lineNumbers[lineNumbers.length - 1];
    const selectedSlice: ParsingContext["lines"] = pickBy(numberedSlice, (val, key: any) => {
      return key >= e.context.line - contextSize && key <= e.context.line + contextSize && val.length < maxLineLength;
    });

    // Walk outwards from the error line in both directions. As soon as a line is missing from the selection,
    // everything further away is dropped, so that the remaining lines form one sequential block.
    // A line counts as missing only when it is absent: blank lines ("") are valid context lines.
    const removeKeys: number[] = [];
    let remove = false;
    for (let i = e.context.line; i >= start; i--) {
      if (remove) {
        removeKeys.push(i);
      } else if (selectedSlice[i] === undefined) {
        remove = true;
      }
    }
    remove = false;
    for (let i = e.context.line; i <= end; i++) {
      if (remove) {
        removeKeys.push(i);
      } else if (selectedSlice[i] === undefined) {
        remove = true;
      }
    }

    const sequentialSlice: ParsingContext["lines"] = omit(selectedSlice, removeKeys);
    //our error line might be gone, because it's too long. so, check first
    e.context.lines = sequentialSlice[e.context.line] !== undefined ? sequentialSlice : undefined;
    return e;
  }
  /**
   * Signals end-of-input to the parser once all chunks have been written.
   */
  _flush(done: Function) {
    this.onEnd?.();
    done();
  }
}

/**
 * Creates a duplex stream that accepts serialized RDF and emits quads.
 *
 * The data factory is wrapped so that it returns errors instead of throwing them inside N3; a second
 * stage in the pipeline then turns any such error object into a proper stream error.
 *
 * @param options Parser options; `options.onPrefix` is called with a single-entry `{ [prefix]: iri }`
 *   record every time the parser emits a prefix.
 */
export function getStreamParser<Q extends Quad>(options?: ParseOptions): StreamParser<Q> {
  const baseOptions = parseOptionsToN3Options(options);
  const n3Options = { ...baseOptions, factory: wrapDatafactory(baseOptions.factory) };

  const quadSource = new StreamParser(n3Options);
  quadSource.on("prefix", (name: string, value: string) => {
    options?.onPrefix?.({ [name]: value });
  });

  // Items coming out of the parser are either quads or errors returned by the wrapped data factory
  const errorSurfacer = new Transform({
    objectMode: true,
    transform: (item: any, _encoding, next) => {
      if (item instanceof Error) {
        next(item);
        return;
      }
      next(null, item);
    },
  });

  return new Pumpify.obj(quadSource, errorSurfacer) as any;
}
