import type { ITokenConfig as _ITokenConfig, TokenType as _TokenType } from "chevrotain";
import { createToken as origCreateToken, Lexer } from "chevrotain";
import { escapeRegExp } from "lodash-es";

/** Chevrotain's `ITokenConfig` with its `name` property replaced by the literal type `N`. */
type ITokenConfig<N extends string> = Omit<_ITokenConfig, "name"> & { name: N };
/** Chevrotain's `TokenType` with its `name` property replaced by the literal type `N`. */
export type TokenType<N extends string> = Omit<_TokenType, "name"> & { name: N };

/**
 * Thin wrapper around chevrotain's `createToken` that keeps the literal type of the token name.
 *
 * @param config token configuration, forwarded unchanged to chevrotain
 * @returns the created token type, typed with the literal name `N`
 */
function createToken<N extends string>(config: ITokenConfig<N>): TokenType<N> {
  const token = origCreateToken(config);
  // The runtime value is untouched; only the static type of `name` is narrowed.
  return token as TokenType<N>;
}

// Regex source fragments for the numeric and escape terminals of the SPARQL grammar.
// `String.raw` keeps the backslashes exactly as they must appear in the regex source.

// [155] EXPONENT ::= [eE] [+-]? [0-9]+
const EXPONENT = String.raw`[eE][+-]?[0-9]+`;
// [146] INTEGER ::= [0-9]+
const INTEGER = String.raw`[0-9]+`;
// [147] DECIMAL ::= [0-9]* '.' [0-9]+
const DECIMAL = String.raw`[0-9]*\.[0-9]+`;
// [148] DOUBLE ::= [0-9]+ '.' [0-9]* EXPONENT | '.' ([0-9])+ EXPONENT | ([0-9])+ EXPONENT
// One mantissa per alternative of the production; each is followed by the exponent and wrapped in its own group.
const DOUBLE =
  "(" +
  [String.raw`[0-9]+\.[0-9]*`, String.raw`\.[0-9]+`, String.raw`[0-9]+`]
    .map((mantissa) => `(${mantissa}${EXPONENT})`)
    .join("|") +
  ")";
// [160] ECHAR ::= '\' [tbnrf\"']
const ECHAR = String.raw`\\[tbnrf\\"']`;
// [164] PN_CHARS_BASE ::= [A-Z] | [a-z] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D] | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
// Character-class content (without the surrounding brackets), one entry per range of the production.
// The last range lies outside the BMP, so regexes built from this need the `u` flag.
const PN_CHARS_BASE = [
  "A-Z",
  "a-z",
  "\u{00C0}-\u{00D6}",
  "\u{00D8}-\u{00F6}",
  "\u{00F8}-\u{02FF}",
  "\u{0370}-\u{037D}",
  "\u{037F}-\u{1FFF}",
  "\u{200C}-\u{200D}",
  "\u{2070}-\u{218F}",
  "\u{2C00}-\u{2FEF}",
  "\u{3001}-\u{D7FF}",
  "\u{F900}-\u{FDCF}",
  "\u{FDF0}-\u{FFFD}",
  "\u{10000}-\u{EFFFF}",
].join("");

// [165] PN_CHARS_U ::= PN_CHARS_BASE | '_'
const PN_CHARS_U = PN_CHARS_BASE + "_";
// [167] PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]
const PN_CHARS = PN_CHARS_U + "\\-0-9" + "\u{00B7}" + "\u{0300}-\u{036F}" + "\u{203F}-\u{2040}";
// [168] PN_PREFIX ::= PN_CHARS_BASE ((PN_CHARS|'.')* PN_CHARS)?
// A dot may appear inside the prefix but never as its last character.
const PN_PREFIX = "[" + PN_CHARS_BASE + "]" + "([" + PN_CHARS + ".]*[" + PN_CHARS + "])?";

/** The characters that may be backslash-escaped inside the local part of a prefixed name (production [173]). */
export const escapedPnLocalChars = `_~.!$&'()*+,;=/?#@%-`;
// [173]  	PN_LOCAL_ESC ::= '\' ( '_' | '~' | '.' | '-' | '!' | '$' | '&' | "'" | '(' | ')' | '*' | '+' | ',' | ';' | '=' | '/' | '?' | '#' | '@' | '%' )
// Regex source: a literal backslash followed by one character from the class.
// `escapeRegExp` already returns regex source, so its backslashes must NOT be doubled again:
// doubling them would add a literal `\` to the class and make `\\` a valid escape, which the grammar forbids.
// The trailing `-` is the last character of the class and is therefore taken literally.
const PN_LOCAL_ESC = `\\\\[${escapeRegExp(escapedPnLocalChars)}]`;

// [172] HEX ::= [0-9] | [A-F] | [a-f]
// Character-class content only; the brackets are added where it is used.
const HEX = "0-9A-Fa-f";
// [171] PERCENT ::= '%' HEX HEX
const PERCENT = "%" + `[${HEX}]`.repeat(2);
// [170] PLX ::= PERCENT | PN_LOCAL_ESC
const PLX = "(" + [PERCENT, PN_LOCAL_ESC].join("|") + ")";
// [169] PN_LOCAL ::= (PN_CHARS_U | ':' | [0-9] | PLX ) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX) )?
const PN_LOCAL = [
  // First character: no '-', '.' or combining characters allowed here
  `([${PN_CHARS_U}:0-9]|${PLX})`,
  "(",
  // Middle characters: dots are allowed...
  `([${PN_CHARS}.:]|${PLX})*`,
  // ...but the last character must not be a dot
  `([${PN_CHARS}:]|${PLX})`,
  ")?",
].join("");

/**
 * Creates a keyword token that matches its own name case-insensitively.
 *
 * The name is used verbatim as regex source and the pattern has no anchors or word boundaries,
 * so it also matches when the keyword is only the beginning of a longer word.
 *
 * @param name token name, also used as the (case-insensitive) pattern
 * @param category optional category token the keyword is assigned to
 * @returns the created keyword token
 */
function _keyword<N extends string>(name: N, category?: TokenType<N>) {
  return createToken({
    name,
    categories: category ? [category] : [],
    pattern: new RegExp(name, "i"),
  });
}

/**
 * Category token for the keywords in `FuncArity0Keywords`.
 * Its pattern is `Lexer.NA`, so it has no pattern of its own; the keywords below carry it as their category.
 * Judging by the name, these are the built-in functions that are called without arguments.
 */
export const FuncArity0 = createToken({ name: "FuncArity0", pattern: Lexer.NA });
// Keyword tokens in the `FuncArity0` category. The object keys drive `FuncArity0Name` below.
const FuncArity0Keywords = {
  Rand: _keyword("Rand", FuncArity0),
  Now: _keyword("Now", FuncArity0),
  Uuid: _keyword("Uuid", FuncArity0),
  Struuid: _keyword("Struuid", FuncArity0),
};
/** Lower-cased keys of `FuncArity0Keywords`, i.e. `"rand" | "now" | "uuid" | "struuid"`. */
export type FuncArity0Name = Lowercase<keyof typeof FuncArity0Keywords>;

/**
 * Category token for the keywords in `FuncArity1Keywords`.
 * Its pattern is `Lexer.NA`, so it has no pattern of its own; the keywords below carry it as their category.
 * Judging by the name, these are the built-in functions that take exactly one argument.
 */
export const FuncArity1 = createToken({ name: "FuncArity1", pattern: Lexer.NA });
// Keyword tokens in the `FuncArity1` category.
// NOTE(review): the rest-destructuring only makes a shallow copy of the object literal; it looks like a
// leftover (possibly from investigating the type issue described below) — confirm before simplifying.
// `Str` is listed last, after `Strlen`; the patterns are unanchored, so `Str` would also match the start of `Strlen`.
const { ...FuncArity1Keywords } = {
  Datatype: _keyword("Datatype", FuncArity1),
  Iri: _keyword("Iri", FuncArity1),
  Uri: _keyword("Uri", FuncArity1),
  Abs: _keyword("Abs", FuncArity1),
  Ceil: _keyword("Ceil", FuncArity1),
  Floor: _keyword("Floor", FuncArity1),
  Round: _keyword("Round", FuncArity1),
  Strlen: _keyword("Strlen", FuncArity1),
  Ucase: _keyword("Ucase", FuncArity1),
  Lcase: _keyword("Lcase", FuncArity1),
  // The only entry whose key differs from its keyword: the keyword contains underscores
  EncodeForUri: _keyword("Encode_For_Uri", FuncArity1),
  Year: _keyword("Year", FuncArity1),
  Month: _keyword("Month", FuncArity1),
  Day: _keyword("Day", FuncArity1),
  Hours: _keyword("Hours", FuncArity1),
  Minutes: _keyword("Minutes", FuncArity1),
  Seconds: _keyword("Seconds", FuncArity1),
  Timezone: _keyword("Timezone", FuncArity1),
  Tz: _keyword("Tz", FuncArity1),
  Md5: _keyword("Md5", FuncArity1),
  Sha1: _keyword("Sha1", FuncArity1),
  Sha256: _keyword("Sha256", FuncArity1),
  Sha384: _keyword("Sha384", FuncArity1),
  Sha512: _keyword("Sha512", FuncArity1),
  IsIri: _keyword("IsIri", FuncArity1),
  IsUri: _keyword("IsUri", FuncArity1),
  IsBlank: _keyword("IsBlank", FuncArity1),
  IsLiteral: _keyword("IsLiteral", FuncArity1),
  IsNumeric: _keyword("IsNumeric", FuncArity1),
  Lang: _keyword("Lang", FuncArity1),
  Str: _keyword("Str", FuncArity1),
};

// Lower-cased keys of `FuncArity1Keywords`, with one manual correction: `Lowercase` of the key
// `EncodeForUri` is `encodeforuri`, which lacks the underscores of the actual keyword `Encode_For_Uri`.
// That member is therefore excluded and replaced by `encode_for_uri`.
// NOTE(review): an earlier note here suspected a bug in TypeScript's `Lowercase`; the key/keyword
// mismatch above appears to be the actual cause.
export type FuncArity1Name = "encode_for_uri" | Exclude<Lowercase<keyof typeof FuncArity1Keywords>, "encodeforuri">;

/**
 * Category token for the keywords in `FuncArity2Keywords`.
 * Its pattern is `Lexer.NA`, so it has no pattern of its own; the keywords below carry it as their category.
 * Judging by the name, these are the built-in functions that take exactly two arguments.
 */
export const FuncArity2 = createToken({ name: "FuncArity2", pattern: Lexer.NA });
// Keyword tokens in the `FuncArity2` category. The object keys drive `FuncArity2Name` below.
const FuncArity2Keywords = {
  LangMatches: _keyword("LangMatches", FuncArity2),
  Contains: _keyword("Contains", FuncArity2),
  Strstarts: _keyword("Strstarts", FuncArity2),
  Strends: _keyword("Strends", FuncArity2),
  Strbefore: _keyword("Strbefore", FuncArity2),
  Strafter: _keyword("Strafter", FuncArity2),
  Strlang: _keyword("Strlang", FuncArity2),
  Strdt: _keyword("Strdt", FuncArity2),
  SameTerm: _keyword("SameTerm", FuncArity2),
};
/** Lower-cased keys of `FuncArity2Keywords`, e.g. `"langmatches"` or `"sameterm"`. */
export type FuncArity2Name = Lowercase<keyof typeof FuncArity2Keywords>;

/**
 * All keyword tokens, keyed by name. The values are spread into `allTokens` in insertion order.
 *
 * Ordering matters: keyword patterns are unanchored, case-insensitive regexes without word boundaries,
 * so a keyword that is a prefix of another one is listed AFTER the longer keyword
 * (e.g. `Describe` before `Desc`, `Ask` before `As`, `Group_Concat` before `Group`, `Insert`/`Into` before `In`).
 * This presumably relies on the lexer trying token types in list order — keep it in mind when adding keywords.
 */
export const Keywords = {
  // Arity 2 first: `LangMatches` and the `Str...` functions must precede `Lang` and `Str` from the arity-1 set
  ...FuncArity2Keywords,
  // `Struuid` must likewise precede `Str`
  ...FuncArity0Keywords,
  ...FuncArity1Keywords,
  Asc: _keyword("Asc"),
  Bound: _keyword("Bound"),
  Exists: _keyword("Exists"),
  Not: _keyword("Not"),
  Substr: _keyword("Substr"),
  Replace: _keyword("Replace"),
  Regex: _keyword("Regex"),
  Construct: _keyword("Construct"),

  Base: _keyword("Base"),
  By: _keyword("By"),
  With: _keyword("With"),
  Order: _keyword("Order"),
  To: _keyword("To"),
  Clear: _keyword("Clear"),
  Default: _keyword("Default"),
  Named: _keyword("Named"),
  // `Describe` before its prefix `Desc`
  Describe: _keyword("Describe"),
  Desc: _keyword("Desc"),
  // `Ask` (and `Asc` above) before their prefix `As`
  Ask: _keyword("Ask"),
  As: _keyword("As"),
  All: _keyword("All"),
  Drop: _keyword("Drop"),
  Create: _keyword("Create"),
  Add: _keyword("Add"),
  Coalesce: _keyword("Coalesce"),
  Concat: _keyword("Concat"),
  Bnode: _keyword("Bnode"),
  Move: _keyword("Move"),
  Copy: _keyword("Copy"),
  Insert: _keyword("Insert"),
  Data: _keyword("Data"),
  Delete: _keyword("Delete"),
  Load: _keyword("Load"),
  Into: _keyword("Into"),
  Bind: _keyword("Bind"),
  Using: _keyword("Using"),
  Distinct: _keyword("Distinct"),
  Filter: _keyword("Filter"),
  Graph: _keyword("Graph"),
  // `Minus` before its prefix `Min` (further down)
  Minus: _keyword("Minus"),
  Having: _keyword("Having"),
  Limit: _keyword("Limit"),
  From: _keyword("From"),
  Offset: _keyword("Offset"),
  // `Group_Concat` before its prefix `Group`
  Group_Concat: _keyword("Group_Concat"),
  Group: _keyword("Group"),
  Optional: _keyword("Optional"),
  Prefix: _keyword("Prefix"),
  Reduced: _keyword("Reduced"),
  Select: _keyword("Select"),
  Service: _keyword("Service"),
  Silent: _keyword("Silent"),
  Union: _keyword("Union"),
  Values: _keyword("Values"),
  Where: _keyword("Where"),
  Count: _keyword("Count"),
  Sum: _keyword("Sum"),
  Min: _keyword("Min"),
  Max: _keyword("Max"),
  Avg: _keyword("Avg"),
  Sample: _keyword("Sample"),
  Separator: _keyword("Separator"),
  Undef: _keyword("Undef"),
  // `In` last among its family: `Insert` and `Into` are listed above
  In: _keyword("In"),
  If: _keyword("If"),
} as const;
// Operator tokens defined by a fixed string pattern.
export const Wildcard = createToken({ name: "Wildcard", pattern: "*" });

export const Pipe = createToken({ name: "Pipe", pattern: "|" });
export const Ampersand = createToken({ name: "Ampersand", pattern: "&" });
export const Exclamation = createToken({ name: "Exclamation", pattern: "!" });
export const Plus = createToken({ name: "Plus", pattern: "+" });
export const Questionmark = createToken({ name: "Questionmark", pattern: "?" });

export const Minus = createToken({ name: "Minus", pattern: "-" });
export const ForwardSlash = createToken({ name: "ForwardSlash", pattern: "/" });

// Comparison operators. The two-character variants start with the same character as the one-character ones.
export const LessThan = createToken({ name: "LessThan", pattern: "<" });
export const LessThanOrEquals = createToken({ name: "LessThanOrEquals", pattern: "<=" });
export const GreaterThan = createToken({ name: "GreaterThan", pattern: ">" });
export const GreaterThanOrEquals = createToken({ name: "GreaterThanOrEquals", pattern: ">=" });

// Boolean literals, matched case-insensitively.
export const True = createToken({ name: "True", pattern: /true/i });
export const False = createToken({ name: "False", pattern: /false/i });

// [139] IRIREF ::= '<' ([^<>"{}|^`\]-[#x00-#x20])* '>'
// An IRI in angle brackets. The excluded range \u0000-\u0020 covers control characters and the space.
export const IriRef = createToken({ name: "IriRef", pattern: /(?:(<(?:[^<>\"\{\}\|\^`\\\u0000-\u0020])*>))/ });
// [140]  	PNAME_NS ::= PN_PREFIX? ':'
// A namespace prefix including the colon, e.g. `foaf:` or just `:`.
// The pattern needs the `u` flag (PN_PREFIX contains code points outside the BMP), hence the unicode-aware factory.
export const Pname_Ns = createTokenWithUnicodePattern({
  name: "Pname_Ns",
  pattern: new RegExp(`(${PN_PREFIX})?:`, "u"),
  line_breaks: false,
  // No hint: the set of possible first characters is too large to enumerate
  start_chars_hint: false,
});
// [141]  	PNAME_LN ::= PNAME_NS PN_LOCAL
// A full prefixed name: optional prefix, colon, local part (e.g. `foaf:name`).
export const Pname_Ln = createTokenWithUnicodePattern({
  name: "Pname_Ln",
  pattern: new RegExp(`(${PN_PREFIX})?:${PN_LOCAL}`, "u"),
  line_breaks: false,
  // No hint: the set of possible first characters is too large to enumerate
  start_chars_hint: false,
});
// [142]  	BLANK_NODE_LABEL	  ::=  	'_:' ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)?
// A labelled blank node such as `_:b0`. A dot is allowed inside the label but not as its last character.
export const BlankNodeLabel = createTokenWithUnicodePattern({
  name: "BlankNodeLabel",
  pattern: new RegExp(`_:[${PN_CHARS_U}0-9]([${PN_CHARS}\\.]*[${PN_CHARS}])?`, "u"),
  line_breaks: false,
  // Every blank node label starts with an underscore
  start_chars_hint: ["_"],
});
// [145]  	LANGTAG ::= '@' [a-zA-Z]+ ('-' [a-zA-Z0-9]+)*
// A language tag such as `@en` or `@en-US`. Each subtag must contain at least one alphanumeric character and
// the subtag class must not contain `-` itself, otherwise inputs like `@en--US` are accepted as a single tag.
export const Langtag = createToken({ name: "Langtag", pattern: /@[a-zA-Z]+(-[a-zA-Z0-9]+)*/ });

// Numeric literal tokens. The signed variants prepend an escaped sign to the unsigned regex source.

// [146] INTEGER ::= [0-9]+
export const Integer = createToken({ name: "Integer", pattern: new RegExp(INTEGER) });
// [147] DECIMAL ::= [0-9]* '.' [0-9]+
export const Decimal = createToken({ name: "Decimal", pattern: new RegExp(DECIMAL) });
// [148] DOUBLE ::= [0-9]+ '.' [0-9]* EXPONENT | '.' ([0-9])+ EXPONENT | ([0-9])+ EXPONENT
export const Double = createToken({ name: "Double", pattern: new RegExp(DOUBLE) });

// [149] INTEGER_POSITIVE ::= '+' INTEGER
export const Integer_Positive = createToken({ name: "Integer_Positive", pattern: new RegExp("\\+" + INTEGER) });
// [150] DECIMAL_POSITIVE ::= '+' DECIMAL
export const Decimal_Positive = createToken({ name: "Decimal_Positive", pattern: new RegExp("\\+" + DECIMAL) });
// [151] DOUBLE_POSITIVE ::= '+' DOUBLE
export const Double_Positive = createToken({ name: "Double_Positive", pattern: new RegExp("\\+" + DOUBLE) });

// [152] INTEGER_NEGATIVE ::= '-' INTEGER
export const Integer_Negative = createToken({ name: "Integer_Negative", pattern: new RegExp("\\-" + INTEGER) });
// [153] DECIMAL_NEGATIVE ::= '-' DECIMAL
export const Decimal_Negative = createToken({ name: "Decimal_Negative", pattern: new RegExp("\\-" + DECIMAL) });
// [154] DOUBLE_NEGATIVE ::= '-' DOUBLE
export const Double_Negative = createToken({ name: "Double_Negative", pattern: new RegExp("\\-" + DOUBLE) });

// [156]  	STRING_LITERAL1 ::= "'" ( ([^#x27#x5C#xA#xD]) | ECHAR )* "'"
// Single-quoted, single-line string. Inside the template literal, `\\\u{005c}` evaluates to two backslashes,
// i.e. one escaped backslash in the regex; `\u{000a}` and `\u{000d}` put a raw LF and CR into the negated class.
export const String_Literal1 = createToken({
  name: "String_Literal1",
  pattern: new RegExp(`'((${ECHAR})|([^\u{0027}\\\u{005c}\u{000a}\u{000d}]))*'`),
});
// [157]  	STRING_LITERAL2 ::= '"' ( ([^#x22#x5C#xA#xD]) | ECHAR )* '"'
// Double-quoted, single-line string; same construction as STRING_LITERAL1 with `"` as the delimiter.
export const String_Literal2 = createToken({
  name: "String_Literal2",
  pattern: new RegExp(`"((${ECHAR})|([^\u{0022}\\\u{005c}\u{000a}\u{000d}]))*"`),
});
// [158]  	STRING_LITERAL_LONG1 ::= "'''" ( ( "'" | "''" )? ( [^'\] | ECHAR ) )* "'''"
// Triple-single-quoted string that may span lines. The regex is a looser form of the production:
// up to two quotes OR one ordinary/escaped character per iteration, repeated lazily (`*?`) so the match
// ends at the first closing `'''`.
export const String_Literal_Long1 = createToken({
  name: "String_Literal_Long1",
  pattern: new RegExp(`'''('{0,2}|([^'\\\\]|(${ECHAR})))*?'''`),
  line_breaks: true,
});
// [159]  	STRING_LITERAL_LONG2 ::= '"""' ( ( '"' | '""' )? ( [^"\] | ECHAR ) )* '"""'
// Triple-double-quoted string that may span lines; same construction as STRING_LITERAL_LONG1.
export const String_Literal_Long2 = createToken({
  name: "String_Literal_Long2",
  pattern: new RegExp(`"""("{0,2}|([^"\\\\]|(${ECHAR})))*?"""`),
  line_breaks: true,
});

// [162] Matches a single whitespace character (space, tab, line break, ...).
// Placed in the `Lexer.SKIPPED` group.
export const Ws = createToken({
  name: "Ws",
  pattern: /\s/,
  group: Lexer.SKIPPED,
});
/**
 * A special comment of the form `#!...` (up to the end of the line). This is used to annotate parts of the query.
 * Characteristics:
 * - This comment is _not_ treated as whitespace (i.e. it is not in the `Lexer.SKIPPED` group)
 * - Instead, we postprocess the token list and omit this token right before sending it to the parser
 */
export const Annotation = createToken({
  name: "Annotation",
  pattern: /#!.*/,
});
/**
 * A comment starting with `#`, optional whitespace and the word `paginate` (case-insensitive), up to the end of
 * the line. Like `Annotation`, it is not in the `Lexer.SKIPPED` group.
 * NOTE(review): `\s*` also matches line breaks, so a bare `#` followed by a line starting with `paginate`
 * would be matched as one token — confirm whether `[ \t]*` was intended.
 */
export const LegacyPaginateAnnotation = createToken({
  name: "LegacyPaginateAnnotation",
  pattern: /#\s*paginate.*/i,
});
// A regular comment: `#` up to the end of the line. Placed in the `Lexer.SKIPPED` group.
// Its pattern also matches both annotation forms above, which is why they precede it in `allTokens`.
export const Comment = createToken({
  name: "Comment",
  pattern: /#.*/,
  group: Lexer.SKIPPED,
});

/**
 * Creates a token whose pattern is a unicode (`u` flag) regular expression.
 *
 * Chevrotain does not support unicode regular expressions. Instead, we wrap the regex and create a custom matcher.
 * We do change the configuration a bit, making start_chars_hint required to incentivize setting this as it optimizes
 * such custom patterns greatly.
 * We also mark the line_breaks field required, as that's required when setting a custom pattern.
 * See https://github.com/Chevrotain/chevrotain/issues/1670#issuecomment-1001673472 for more context
 *
 * @param config token configuration. `pattern` is recompiled with exactly the flags `u` (unicode) and `y` (sticky),
 *   so any other flags on the given regex are discarded. Pass `start_chars_hint: false` when the possible first
 *   characters cannot be enumerated; it is then forwarded as `undefined`.
 * @returns the created token type
 */
function createTokenWithUnicodePattern<N extends string>(
  config: Omit<ITokenConfig<N>, "pattern" | "line_breaks" | "start_chars_hint"> & {
    pattern: RegExp;
    line_breaks: boolean;
    start_chars_hint: false | Array<string | number>;
  },
) {
  const { start_chars_hint, pattern, ...tokenConfig } = config;
  // Compile the sticky regex once per token instead of once per match attempt: the matcher runs at
  // many input offsets and these unicode character classes are large.
  const stickyPattern = new RegExp(pattern, "uy");
  const updatedConfig = {
    ...tokenConfig,
    pattern: (text: string, offset: number): RegExpExecArray | null => {
      // A sticky regex only matches exactly at `lastIndex`. It is reset before every attempt, so the
      // state left behind by a previous `exec` call never leaks into this one.
      stickyPattern.lastIndex = offset;
      return stickyPattern.exec(text);
    },
    start_chars_hint: start_chars_hint || undefined,
  } as ITokenConfig<N>;
  return createToken(updatedConfig);
}

// [143, 144 and 166] Matches a variable: a `?` or `$` sigil followed by a VARNAME
// [166] ::= ( PN_CHARS_U | [0-9] ) ( PN_CHARS_U | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040] )*
// The sigil is part of the token. Unlike PN_CHARS, the name may not contain `-`.
export const Varname = createTokenWithUnicodePattern({
  name: "Varname",
  pattern: new RegExp(
    `[${escapeRegExp("?$")}][${PN_CHARS_U}0-9][${PN_CHARS_U}0-9\u{00B7}\u{0300}-\u{036F}\u{203F}-\u{2040}]*`,
    "u",
  ),
  line_breaks: false,
  // A variable always starts with one of the two sigils
  start_chars_hint: ["?", "$"],
});
// The lowercase letter `a` as a token of its own.
export const A = createToken({ name: "A", pattern: "a" });

// Punctuation and operators defined by a fixed string pattern.
export const Dot = createToken({ name: "Dot", pattern: "." });
export const Equals = createToken({ name: "Equals", pattern: "=" });
export const NotEquals = createToken({ name: "NotEquals", pattern: "!=" });
export const Comma = createToken({ name: "Comma", pattern: "," });
export const SemiColon = createToken({ name: "SemiColon", pattern: ";" });
export const DoubleCaret = createToken({ name: "DoubleCaret", pattern: "^^" });
export const Caret = createToken({ name: "Caret", pattern: "^" });

// Brackets.
export const LeftCurl = createToken({ name: "LeftCurl", pattern: "{" });
export const RightCurl = createToken({ name: "RightCurl", pattern: "}" });
export const LeftSquare = createToken({ name: "LeftSquare", pattern: "[" });
export const RightSquare = createToken({ name: "RightSquare", pattern: "]" });
export const LeftParen = createToken({ name: "LeftParen", pattern: "(" });
export const RightParen = createToken({ name: "RightParen", pattern: ")" });
/**
 * Every token type defined in this file, in priority order.
 *
 * NOTE(review): the order looks deliberate — presumably this array is handed to a chevrotain `Lexer`, which tries
 * token types in list order. Several entries are prefixes of, or overlap with, later ones, so keep longer or more
 * specific tokens ahead of shorter ones when editing this list.
 */
export const allTokens = [
  // Prefixed names first; the full form (`prefix:local`) before the namespace-only form (`prefix:`)
  Pname_Ln,
  Pname_Ns,
  // Both annotation forms before `Comment`, whose pattern would match them as well
  Annotation,
  LegacyPaginateAnnotation,
  Comment,
  // Keywords in the insertion order of the `Keywords` object
  ...Object.values(Keywords),
  // Category tokens (pattern `Lexer.NA`)
  FuncArity0,
  FuncArity1,
  FuncArity2,
  Wildcard,
  // `IriRef` before the `<` / `<=` operators further down
  IriRef,
  Ws,
  // `Varname` before the bare `?` token
  Varname,
  Langtag,
  Questionmark,
  // Triple-quoted strings before single-quoted ones
  String_Literal_Long1,
  String_Literal_Long2,
  String_Literal1,
  String_Literal2,
  BlankNodeLabel,
  LeftCurl,
  RightCurl,
  Comma,
  LeftParen,
  RightParen,
  LeftSquare,
  RightSquare,
  // `^^` before `^`
  DoubleCaret,
  Caret,
  True,
  False,
  // Numbers from most to least specific: double, decimal, integer
  Double,
  Double_Negative,
  Double_Positive,
  Decimal,
  Decimal_Negative,
  Decimal_Positive,
  Integer,
  Integer_Negative,
  Integer_Positive,
  A,
  // Single-character operators after the numeric tokens that may start with `.`, `+` or `-`
  Dot,
  Pipe,
  Equals,
  // `!=` before `!`
  NotEquals,
  Exclamation,
  Plus,
  Minus,
  ForwardSlash,
  Ampersand,
  // Two-character comparisons before their one-character prefixes
  LessThanOrEquals,
  LessThan,
  GreaterThanOrEquals,
  GreaterThan,
  SemiColon,
];
