import { Learner } from "./learner";
import { ObjectLiteral } from "lib/utils";
import { DataProcessingStatus } from "./data";
import { Label } from "./labels";
import { User } from "./users";
import { DashboardConfiguration } from "services/llm.service";

// See /src/external/app/routers/projects.py

/**
 * Describes one named, typed field of a dataset, together with count-style
 * summary information about its values.
 *
 * NOTE(review): the shape of the `ObjectLiteral` members (`default`,
 * `value_counts`, `meta`, `latest_counts_increase`) is not visible from this
 * file — confirm against the backend schema before relying on specific keys.
 */
export interface Field {
  // Always present
  id: number;
  name: string;
  data_type: DataTypes;
  count: number;
  distinct_values: string[];
  latest_counts_increase: ObjectLiteral;

  // May be absent
  display_name?: string;
  required?: boolean;
  default?: ObjectLiteral;
  null_count?: number;
  value_counts?: ObjectLiteral;
  meta?: ObjectLiteral;
  dataset_id?: number;
  created_at?: string;
  updated_at?: string;
}

/**
 * A dataset: its identifying details, the fields it is made of, and its data
 * entries.
 *
 * NOTE(review): `field_names` presumably mirrors the `name` of each entry in
 * `fields` — confirm against the backend.
 */
export interface Dataset {
  id: number;
  name: string;
  description: string;
  count: number;
  field_names: string[];
  fields: Field[];
  data: ObjectLiteral[];
  // When present, holds one number per DataProcessingStatus key.
  status_counts?: Record<DataProcessingStatus, number>;
  created_at?: string;
  updated_at?: string;
}

/**
 * A policy for how many annotations to get on some proportion of the data.
 *
 * Providing multiple annotations for the same data points yields consensus
 * metrics and can help improve the quality of the final annotations provided
 * to the model.
 */
export interface Policy {
  id: string;
  // Literal tag: the only permitted value is the string "policy".
  object: "policy";
  num_annotators: number;
  data_proportion: number;
}

/**
 * A project: its inputs and outputs, the users attached to it, learner and
 * annotation-policy settings, annotator-facing text, optional statistics and
 * task-allocation settings.
 *
 * A few members are kept only for the frontend and are not used by the
 * backend; they are called out inline.
 */
export interface Project {
  name: string;
  description: string;
  inputs: ProjectInput[];
  outputs: ProjectOutput[];
  // The backend also keeps input_order. `inputs` is already an array, but a
  // separate ordering makes life easier on the backend.
  input_order: string[];
  // Same idea for outputs (although there's just one output per project right now).
  output_order: string[];
  users: User[];

  learner?: Learner;
  learner_config: ObjectLiteral;
  policy: Policy;

  // Shown at the top of the annotation interface
  instructions: string;
  guidelines: string;

  // Stats
  data_count?: number;
  data_without_tasks_count?: number;
  tasks_count?: number;
  latest_completed_tasks_batch_size?: number;
  completed_tasks_count?: number;
  incomplete_tasks_count?: number;
  flagged_count?: number;

  // Parts of the datasets used while constructing a new project are stored here
  // to ensure all inputs/outputs have a field_id for every dataset.
  // Not actually used by the backend during the POST though.
  datasets?: Dataset[];
  // Also not used by the backend
  stepsTouched?: boolean[];
  default_dataset_id?: number;
  id: number;
  shared_id?: string;
  pred_count?: number;

  // Task allocation
  task_allocation_strategy: "automatic" | "manual";
  task_allocation_batch_size: number;

  dashboard_configuration?: DashboardConfiguration;
  created_at?: string;
  updated_at?: string;
  external_id: string;
}

/** Links an input or output back to a field in a dataset. */
export interface DataSource {
  id?: number;
  name?: string;
  // Field IDs are unique across all datasets.
  field_id: number;
}

// String enum of display options; it is the type of `ProjectInput.meta.display_as`.
// Every member's value is identical to its name.
// TODO (picky) - these should be singular (i.e. just DisplayType)
export enum DisplayTypes {
  text = "text",
  title = "title",
  meta = "meta",
  link = "link",
  pdf = "pdf",
  image = "image",
}

// String enum of task types; it is the type of `ProjectOutput.task_type`.
// Replaced the TASKTYPES constant.
export enum TaskTypes {
  classification = "classification",
  // Note: for the next two members the string value differs from the member name.
  multilabel = "multi_label_classification",
  span_tagging = "sequence_tagging",
  ordinal_regression = "ordinal_regression",
  generation = "generation",
}

// String enum with two result kinds. It is not referenced by any other
// declaration visible in this file.
export enum ResultType {
  classification = "classification",
  span = "span",
}

// String enum of data types; it is the type of `Field.data_type` and `IO.data_type`.
// Every member's value is identical to its name.
// Replaced the DATATYPES constant.
export enum DataTypes {
  // Used for inputs
  text = "text",
  // Lowercase: the value is "url", not 'URL'.
  url = "url",
  number = "number",
  categorical = "categorical",
  multi_categorical = "multi_categorical",
  quantitative = "quantitative",
  character_offsets = "character_offsets", // Will be renamed "spans" in future
  ordinal_regression = "ordinal_regression",
  file_pdf = "file_pdf",
}

// Members shared by inputs and outputs. Sometimes we need to do the same things
// for both, e.g. finding unused names, or removing entries from data_sources.
export interface IO {
  // Set by the user. When the user supplies new inputs for this project via the
  // API, this is the name they'll use.
  name: string;
  description: string;
  data_type: DataTypes;
  // The data sources used by this input or output
  data_sources: DataSource[];
}

// A project input: the shared IO members plus display settings.
// Somewhat odd name, but presumably chosen because plain 'input' already means
// the input element, so something more specific was wanted.
export interface ProjectInput extends IO {
  id?: number;
  display_name?: string;
  // Chosen by the user: show to annotators, but don't use for the model.
  display_only: boolean;
  meta: {
    display_as?: DisplayTypes;
  };
}

/**
 * A project output: the shared IO members plus the task type, instructions and
 * the labels attached to the output.
 */
export interface ProjectOutput extends IO {
  id?: number;
  task_type: TaskTypes;
  instructions: string;
  labels: Label[];
  label_order: string[];
  meta: ObjectLiteral;
  // Only for task_type = TaskTypes.span_tagging - the 'name' field of the input
  // this output will be for.
  input?: string;
}

/**
 * Status values for the AI. Each member's value is the capitalised form of
 * its name.
 *
 * Declared as a regular string enum (not `const enum`), matching the other
 * enums in this file. A regular enum is emitted as a real object, so it keeps
 * working when files are transpiled one at a time (`isolatedModules`, Babel,
 * esbuild) and its members can be enumerated with `Object.values(AIStatus)`.
 * Existing `AIStatus.<member>` usages are unaffected.
 */
export enum AIStatus {
  loading = "Loading",
  untrained = "Untrained",
  waiting = "Waiting",
  training = "Training",
  scoring = "Scoring",
}

/**
 * Four numeric metrics reported for a single class; used as the value type of
 * `EvalItem.class_breakdown`.
 */
export interface ClassBreakdown {
  precision: number;
  recall: number;
  // The hyphen means this key must stay quoted and be read with bracket access.
  "f1-score": number;
  support: number;
}

/**
 * One evaluation result: a set of aggregate scores plus a per-class breakdown
 * keyed by string.
 */
export interface EvalItem {
  main_score: number;
  accuracy: number;
  macro_f_score: number;
  micro_f_score: number;
  // NOTE(review): keys are presumably class/label names — confirm against the backend.
  class_breakdown: { [classKey: string]: ClassBreakdown };
}

/**
 * Loss values and evaluation results for the train, dev and test splits, plus
 * learning-rate values, a nullable test score and the number of labels.
 *
 * NOTE(review): the arrays are presumably one entry per epoch or step — this
 * file does not say; confirm against the backend.
 */
export interface EvalMetrics {
  // Train split
  train_loss: number[];
  train_eval: EvalItem[];

  // Dev split
  dev_loss: number[];
  dev_eval: EvalItem[];

  // Test split
  test_loss: number[];
  test_eval: EvalItem[];
  test_score: number | null;

  learning_rate: number[];
  num_labels: number;
}

/** An evaluation record: the metrics for a learner, plus timestamps. */
export interface Evaluation {
  id: number;
  // TODO: Are learner_id and id the same?
  learner_id: number;
  // In the project response's learner, eval_metrics is overloaded with some
  // trainer_params, which we are not sure we want to expose.
  eval_metrics: EvalMetrics;

  created_at: string;
  // Explicitly nullable, unlike created_at.
  updated_at: string | null;
}
