import { GetDocumentAnalysisCommandOutput } from '@aws-sdk/client-textract';
import { ApiResponsePage, Field, Table, TextractDocument } from 'amazon-textract-response-parser/dist/cjs';

/**
 * A single form key/value pair extracted from a Textract document analysis.
 */
export type TextractKeyValue = {
  /** Text of the form field's key (label). */
  key: string;
  /** Text of the form field's value. */
  value: string;
  /** 1-based number of the page the field was found on. */
  pageNumber: number;
  // Textract metadata about the field; this isn't used by the UI, but marking as optional in
  // the event it becomes useful.
  _field?: Field;
};

/**
 * All form key/value pairs found on one page of a document.
 */
export type PageKeyValues = {
  /** 1-based page number. */
  pageNumber: number;
  /** Key/value pairs detected on this page; each entry carries the same page number. */
  keyValues: TextractKeyValue[];
};

/**
 * All tables found on one page of a document.
 */
export type PageTables = {
  /** 1-based page number. */
  pageNumber: number;
  /** Parsed `Table` objects from amazon-textract-response-parser for this page. */
  tables: Table[];
};

// Public functions of this module. Both bindings are `const` declarations defined further down
// in this file.
export { listTextractKeyValues, listTextractTables };

/**
 * Lists the form key/value pairs in a Textract document analysis, grouped by page.
 *
 * From https://github.com/aws-samples/amazon-textract-response-parser/blob/master/src-js/README.md:
 * if you're using TypeScript, you may need to typecast your input JSON while loading it. The
 * `ApiResponsePage` input interface exposed and expected by the parser is subtly different from -
 * but functionally compatible with - the output types produced by the AWS SDK for JavaScript
 * Textract client, hence the cast below.
 *
 * @param documentAnalysis - Response of a Textract `GetDocumentAnalysis` call.
 *   NOTE(review): only this single response object is parsed; if the job result was paginated,
 *   the caller presumably has to merge the pages beforehand - confirm against callers.
 * @returns One entry per page returned by `listPages()`, in that order. `pageNumber` is the
 *   1-based position of the page in that list. A field that has a key but no value is reported
 *   with an empty string as its `value`.
 */
const listTextractKeyValues = (documentAnalysis: GetDocumentAnalysisCommandOutput): PageKeyValues[] => {
  const document = new TextractDocument(documentAnalysis as ApiResponsePage);

  return document.listPages().map((page, pageIdx): PageKeyValues => {
    // Textract indices are 1-based; ensure the page number matches what Textract expects
    const textractPageNumber = pageIdx + 1;

    // The `true` flag is forwarded to the parser's `listFields`; per the parser's API it should
    // skip fields that have no key - TODO confirm against the installed parser version.
    const keyValues: TextractKeyValue[] = page.form.listFields(true).map((field) => ({
      key: field.key.text,
      // The parser exposes `value` as nullable (a key can be detected without a value). Fall back
      // to an empty string instead of asserting non-null, which would throw and abort the whole
      // extraction for a single value-less field.
      value: field.value?.text ?? '',
      pageNumber: textractPageNumber,
      _field: field,
    }));

    return {
      pageNumber: textractPageNumber,
      keyValues,
    };
  });
};

/**
 * Lists the tables in a Textract document analysis, grouped by page.
 *
 * The SDK response is cast to `ApiResponsePage` as suggested by
 * https://github.com/aws-samples/amazon-textract-response-parser/blob/master/src-js/README.md:
 * the parser's input interface is subtly different from - but functionally compatible with -
 * the output types produced by the AWS SDK for JavaScript Textract client.
 *
 * @param documentAnalysis - Response of a Textract `GetDocumentAnalysis` call.
 * @returns One entry per page returned by `listPages()`, in that order, each holding the page's
 *   1-based number and the result of `page.listTables()`.
 */
const listTextractTables = (documentAnalysis: GetDocumentAnalysisCommandOutput): PageTables[] => {
  const parsed = new TextractDocument(documentAnalysis as ApiResponsePage);
  const tablesByPage: PageTables[] = [];

  for (const page of parsed.listPages()) {
    tablesByPage.push({
      // Textract page numbers are 1-based: the entry about to be appended is number length + 1.
      pageNumber: tablesByPage.length + 1,
      tables: page.listTables(),
    });
  }

  return tablesByPage;
};
