// Kept separate from transcript_util.ts: the helpers in this file generate captions for viewing and exporting

import { BrandStyle } from "@prisma/client"

import {
    EBrandPunctuationOption,
    ECaptionFont,
    ECaptionStyle,
    TBrandStylingObject,
} from "../@types/brand_style_types"
import { ICaptionWord, ICaptionWordWithFrames, IClipSchemaItem, ITranscriptWord } from "../@types/snippet_types"
import { EBrandStyleSection, TBrandStyle } from "./../@types/brand_style_types"

// Adds a rowIndex to each word in the caption so that we know to which
// caption sentence (row) each word belongs.
//
// A new row is started when either:
//   - the current row (its words joined by single spaces) plus the next word's
//     text would be longer than `maxChars`, or
//   - the gap between the previous word's `end` and the next word's `start` is
//     larger than GAP_DURATION_THRESHOLD (same unit as `start`/`end`).
//
// Returns a new array of shallow copies; the input words are not modified.
// Row indices start at 0 and grow by one per row.
//
// NOTE(review): the space that would separate the next word from the row is not
// counted in the limit check, so a finished row can be one character longer
// than `maxChars`. Left as-is to keep existing row breaks stable.
export const appleCaptionStyleMultipleLine = <
    T extends { start: number; end: number; text: string },
>(
    wordList: T[],
    maxChars: number,
): (T & { rowIndex: number })[] => {
    const GAP_DURATION_THRESHOLD = 0.6
    const captionsWithRows: (T & { rowIndex: number })[] = []
    let rowIndex = 0
    // Length of the current row's words joined by single spaces. Tracked
    // incrementally so the row does not have to be re-scanned for every word.
    let currentRowLength = 0

    for (const word of wordList) {
        const lastWord = captionsWithRows[captionsWithRows.length - 1]
        if (lastWord === undefined) {
            // The very first word always opens row 0
            currentRowLength = word.text.length
        } else {
            const exceedsCharLimit = currentRowLength + word.text.length > maxChars
            const isGapSignificant = word.start - lastWord.end > GAP_DURATION_THRESHOLD
            if (exceedsCharLimit || isGapSignificant) {
                rowIndex += 1
                currentRowLength = word.text.length
            } else {
                // +1 for the space that joins this word to the row
                currentRowLength += 1 + word.text.length
            }
        }
        captionsWithRows.push({ ...word, rowIndex })
    }
    return captionsWithRows
}

// Removes punctuation from each caption's text according to `option`:
//   - EBrandPunctuationOption.None    -> removes regular (. ,) and special (! ?) marks
//   - EBrandPunctuationOption.Special -> removes regular marks only
//   - EBrandPunctuationOption.Regular -> removes special marks only
//   - any other value                 -> text is left untouched
// A regular mark that sits directly between two digits (e.g. "3.14", "1,000")
// is part of a number and is always kept exactly as written.
//
// Returns a new array of shallow copies; the input captions are not modified.
const removePunctuationFromCaptions = <T extends { start: number; end: number; text: string }>(
    captions: T[],
    option: string,
): T[] => {
    const PUNCTUATION_REGEX = /[.,!?]/g
    const DIGIT_REGEX = /\d/
    const stripRegular =
        option === EBrandPunctuationOption.None || option === EBrandPunctuationOption.Special
    const stripSpecial =
        option === EBrandPunctuationOption.None || option === EBrandPunctuationOption.Regular

    // charAt() yields "" when out of range, which never matches a digit
    const isDigitAt = (text: string, index: number) => DIGIT_REGEX.test(text.charAt(index))

    return captions.map((c) => {
        // A replacer callback inspects the neighbouring characters, so no look
        // ahead/behind is needed (not supported in Safari) and every occurrence
        // is handled in a single pass without a temporary placeholder sequence.
        const processedText = c.text.replace(
            PUNCTUATION_REGEX,
            (mark: string, offset: number, whole: string) => {
                const isRegular = mark === "." || mark === ","
                if (!isRegular) {
                    return stripSpecial ? "" : mark
                }
                if (!stripRegular) {
                    return mark
                }
                const isMidNumber = isDigitAt(whole, offset - 1) && isDigitAt(whole, offset + 1)
                return isMidNumber ? mark : ""
            },
        )

        return {
            ...c,
            text: processedText,
        }
    })
}

// Min and max characters per caption line for each font, based on supported font sizes.
// getLineLengthForFontSize reads this table: `max` is the line length used at the
// smallest supported font size and `min` the one used at the largest.
// NOTE(review): because of the `as Record<...>` assertion the compiler does not
// check that every ECaptionFont member has an entry here - confirm the table is
// complete whenever a font is added.
const fontBoundsMap = {
    [ECaptionFont.Montserrat]: { min: 6, max: 12 },
    [ECaptionFont.Caprasimo]: { min: 6, max: 12 },
    [ECaptionFont.BebasNeue]: { min: 10, max: 22 },
    [ECaptionFont.PermanentMarker]: { min: 6, max: 15 },
    [ECaptionFont.BagelFatOne]: { min: 6, max: 15 },
    [ECaptionFont.ClimateCrisis]: { min: 5, max: 9 },
    [ECaptionFont.TheBoldOne]: { min: 7, max: 17 },
    [ECaptionFont.Bangers]: { min: 11, max: 20 },
    [ECaptionFont.Bungee]: { min: 5, max: 14 },
    [ECaptionFont.Gloock]: { min: 5, max: 16 },
    [ECaptionFont.Koulen]: { min: 9, max: 21 },
    [ECaptionFont.Poppins]: { min: 6, max: 13 },
    [ECaptionFont.RedHatDisplay]: { min: 5, max: 13 },
    [ECaptionFont.Syne]: { min: 3, max: 10 },
    [ECaptionFont.Unbounded]: { min: 4, max: 22 },
} as Record<ECaptionFont, { min: number; max: number }>

// TODO(mujavid): get this to work for landscape videos.
// Converts the brand's general font size into a characters-per-line budget for
// the brand's font. Sizes at or below 30 use the font's `max` bound, sizes at or
// above 60 use its `min` bound, and sizes in between are linearly interpolated
// between the two and rounded to the nearest integer.
const getLineLengthForFontSize = (brand: BrandStyle) => {
    const SMALLEST_FONT_SIZE = 30
    const LARGEST_FONT_SIZE = 60
    const generalStyles = (brand.styles as TBrandStyle)[EBrandStyleSection.General]
    const { fontSize, fontFamily } = generalStyles
    const bounds = fontBoundsMap[fontFamily as ECaptionFont]
    const { min: fewestChars, max: mostChars } = bounds

    if (fontSize <= SMALLEST_FONT_SIZE) {
        // Smallest text fits the most characters
        return mostChars
    } else if (fontSize >= LARGEST_FONT_SIZE) {
        // Largest text fits the fewest characters
        return fewestChars
    } else {
        // Negative slope: each extra font-size unit costs a fraction of a character
        const charsPerSizeUnit = (fewestChars - mostChars) / (LARGEST_FONT_SIZE - SMALLEST_FONT_SIZE)
        return Math.round(charsPerSizeUnit * (fontSize - SMALLEST_FONT_SIZE) + mostChars)
    }
}

// Returns the transcript items that lie inside the [start, end] range or that
// straddle one of its edges. Items that merely touch an edge from the outside
// (e.g. ending exactly at `start`) are not included.
// The result is a new array holding the same item objects, in transcript order.
export const getTranscriptItemsInTimeRange = <T extends ITranscriptWord>(args: {
    transcript: T[]
    // Both in seconds
    start: number
    end: number
}) => {
    const rangeStart = args.start
    const rangeEnd = args.end
    return args.transcript.filter((item) => {
        const liesInside = item.start >= rangeStart && item.end <= rangeEnd
        const straddlesRangeStart = item.start < rangeStart && item.end > rangeStart
        const straddlesRangeEnd = item.start < rangeEnd && item.end > rangeEnd
        return liesInside || straddlesRangeStart || straddlesRangeEnd
    })
}

// Builds the transcript for a clip made of the given schema intervals, re-timed
// to the clip's own timeline:
//   1. for every interval, picks the transcript words inside or overlapping it
//   2. clamps the first word's start and the last word's end to the interval
//   3. shifts the words so the first interval starts at 0 and the gaps between
//      consecutive intervals are removed
// Returns a flat list of word copies in schema order, or an empty list when the
// schema has no intervals.
export const getTranscriptForClipSchema = <T extends ITranscriptWord>(args: {
    schema: IClipSchemaItem[]
    transcript: T[]
}) => {
    const { transcript, schema } = args
    if (schema.length === 0) {
        // Nothing to cut; avoids reading `start` of a missing first interval
        return []
    }
    const startOffset = schema[0].start
    // Total time skipped between intervals so far, updated as we walk the schema
    let cumulativeGap = 0

    return schema.flatMap((interval, i) => {
        if (i > 0) {
            const prevInterval = schema[i - 1]
            const prevEnd = prevInterval.start + prevInterval.duration
            cumulativeGap += interval.start - prevEnd
        }
        const totalOffset = startOffset + cumulativeGap
        const intervalEnd = interval.start + interval.duration
        const words = getTranscriptItemsInTimeRange({
            transcript,
            start: interval.start,
            end: intervalEnd,
        })
        const lastWordIndex = words.length - 1

        return words.map((word, wordIndex) => {
            // Only the edge words can stick out of the interval
            const startsTooEarly = wordIndex === 0 && word.start < interval.start
            const endsTooLate = wordIndex === lastWordIndex && word.end > intervalEnd
            const clampedStart = startsTooEarly ? interval.start : word.start
            const clampedEnd = endsTooLate ? intervalEnd : word.end
            return {
                ...word,
                start: clampedStart - totalOffset,
                end: clampedEnd - totalOffset,
            }
        })
    })
}

// start/end must be in seconds
// Strips punctuation according to the brand's punctuation option and then tags
// every word with the rowIndex of the caption row it belongs to:
//   - SingleWord:    every word gets its own row
//   - SingleLine:    rows are limited to one line's worth of characters
//   - MultipleLines: rows are limited to two lines' worth of characters
// Any other caption style yields an empty list.
export const applyCaptionStyleToWordList = <T extends { start: number; end: number; text: string }>(
    brand: BrandStyle,
    wordList: T[],
): (T & { rowIndex: number })[] => {
    const maxCharsPerLine = getLineLengthForFontSize(brand)
    const cleanedWords = removePunctuationFromCaptions<T>(wordList, brand.punctuationOption)

    switch (brand.captionStyle as ECaptionStyle) {
        case ECaptionStyle.SingleWord:
            return cleanedWords.map((word, index) => ({ ...word, rowIndex: index }))
        case ECaptionStyle.SingleLine:
            return appleCaptionStyleMultipleLine<T>(cleanedWords, maxCharsPerLine)
        case ECaptionStyle.MultipleLines:
            return appleCaptionStyleMultipleLine<T>(cleanedWords, maxCharsPerLine * 2)
        default:
            return []
    }
}

// Finds where a time range (in the same unit as the transcript's start/end)
// begins and ends inside `transcript`.
//   - startIndex: index of the first transcript item whose `start` equals the
//     `start` of the first item in the range
//   - endIndex:   index of the first transcript item whose `end` equals the
//     `end` of the last item in the range
// When no item falls in the range both indices are -1, mirroring what
// Array.prototype.findIndex returns for "not found".
export const getTranscriptRangeIndices = <T extends ITranscriptWord>(
    transcript: T[],
    start: number,
    end: number,
) => {
    const transcriptRange = getTranscriptItemsInTimeRange({
        transcript,
        start,
        end,
    })

    if (transcriptRange.length === 0) {
        // Nothing in range: there is no first/last item to look up
        return { startIndex: -1, endIndex: -1 }
    }

    const firstStart = transcriptRange[0].start
    const lastEnd = transcriptRange[transcriptRange.length - 1].end

    const startIndex = transcript.findIndex((t) => t.start === firstStart)
    const endIndex = transcript.findIndex((t) => t.end === lastEnd)

    return { startIndex, endIndex }
}

// Resolves the styling for one caption: starts from a copy of the brand's
// General section and, when the caption carries its own `styles`, lays those on
// top so they win over the brand values.
export const getCaptionStyles = (
    caption: ICaptionWordWithFrames,
    brandStyle: TBrandStyle,
): TBrandStylingObject => {
    const brandDefaults = { ...brandStyle[EBrandStyleSection.General] }
    const resolved = caption.styles ? { ...brandDefaults, ...caption.styles } : brandDefaults
    return resolved as TBrandStylingObject
}

// Splits a flat list of word objects into a list of rows: every run of
// consecutive words that share the same rowIndex becomes one inner list.
// Word order is preserved and the word objects themselves are reused.
export const groupByRowIndex = (captions: ICaptionWord[]): ICaptionWord[][] => {
    const rows: ICaptionWord[][] = []
    // -1 means "no row opened yet"
    let openRowIndex = -1

    for (const caption of captions) {
        const startsNewRow = caption.rowIndex !== openRowIndex
        if (startsNewRow) {
            openRowIndex = caption.rowIndex
            rows.push([])
        }
        rows[rows.length - 1].push(caption)
    }

    return rows
}

// Collapses each row of words into a single caption: it starts when the row's
// first word starts, ends when the row's last word ends, and its text is the
// row's words joined by single spaces. The merged caption gets empty `styles`
// and a rowIndex equal to the row's position in `captionRows`.
export const mergeRowsIntoCaptions = (captionRows: ICaptionWord[][]): ICaptionWord[] =>
    captionRows.map((row, position) => {
        const firstWord = row[0]
        const lastWord = row[row.length - 1]
        const joinedText = row.map((word) => word.text).join(" ")
        const merged = {
            start: firstWord.start,
            end: lastWord.end,
            text: joinedText,
            styles: {},
            rowIndex: position,
        }
        return merged
    })

// Turns a flat list of words into frame-stamped captions:
// 1. groups the words by row index
// 2. merges all words of each row into a single caption
// 3. adds startFrame and endFrame (time * fps, rounded) to each merged caption
export const formatCaptions = (captions: ICaptionWord[], fps: number) => {
    const toFrame = (time: number) => Math.round(time * fps)
    const mergedCaptions = mergeRowsIntoCaptions(groupByRowIndex(captions))
    return mergedCaptions.map((merged) => ({
        ...merged,
        startFrame: toFrame(merged.start),
        endFrame: toFrame(merged.end),
    }))
}
