// Unlike captions_util.ts, this file contains helpers for processing transcripts when defining snippets.

import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters"

import { getTranscriptItemsInTimeRange } from "../../utils/captions_util"
import { IClipSchemaItem, ITranscriptWord } from "./../../@types/snippet_types"

// Shortest and longest acceptable snippet durations. Both values are in
// milliseconds: filterLengthCriteria compares them against millisecond range durations.
const TRANSCRIPT_LINE_LOWER_DURATION_LIMIT = 20_000
const TRANSCRIPT_LINE_UPPER_DURATION_LIMIT = 120_000

/**
 * Renders transcript items as SRT-style cue text.
 *
 * Every item becomes a `start --> end` timing line followed by its text, which is
 * prefixed with `<speaker>: ` when the item has a speaker. Cues are separated by a
 * blank line. No cue index numbers are emitted.
 *
 * @param json Transcript items whose `start`/`end` are passed to convertToSrtTimestamp.
 * @returns The cues joined into a single string.
 */
export const convertJsonToSrt = (json: ITranscriptWord[]) => {
    const cues: string[] = []
    for (const item of json) {
        const from = convertToSrtTimestamp(item.start)
        const to = convertToSrtTimestamp(item.end)
        const speakerPrefix = item.speaker === undefined ? "" : `${item.speaker}: `
        cues.push(`${from} --> ${to}\n${speakerPrefix}${item.text}`)
    }
    return cues.join("\n\n")
}

/**
 * Formats a duration in seconds as an SRT timestamp (`HH:MM:SS,mmm`).
 *
 * The value is first rounded to a whole number of milliseconds and every field is
 * derived from that integer. Deriving the millisecond field from
 * `(seconds % 1) * 1000` instead is subject to floating-point error
 * (e.g. 1.001 would be rendered as `00:00:01,000`).
 *
 * @param durationInSeconds Duration in seconds; may carry a fractional part.
 * @returns The formatted timestamp.
 */
export function convertToSrtTimestamp(durationInSeconds: number): string {
    const totalMilliseconds = Math.round(durationInSeconds * 1_000)
    const hours = Math.floor(totalMilliseconds / 3_600_000)
    const minutes = Math.floor((totalMilliseconds % 3_600_000) / 60_000)
    const seconds = Math.floor((totalMilliseconds % 60_000) / 1_000)
    const milliseconds = totalMilliseconds % 1_000
    const pad = (value: number, size = 2) => String(value).padStart(size, "0")
    return `${pad(hours)}:${pad(minutes)}:${pad(seconds)},${pad(milliseconds, 3)}`
}

/**
 * Parses an `HH:MM:SS`, `HH:MM:SS,mmm` or `HH:MM:SS.mmm` timestamp into milliseconds.
 *
 * The part after the `,` / `.` separator is treated as a decimal fraction of a
 * second: it is right-padded (or truncated) to three digits before parsing, so
 * `"00:00:01.5"` is 1500 ms rather than 1005 ms.
 *
 * @param timestamp Timestamp string with hour, minute and second components.
 * @returns Total number of milliseconds (NaN if a component is not numeric).
 * @throws TypeError when the timestamp does not have three `:`-separated components.
 */
export function convertTimestampToMillisecondInt(timestamp: string): number {
    const [hours, minutes, secondsAndMilliseconds] = timestamp.split(":")
    if (hours === undefined || minutes === undefined || secondsAndMilliseconds === undefined) {
        throw new TypeError(`Expected an "HH:MM:SS[,mmm]" timestamp but received "${timestamp}"`)
    }

    const separator = secondsAndMilliseconds.includes(",") ? "," : "."
    const [seconds, fraction = ""] = secondsAndMilliseconds.split(separator)
    // Normalise the fraction to exactly three digits so it can be read as milliseconds
    const milliseconds = fraction.padEnd(3, "0").slice(0, 3)

    return (
        parseInt(hours, 10) * 3_600_000 +
        parseInt(minutes, 10) * 60_000 +
        parseInt(seconds, 10) * 1_000 +
        parseInt(milliseconds, 10)
    )
}

/**
 * Left-pads the string form of a number with zeros.
 *
 * @param number Value to format.
 * @param size Minimum length of the result (defaults to 2).
 * @returns The string form of `number`, prefixed with `0` until it is at least `size` long.
 */
export function padZero(number: number, size = 2): string {
    const digits = String(number)
    const missing = Math.ceil(size - digits.length)
    return missing > 0 ? "0".repeat(missing) + digits : digits
}

/**
 * Finds every position at which `char` occurs in `str`.
 *
 * The string is walked by code point, so positions count code points rather than
 * UTF-16 code units.
 *
 * @returns The matching positions in ascending order.
 */
export const findChar = (str: string, char: string) => {
    const positions: number[] = []
    let position = 0
    for (const symbol of str) {
        if (symbol === char) {
            positions.push(position)
        }
        position += 1
    }
    return positions
}

/**
 * Groups the transcript into sentences (via combineTranscriptByFullStop) and returns
 * a slice from one end of the result.
 *
 * @param transcript Transcript items to group.
 * @param noOfSentences Number of sentences requested.
 * @param fromEnd When true, take the sentences from the end instead of the start.
 * @returns With `fromEnd`, the last `noOfSentences` sentences; otherwise the first
 *          `noOfSentences + 1` sentences.
 */
export const getSentencesFromTranscript = (
    transcript: ITranscriptWord[],
    noOfSentences: number,
    fromEnd = false,
) => {
    const sentences = combineTranscriptByFullStop(transcript)
    // NOTE(review): the two directions are asymmetric - the head slice yields one more
    // sentence than the tail slice, and `slice(-0)` returns every sentence when
    // noOfSentences is 0. Confirm with callers whether this is intentional.
    return fromEnd ? sentences.slice(-noOfSentences) : sentences.slice(0, noOfSentences + 1)
}

/**
 * Merges consecutive transcript items until each merged item ends with a full stop.
 *
 * An item is appended to the previous one (texts joined by a space, `end` and
 * `duration` extended) unless the previous item's text already ends with ".".
 * Input items are not mutated; merged items are new objects.
 */
export const combineTranscriptByFullStop = (transcriptItems: ITranscriptWord[]) => {
    const FULLSTOP = "."
    let sentences: typeof transcriptItems = []
    for (const item of transcriptItems) {
        const tail = sentences[sentences.length - 1]
        if (!tail) {
            // First item starts the list
            sentences = [item]
            continue
        }
        if (tail.text.endsWith(FULLSTOP)) {
            // The previous sentence is complete, so this item opens a new one
            sentences.push(item)
            continue
        }
        const extended = {
            ...tail,
            end: item.end,
            duration: item.end - tail.start,
            text: tail.text + " " + item.text,
        }
        sentences[sentences.length - 1] = extended
    }
    return sentences
}

/**
 * Groups word-level transcription output into transcript items.
 *
 * Consecutive words are joined (space separated) into one item until the speaker
 * changes or the accumulated text ends with "." or "?". Each finished item is then
 * passed through convertTranscriptMsToSeconds.
 *
 * @param assemblyWords Word-level items in playback order.
 * @returns The grouped items after conversion by convertTranscriptMsToSeconds.
 */
export const convertAssemblyOutputToTranscriptItems = <T extends ITranscriptWord>(
    assemblyWords: T[],
) => {
    type TimedItem = ITranscriptWord & { duration: number }
    const SENTENCE_TERMINATORS = [".", "?"]

    const startItemFrom = (word: T): TimedItem => ({
        start: word.start,
        end: word.end,
        text: word.text,
        duration: word.end - word.start,
        speaker: word.speaker,
    })

    const items: TimedItem[] = []
    for (const word of assemblyWords) {
        const last = items[items.length - 1]
        const continuesLast =
            last !== undefined &&
            last.speaker === word.speaker &&
            !SENTENCE_TERMINATORS.some((terminator) => last.text.endsWith(terminator))
        if (!continuesLast) {
            items.push(startItemFrom(word))
            continue
        }
        // Extend the open item so that it also covers this word
        items[items.length - 1] = {
            start: last.start,
            end: word.end,
            text: last.text + " " + word.text,
            duration: word.end - last.start,
            speaker: word.speaker,
        }
    }

    // convert timestamps from ms to seconds
    return items.map(convertTranscriptMsToSeconds)
}

/**
 * Returns a copy of the item with `start` and `end` divided by 1000 and `text` trimmed.
 * All other properties (including any `duration`) are copied unchanged.
 */
export const convertTranscriptMsToSeconds = <T extends ITranscriptWord>(t: T) => {
    const MS_PER_SECOND = 1_000
    const { start, end, text } = t
    return {
        ...t,
        start: start / MS_PER_SECOND,
        end: end / MS_PER_SECOND,
        text: text.trim(),
    }
}

/**
 * Returns a copy of the item with `start` and `end` multiplied by 1000 and `text` trimmed.
 * All other properties (including any `duration`) are copied unchanged.
 */
export const convertTranscriptSecondsToMs = <T extends ITranscriptWord>(t: T) => {
    const MS_PER_SECOND = 1_000
    const { start, end, text } = t
    return {
        ...t,
        start: start * MS_PER_SECOND,
        end: end * MS_PER_SECOND,
        text: text.trim(),
    }
}

/**
 * Collects the transcript items covered by each interval of a clip schema.
 *
 * For every interval the items returned by getTranscriptItemsInTimeRange are taken;
 * the first item's `start` and the last item's `end` are clamped to the interval
 * bounds when they extend beyond them. Intervals without items contribute nothing.
 *
 * @returns The items of all intervals, concatenated in schema order.
 */
export const getTranscriptForClipSchema = <T extends ITranscriptWord>(args: {
    schema: IClipSchemaItem[]
    transcript: T[]
}) => {
    const { schema, transcript } = args
    const collected: T[] = []
    for (const interval of schema) {
        const intervalStart = interval.start
        const intervalEnd = intervalStart + interval.duration
        const words = getTranscriptItemsInTimeRange({
            transcript,
            start: intervalStart,
            end: intervalEnd,
        })
        if (words.length === 0) {
            continue
        }
        const first = words[0]
        if (first.start < intervalStart) {
            words[0] = { ...first, start: intervalStart }
        }
        // Read the last item only after the first was clamped: they may be the same item
        const lastIndex = words.length - 1
        const last = words[lastIndex]
        if (last.end > intervalEnd) {
            words[lastIndex] = { ...last, end: intervalEnd }
        }
        for (const word of words) {
            collected.push(word)
        }
    }
    return collected
}

/**
 * Nudges a clip range so that it starts on a transcript item and, where possible,
 * ends at the end of a sentence.
 *
 * 1. If `start` does not coincide with the start of a transcript item, it is moved to
 *    the start of the closest item inside the range.
 * 2. If the first item in the range ends a sentence and another item follows it, the
 *    clip starts at that following item instead.
 * 3. If the last item in the range ends with ".", "!" or "?", the range is returned.
 * 4. Otherwise the items after `end` are examined, up to the maximum clip duration and
 *    (for diarised transcripts) only while the same speaker keeps talking:
 *    - if the next item is more than 1s away, the range is returned as is
 *    - otherwise `end` is moved to the end of the next item that finishes a sentence.
 *
 * `start`, `end` and the transcript times are in seconds, whereas
 * TRANSCRIPT_LINE_UPPER_DURATION_LIMIT is in milliseconds and is converted here.
 *
 * @returns The adjusted `{ start, end }`, in seconds.
 */
export const adjustClipRange = <T extends ITranscriptWord>(args: {
    transcript: T[]
    // Both in seconds
    start: number
    end: number
}) => {
    const MS_PER_SECOND = 1_000
    const MAX_GAP_SECONDS = 1
    const SENTENCE_END = /[.!?]$/

    const { transcript, end } = args
    let start = args.start
    let itemsInRange = getTranscriptItemsInTimeRange({ transcript, start, end })
    if (itemsInRange.length === 0) {
        return { start, end }
    }

    const startValueExists = transcript.some((item) => item.start === start)
    if (!startValueExists) {
        // find the closest item to the start value
        const closestItem = itemsInRange.reduce((prev, curr) =>
            Math.abs(curr.start - start) < Math.abs(prev.start - start) ? curr : prev,
        )
        start = closestItem.start
        itemsInRange = getTranscriptItemsInTimeRange({ transcript, start, end })
        if (itemsInRange.length === 0) {
            return { start, end }
        }
    }

    // Skip a leading item that merely finishes the previous sentence, but only when
    // there is a following item to start from.
    if (itemsInRange.length > 1 && SENTENCE_END.test(itemsInRange[0].text)) {
        start = itemsInRange[1].start
    }

    const lastItem = itemsInRange[itemsInRange.length - 1]
    if (SENTENCE_END.test(lastItem.text)) {
        return { start, end }
    }

    // Remaining time (in seconds) before the clip would exceed the maximum duration
    const duration = end - start
    const upperDurationDelta = TRANSCRIPT_LINE_UPPER_DURATION_LIMIT / MS_PER_SECOND - duration
    if (upperDurationDelta <= 0) {
        return { start, end }
    }
    let adjustedOutput = getTranscriptItemsInTimeRange({
        transcript,
        start: end,
        end: end + upperDurationDelta,
    })
    if (adjustedOutput.length === 0) {
        return { start, end }
    }

    // If diarised, remove all content from other speakers
    const isDiarised = adjustedOutput[0].speaker !== undefined
    if (isDiarised) {
        const nextSpeakerIndex = adjustedOutput.findIndex(
            (item) => item.speaker !== lastItem.speaker,
        )
        // -1 means the same speaker holds every candidate, so nothing is removed
        if (nextSpeakerIndex !== -1) {
            adjustedOutput = adjustedOutput.slice(0, nextSpeakerIndex)
        }
    }

    // Sometimes the first candidate is the same item as the last item in range, skip it
    const nextItem = adjustedOutput.find(
        (item) => item.start !== lastItem.start || item.end !== lastItem.end,
    )
    if (!nextItem) {
        return { start, end }
    }

    const gapSize = nextItem.start - lastItem.end
    if (gapSize > MAX_GAP_SECONDS) {
        return { start, end }
    }

    const betterEndItem = adjustedOutput.find((item) => SENTENCE_END.test(item.text))
    if (!betterEndItem) {
        return { start, end }
    }

    return { start, end: betterEndItem.end }
}

// True when the given word, compared case-insensitively, is a coordinating conjunction.
const isConjunction = (text: string) => {
    switch (text?.toLowerCase()) {
        case "and":
        case "or":
        case "but":
        case "yet":
        case "so":
        case "for":
        case "nor":
            return true
        default:
            return false
    }
}

/**
 * Merges transcript items that begin with a coordinating conjunction
 * ("and", "or", "but", ...) into the item before them.
 *
 * When merging, a trailing "." is removed from the previous text, the conjunction is
 * lower-cased, and `end` / `duration` are extended to cover the merged item. The
 * first item is never merged. Input items are not mutated.
 */
export const combineTranscriptByConjunctions = (transcriptItems: ITranscriptWord[]) => {
    const combined: typeof transcriptItems = []
    for (const currentItem of transcriptItems) {
        const lastItem = combined[combined.length - 1]
        if (!lastItem) {
            combined.push(currentItem)
            continue
        }
        const [firstWord = "", ...otherWords] = currentItem.text.split(" ")
        if (!isConjunction(firstWord)) {
            combined.push(currentItem)
            continue
        }
        // Joining all words at once avoids a trailing space when the item is just the conjunction
        const currentText = [firstWord.toLowerCase(), ...otherWords].join(" ")
        const lastText = lastItem.text.replace(/\.$/, "")
        const mergedItem = {
            ...lastItem,
            end: currentItem.end,
            duration: currentItem.end - lastItem.start,
            text: lastText + " " + currentText,
        }
        combined[combined.length - 1] = mergedItem
    }
    return combined
}

/**
 * Prepares transcript items for SRT output and renders them with convertJsonToSrt.
 *
 * `start`, `end` and `duration` are rounded to three decimal places, then every
 * single-word item (text without a space) is absorbed into the preceding item when
 * both have the same speaker.
 *
 * @returns The SRT text produced by convertJsonToSrt.
 */
export const reconstructTranscriptToSrt = (srtJson: ITranscriptWord[]) => {
    const roundToThreeDecimals = (value: number) => parseFloat(value.toFixed(3))

    const roundedItems = srtJson.map((item) => ({
        ...item,
        duration: roundToThreeDecimals(item.end - item.start),
        start: roundToThreeDecimals(item.start),
        end: roundToThreeDecimals(item.end),
    }))

    const subtitles: ITranscriptWord[] = []
    for (const current of roundedItems) {
        const last = subtitles[subtitles.length - 1]
        if (last === undefined) {
            subtitles.push(current)
            continue
        }
        const isSingleWord = !current.text.includes(" ")
        if (!isSingleWord || current.speaker !== last.speaker) {
            subtitles.push(current)
            continue
        }
        // absorb single words into previous subtitle
        const absorbed = {
            start: last.start,
            end: current.end,
            duration: current.end - last.start,
            text: last.text + " " + current.text,
            speaker: current.speaker,
        }
        subtitles[subtitles.length - 1] = absorbed
    }

    return convertJsonToSrt(subtitles)
}

/**
 * Converts word-level transcription output to SRT text by grouping the words with
 * convertAssemblyOutputToTranscriptItems and rendering them with reconstructTranscriptToSrt.
 */
export const processTranscriptionOutputToSrt = <T extends ITranscriptWord>(words: T[]) =>
    reconstructTranscriptToSrt(convertAssemblyOutputToTranscriptItems(words))

/**
 * Builds a RecursiveCharacterTextSplitter sized for a transcript.
 *
 * The target number of chunks grows with the duration (2 chunks below 5 minutes, up
 * to 10 chunks from 50 minutes). The chunk size is the transcript length divided by
 * that count, clamped to 3,000-7,500 characters. The overlap is half a minute's worth
 * of characters, clamped to at least 200 and to less than the chunk size.
 *
 * @param durationInSeconds Duration of the transcribed media in seconds.
 * @param transcriptCharCount Number of characters in the transcript.
 * @returns A splitter that splits on newlines.
 */
export const createTextSplitter = (durationInSeconds: number, transcriptCharCount: number) => {
    const MAX_CHARACTERS_PER_CHUNK = 7_500
    const MIN_CHARACTERS_PER_CHUNK = 3_000
    const MIN_CHUNK_OVERLAP = 200
    const SECONDS_PER_MINUTE = 60
    const DEFAULT_CHUNK_COUNT = 10
    // [exclusive upper bound in minutes, chunk count], in ascending order
    const CHUNK_COUNT_TIERS: ReadonlyArray<readonly [number, number]> = [
        [5, 2],
        [10, 4],
        [15, 5],
        [25, 6],
        [40, 7],
        [50, 8],
    ]

    const durationInMinutes = durationInSeconds / SECONDS_PER_MINUTE
    const tier = CHUNK_COUNT_TIERS.find(([upperBound]) => durationInMinutes < upperBound)
    const chunkCount = tier ? tier[1] : DEFAULT_CHUNK_COUNT

    // A zero (or invalid) duration makes the rate Infinity/NaN, which would otherwise
    // produce a NaN or near-chunk-sized overlap; fall back to the minimum overlap instead.
    const rawCharsPerMinute = transcriptCharCount / durationInMinutes
    const charsPerMinute = Number.isFinite(rawCharsPerMinute) ? rawCharsPerMinute : 0

    const chunkSize = Math.max(
        MIN_CHARACTERS_PER_CHUNK,
        Math.min(Math.floor(transcriptCharCount / chunkCount), MAX_CHARACTERS_PER_CHUNK),
    )
    const chunkOverlap = Math.max(
        MIN_CHUNK_OVERLAP,
        Math.min(Math.round(charsPerMinute * 0.5), chunkSize - 1), // must be < chunkSize
    )
    return new RecursiveCharacterTextSplitter({
        chunkSize,
        chunkOverlap,
        separators: ["\n"],
    })
}

// A snippet candidate whose bounds are still timestamp strings.
type TStampedSnippetRange = {
    title: string
    // Timestamp strings; addIntTimesToRanges parses them with convertTimestampToMillisecondInt
    startTimestamp: string
    endTimestamp: string
    hashtags: string
}

// A snippet candidate with numeric bounds added alongside the timestamp strings.
// addIntTimesToRanges fills `start` / `end` with millisecond values.
export type TSnippetRange = TStampedSnippetRange & {
    start: number
    end: number
}

// Picks the preferable of two ranges: a range whose duration lies within the
// lower/upper duration limits beats one that does not; otherwise the range whose
// duration is closest to 45,000 wins, with ties going to `a`.
const chooseBetterRange = (a: TSnippetRange, b: TSnippetRange): TSnippetRange => {
    const IDEAL_DURATION = 45_000
    const isWithinLimits = (duration: number) =>
        duration >= TRANSCRIPT_LINE_LOWER_DURATION_LIMIT &&
        duration <= TRANSCRIPT_LINE_UPPER_DURATION_LIMIT
    const distanceFromIdeal = (duration: number) => Math.abs(duration - IDEAL_DURATION)

    const lengthA = a.end - a.start
    const lengthB = b.end - b.start
    const aIsValid = isWithinLimits(lengthA)
    const bIsValid = isWithinLimits(lengthB)

    // Exactly one of the two is valid: validity decides
    if (aIsValid !== bIsValid) {
        return aIsValid ? a : b
    }

    return distanceFromIdeal(lengthA) <= distanceFromIdeal(lengthB) ? a : b
}

/**
 * Resolves overlapping ranges, keeping one range per group of significantly
 * overlapping ranges.
 *
 * Falsy entries are dropped and the remaining ranges are processed in ascending
 * `start` order. When a range overlaps the currently kept range by more than 20
 * (in the unit of `start` / `end`), chooseBetterRange decides which of the two
 * survives; otherwise both are kept.
 *
 * @param ranges Candidate ranges; the array itself is not modified.
 * @returns The surviving ranges, sorted by `start`.
 */
export const filterEncapsulatedRanges = (ranges: TSnippetRange[]): TSnippetRange[] => {
    // NOTE(review): the previous comment described this threshold as "20%", but the
    // comparison is against an absolute overlap of 20 - confirm the intended meaning.
    const MAX_TOLERATED_OVERLAP = 20

    // Filter before the size check so that falsy entries can never reach the result
    const sortedRanges = ranges.filter(Boolean).sort((a, b) => a.start - b.start)
    if (sortedRanges.length <= 1) return sortedRanges

    const [firstRange, ...remainingRanges] = sortedRanges
    const result: TSnippetRange[] = []
    let currentRange = firstRange

    for (const range of remainingRanges) {
        // Negative when the two ranges do not touch at all
        const overlapStart = Math.max(currentRange.start, range.start)
        const overlapEnd = Math.min(currentRange.end, range.end)
        const overlapDuration = overlapEnd - overlapStart

        if (overlapDuration > MAX_TOLERATED_OVERLAP) {
            currentRange = chooseBetterRange(currentRange, range)
        } else {
            // No (significant) overlap: keep the current range and continue with this one
            result.push(currentRange)
            currentRange = range
        }
    }

    result.push(currentRange)
    return result
}

/**
 * Removes ranges whose `start` and `end` both equal those of an earlier range,
 * keeping the first occurrence of each pair.
 */
export const filterDuplicateRanges = (ranges: TSnippetRange[]) => {
    const uniqueRanges: TSnippetRange[] = []
    ranges.forEach((range, position) => {
        const firstPosition = ranges.findIndex(
            (other) => other.start === range.start && other.end === range.end,
        )
        if (firstPosition === position) {
            uniqueRanges.push(range)
        }
    })
    return uniqueRanges
}

/**
 * Keeps only the ranges whose duration (`end - start`, in milliseconds) lies between
 * the lower and upper duration limits, inclusive.
 */
export const filterLengthCriteria = (ranges: TSnippetRange[]) => {
    const hasAcceptableLength = ({ start, end }: TSnippetRange) => {
        // Note: times are in milliseconds
        const length = end - start
        if (length < TRANSCRIPT_LINE_LOWER_DURATION_LIMIT) {
            return false
        }
        return length <= TRANSCRIPT_LINE_UPPER_DURATION_LIMIT
    }
    return ranges.filter(hasAcceptableLength) as TSnippetRange[]
}

/**
 * Adds numeric `start` / `end` values (milliseconds, rounded to integers) to each
 * range by parsing its `startTimestamp` / `endTimestamp` strings.
 */
export const addIntTimesToRanges = (list: TStampedSnippetRange[]) => {
    const toRoundedMs = (timestamp: string) =>
        Math.round(convertTimestampToMillisecondInt(timestamp))
    return list.map((stamped) => {
        const start = toRoundedMs(stamped.startTimestamp)
        const end = toRoundedMs(stamped.endTimestamp)
        return { ...stamped, start, end }
    }) as TSnippetRange[]
}

/**
 * Sorts the ranges by ascending `start`.
 * The given array is sorted in place and is also the return value.
 */
export const sortRanges = (list: TSnippetRange[]) => {
    const byAscendingStart = (left: TSnippetRange, right: TSnippetRange) =>
        left.start - right.start
    return list.sort(byAscendingStart)
}

/**
 * Turns raw snippet locations into a clean, ordered list of ranges.
 *
 * The steps run in this order: parse the timestamps into numeric bounds, drop ranges
 * outside the duration limits, drop exact duplicates, resolve overlapping ranges, and
 * sort by start time.
 *
 * @param results Snippet locations with timestamp strings.
 * @returns The processed ranges, sorted by `start`.
 */
export const processSnippetLocationOutputs = (
    results: {
        startTimestamp: string
        endTimestamp: string
        title: string
        hashtags: string
    }[],
) => {
    // Explicit calls keep every step type-checked (no @ts-ignore / double assertion needed)
    const timedRanges = addIntTimesToRanges(results)
    const validLengthRanges = filterLengthCriteria(timedRanges)
    const uniqueRanges = filterDuplicateRanges(validLengthRanges)
    const nonOverlappingRanges = filterEncapsulatedRanges(uniqueRanges)
    return sortRanges(nonOverlappingRanges)
}
