import {
    Box,
    Button,
    ButtonProps,
    Flex,
    Icon,
    IconButton,
    Image,
    Spinner,
    Tooltip,
} from "@chakra-ui/react"
import { useRouter } from "next/router"
import {
    Fragment,
    MutableRefObject,
    useCallback,
    useEffect,
    useMemo,
    useRef,
    useState,
} from "react"
import { MdReplay } from "react-icons/md"

import { ITranscriptWord } from "../../@types/snippet_types"
import _c from "../../configs/constants"
import { useDismissableOverlay } from "../../hooks/useDismissableOverlay"
import { IVideoState } from "../../hooks/useVideoReducer"
import { getSentencesFromTranscript } from "../../server/utils/transcript_util"
import { measureTextDimensions } from "../../utils/browser_util"
import { getTranscriptItemsInTimeRange, getTranscriptRangeIndices } from "../../utils/captions_util"

// Upper bound on how many sentences of surrounding context are rendered before and after
// the clip. Also passed to getSentencesFromTranscript (presumably as its sentence limit —
// confirm against that helper).
const SENTENCE_THRESHOLD = 3

/** A transcript word inside the current clip range, tagged with whether it is still enabled. */
interface IClipWord extends ITranscriptWord {
    // false when the word falls in a gap between clipSchema intervals; such words are
    // rendered with a line-through.
    active: boolean
}

/** Props accepted by EditorTranscript. */
interface IEditorTranscriptProps {
    // Video element that is polled for currentTime, seeked when a word is pressed,
    // and paused once playback reaches the end of the clip.
    videoRef: MutableRefObject<HTMLVideoElement | null>
    // Supplies captionWords (the transcript) and clipSchema (the clip intervals).
    videoState: IVideoState
    // Invoked with the single selected word when "Set as clip start" is chosen.
    onSelectStartWord: (wordClicked: ITranscriptWord) => void
    // Invoked with the single selected word when "Set as clip end" is chosen.
    onSelectEndWord: (wordClicked: ITranscriptWord) => void
    // Invoked with { start, end, text } of the edited word when an inline edit is saved.
    onCaptionWordChange: (word: ITranscriptWord) => void
    // Invoked with true when inline editing starts and false when edit mode is closed.
    onChangeEditCaptionState: (value: boolean) => void
    // Invoked with the start of the first and the end of the last highlighted word
    // when "Remove ... from clip" is chosen.
    onDeleteWords: (start: number, end: number) => void
    // Invoked with the same bounds when "Add ... back" is chosen.
    onEnableWords: (start: number, end: number) => void
}

// TODO(mujavid): move common logic out of snippet player and editor video.
// TODO(mujavid): test

const EditorTranscript = ({
    videoRef,
    videoState,
    onSelectStartWord,
    onSelectEndWord,
    onChangeEditCaptionState,
    onCaptionWordChange,
    onDeleteWords,
    onEnableWords,
}: IEditorTranscriptProps) => {
    const { captionWords: words, clipSchema } = videoState

    const router = useRouter()
    // Local copy of the caption words; refreshed by an effect whenever `words` is non-empty.
    const [transcript, setTranscript] = useState(words)
    // Stays true (spinner shown) until `words` contains at least one entry.
    const [isLoading, setIsLoading] = useState<boolean>(true)
    // Word matching the video's current time; updated by the polling interval.
    const [currentSpokenWord, setCurrentSpokenWord] = useState<ITranscriptWord | null>(null)
    // Tooltip anchor, expressed relative to the scroll container's content
    // (bounding-rect offset plus scroll offset). null hides the menu.
    const [tooltipPosition, setTooltipPosition] = useState<{ x: number; y: number } | null>(null)
    // Draft text of the word being edited inline; null means no edit is in progress.
    const [currentEditingCaption, setCurrentEditingCaption] = useState<string | null>(null)
    // The two ends of the highlighted selection, in the order they were set (not sorted).
    const [highlightStartWord, setHighlightStart] = useState<ITranscriptWord | null>(null)
    const [highlightEndWord, setHighlightEnd] = useState<ITranscriptWord | null>(null)
    // True between mousedown on a clip word and the next window mouseup.
    const [isDragging, setIsDragging] = useState<boolean>(false)

    // Scroll container for the whole transcript.
    const transcriptEditorRef = useRef<HTMLDivElement | null>(null)
    // The <input> rendered in place of the word being edited.
    const wordToEditRef = useRef<HTMLInputElement | null>(null)
    // Container of the context words rendered before the clip.
    const prevScriptRef = useRef<HTMLDivElement | null>(null)
    // Container of the clip's own words.
    const currentTranscriptRef = useRef<HTMLDivElement | null>(null)
    // Root element of the floating tooltip menu.
    const tooltipRef = useRef<HTMLDivElement | null>(null)
    // The selection as [earlier word, later word] ordered by start time,
    // or null while either end of the selection is unset.
    const highlightedWordRange = (() => {
        if (highlightStartWord && highlightEndWord) {
            const orderedEnds = [highlightStartWord, highlightEndWord]
            orderedEnds.sort((first, second) => first.start - second.start)
            return orderedEnds
        }
        return null
    })()
    // A selection whose two ends share the same start time counts as one "clicked" word.
    const clickedWord = (() => {
        if (highlightedWordRange === null) {
            return null
        }
        return highlightStartWord!.start === highlightEndWord!.start ? highlightStartWord : null
    })()
    // The menu is hidden while a drag is still in progress.
    const isTooltipVisible = !isDragging && highlightedWordRange !== null
    const isEditingWord = currentEditingCaption !== null

    // Clip bounds taken from the first and last clipSchema intervals; both fall back to 0
    // when the schema has no intervals. They are compared against video.currentTime below.
    const clipStart = clipSchema?.[0] ? clipSchema[0].start : 0
    const clipEnd = (() => {
        const finalInterval = clipSchema?.[clipSchema.length - 1]
        return finalInterval ? finalInterval.start + finalInterval.duration : 0
    })()
    // Indices into `transcript` of the first and last word covered by the clip.
    // An empty schema yields { 0, 0 } without consulting the transcript.
    const { startIndex, endIndex } = useMemo(() => {
        if (clipSchema.length === 0) {
            return { startIndex: 0, endIndex: 0 }
        }
        return getTranscriptRangeIndices(transcript, clipStart, clipEnd)
        // clipSchema.length is read above, so it belongs in the dependency list; depending on
        // the length (a primitive) avoids recomputing when only the array identity changes.
    }, [transcript, clipSchema.length, clipStart, clipEnd])

    // Mirror incoming caption words into local state, clear the loading flag, and — once the
    // router is ready and both containers are mounted — apply the initial scroll offset.
    useEffect(() => {
        if (words.length < 1) {
            return
        }
        setTranscript(words)
        setIsLoading(false)

        const scrollContainer = transcriptEditorRef.current
        const contextBefore = prevScriptRef.current
        if (!router.isReady || !contextBefore || !scrollContainer) {
            return
        }
        // Scrollable distance of the container (content height minus viewport height).
        const scrollableHeight = scrollContainer.scrollHeight - scrollContainer.clientHeight
        const initialOffset = (contextBefore.clientHeight / scrollableHeight) * 100
        scrollContainer.scrollTo({ top: initialOffset })
    }, [router.isReady, words, transcriptEditorRef, prevScriptRef])

    // Lookup keyed by word.start for every clip word that lies entirely inside a gap between
    // two consecutive clipSchema intervals (i.e. a section removed from the clip).
    const inactiveWords = useMemo(() => {
        // Gaps no longer than one timeline tick are ignored; the same tolerance
        // also extends the end of each recorded gap.
        const tolerance = _c.INTERNAL_TIMELINE_TICK_SIZE_MS / 1_000

        const gaps: { start: number; end: number }[] = []
        for (let i = 1; i < clipSchema.length; i++) {
            const earlier = clipSchema[i - 1]
            const later = clipSchema[i]
            const earlierEnd = earlier.start + earlier.duration
            if (later.start - earlierEnd > tolerance) {
                gaps.push({ start: earlierEnd, end: later.start + tolerance })
            }
        }

        const lookup: Record<number, boolean> = {}
        for (const word of transcript.slice(startIndex, endIndex + 1)) {
            const fallsInGap = gaps.some((gap) => word.start >= gap.start && word.end <= gap.end)
            if (fallsInGap) {
                lookup[word.start] = true
            }
        }
        return lookup
    }, [clipSchema, transcript, startIndex, endIndex])

    // Words inside the clip range, each tagged `active` unless its start time
    // appears in the inactiveWords lookup.
    const currentClipTranscript = useMemo(() => {
        const clipWords = transcript.slice(startIndex, endIndex + 1)
        return clipWords.map((word) => {
            const active = !(word.start in inactiveWords)
            return { ...word, active }
        })
    }, [transcript, startIndex, endIndex, inactiveWords])

    // Every word before the first clip word.
    const previousTranscript = useMemo(
        () => transcript.slice(0, startIndex),
        [transcript, startIndex],
    )
    // Every word after the last clip word.
    const nextTranscript = useMemo(() => transcript.slice(endIndex + 1), [transcript, endIndex])
    // Context words shown above the clip: every transcript word lying between the start of the
    // first returned sentence and the end of the last one kept (at most SENTENCE_THRESHOLD).
    // NOTE(review): the meaning of the third argument (`true`) is defined by
    // getSentencesFromTranscript — confirm there.
    const previousWords = useMemo(() => {
        const sentences = getSentencesFromTranscript(previousTranscript, SENTENCE_THRESHOLD, true)
        if (sentences.length === 0) {
            return []
        }
        const lastSentenceIndex = Math.min(sentences.length, SENTENCE_THRESHOLD) - 1
        const windowStart = sentences[0].start
        const windowEnd = sentences[lastSentenceIndex].end
        return transcript.filter((word) => word.start >= windowStart && word.end <= windowEnd)
    }, [previousTranscript])
    // Context words shown below the clip: every transcript word lying between the start of the
    // first returned sentence and the end of the last one kept (at most SENTENCE_THRESHOLD).
    const upcomingWords = useMemo(() => {
        const sentences = getSentencesFromTranscript(nextTranscript, SENTENCE_THRESHOLD)
        if (sentences.length === 0) {
            return []
        }
        const lastSentenceIndex = Math.min(sentences.length, SENTENCE_THRESHOLD) - 1
        const windowStart = sentences[0].start
        const windowEnd = sentences[lastSentenceIndex].end
        return transcript.filter((word) => word.start >= windowStart && word.end <= windowEnd)
    }, [nextTranscript])

    // True when the word's start time lies within the highlighted selection (inclusive).
    const isWordHighlighted = (word?: ITranscriptWord) => {
        if (!word || highlightedWordRange === null) {
            return false
        }
        const [rangeStart, rangeEnd] = highlightedWordRange
        return rangeStart.start <= word.start && word.start <= rangeEnd.start
    }

    // Poll the video every 100ms: pause it once playback reaches the end of the clip,
    // otherwise refresh the currently spoken word.
    useEffect(() => {
        const intervalId = setInterval(() => {
            const video = videoRef.current
            if (!video) {
                return
            }
            const shouldPauseAtVideoEnd = video.currentTime >= clipEnd && !video.paused
            if (shouldPauseAtVideoEnd) {
                video.pause()
                return
            }
            // maintainCaptionWord is a `const` declared further down in this component. It is only
            // touched inside this callback (which runs after render), so the later declaration is
            // safe here — but it could not be listed in the dependency array below without
            // reading it before initialisation.
            maintainCaptionWord(video)
        }, 100)
        return () => {
            clearInterval(intervalId)
        }
        // NOTE(review): maintainCaptionWord also closes over currentClipTranscript, which is not
        // a dependency here, so the interval may use a stale copy until transcript/clipEnd change.
    }, [videoRef, transcript, clipEnd])

    // Clears the selection, hides the tooltip, discards any draft edit and
    // tells the parent that caption editing has ended.
    const handleCloseEditMode = () => {
        setHighlightStart(null)
        setHighlightEnd(null)
        setTooltipPosition(null)
        setCurrentEditingCaption(null)
        onChangeEditCaptionState(false)
    }
    /**
     * Puts the single selected word into inline edit mode.
     * Bound to the tooltip's "Edit caption" button and to double-click on a clip word.
     */
    const handleEditCaptionOnClick = (e: React.MouseEvent<HTMLButtonElement>) => {
        e.stopPropagation()
        // A mousemove between mousedown and the double-click can move the selection end onto a
        // neighbouring word, leaving no single clicked word; there is nothing to edit then.
        if (!clickedWord) {
            return
        }
        setTooltipPosition(null)
        setCurrentEditingCaption(clickedWord.text)
        onChangeEditCaptionState(true)
    }
    // Tooltip action: ask the parent to remove the highlighted span from the clip.
    // Only reachable while the tooltip is rendered, i.e. while a selection exists.
    const handleClickDisableCaption = (e: React.MouseEvent<HTMLButtonElement>) => {
        e.stopPropagation()
        const [firstWord, lastWord] = highlightedWordRange!
        onDeleteWords(firstWord.start, lastWord.end)
        setTooltipPosition(null)
    }
    // Tooltip action: ask the parent to restore the highlighted span to the clip.
    // Only reachable while the tooltip is rendered, i.e. while a selection exists.
    const handleClickEnableCaption = (e: React.MouseEvent<HTMLButtonElement>) => {
        e.stopPropagation()
        const [firstWord, lastWord] = highlightedWordRange!
        onEnableWords(firstWord.start, lastWord.end)
        setTooltipPosition(null)
    }
    // Mousedown on a clip word: seek the video to the word, start a new selection anchored
    // on it and enter drag mode. `event` is currently unused.
    const handleMouseDown = (event: React.MouseEvent<HTMLButtonElement>, word: ITranscriptWord) => {
        if (videoRef.current) {
            videoRef.current.currentTime = word.start
        }
        setTooltipPosition(null)
        setHighlightStart(word)
        setHighlightEnd(word)
        setIsDragging(true)
        // On clicking a word, cancel editing of caption
        setCurrentEditingCaption(null)
    }
    /**
     * Window mousemove handler used while dragging: extends the selection to the clip word
     * whose centre is closest to the pointer.
     */
    const handleDragMouseMove = (event: MouseEvent) => {
        if (!isDragging || !currentTranscriptRef.current) {
            return
        }

        // Only <button> children represent words (line-break boxes are skipped), and they are
        // rendered in the same order as currentClipTranscript, so a button's position among
        // the buttons is its index in currentClipTranscript.
        const wordButtons = Array.from(currentTranscriptRef.current.children).filter(
            (el) => el.tagName === "BUTTON",
        )

        let closestWordIndex = -1
        let closestDistance = Infinity
        wordButtons.forEach((button, i) => {
            const rect = button.getBoundingClientRect()
            // Distance from the button's centre to the pointer.
            const distance = Math.hypot(
                rect.left + rect.width / 2 - event.clientX,
                rect.top + rect.height / 2 - event.clientY,
            )
            if (distance < closestDistance) {
                closestWordIndex = i
                closestDistance = distance
            }
        })

        // No button found (or DOM and transcript momentarily out of sync): keep the current
        // selection end rather than storing `undefined` in state.
        const closestWord = currentClipTranscript[closestWordIndex]
        if (!closestWord) {
            return
        }
        setHighlightEnd(closestWord)
    }
    /**
     * Window mouseup handler.
     * - Outside a drag: clears the selection and hides the tooltip.
     * - At the end of a drag: positions the tooltip horizontally centred over the highlighted
     *   words and level with their top edge, in scroll-container content coordinates.
     */
    const handleDragMouseUp = () => {
        if (!isDragging) {
            setHighlightStart(null)
            setHighlightEnd(null)
            setTooltipPosition(null)
            return
        }
        // The drag ends on every path below.
        setIsDragging(false)

        const transcriptEditor = transcriptEditorRef.current
        const clipContainer = currentTranscriptRef.current
        if (highlightedWordRange === null || !transcriptEditor || !clipContainer) {
            return
        }

        const [startWord, endWord] = highlightedWordRange
        const firstIndex = currentClipTranscript.findIndex((word) => word.start === startWord.start)
        const lastIndex = currentClipTranscript.findIndex((word) => word.start === endWord.start)
        // A selection end that is no longer part of the clip cannot be located on screen.
        if (firstIndex === -1 || lastIndex === -1) {
            return
        }

        const wordRects = Array.from(clipContainer.children)
            .filter((el) => el.tagName === "BUTTON")
            .slice(firstIndex, lastIndex + 1)
            .map((wordEl) => wordEl.getBoundingClientRect())
        // Nothing to measure; bail out instead of reducing over an empty list.
        if (wordRects.length === 0) {
            return
        }

        let leftMost = Infinity
        let rightMost = -Infinity
        let topMost = Infinity
        for (const rect of wordRects) {
            leftMost = Math.min(leftMost, rect.left)
            rightMost = Math.max(rightMost, rect.right)
            topMost = Math.min(topMost, rect.top)
        }

        // Convert viewport coordinates into the scroll container's content coordinates.
        const editorRect = transcriptEditor.getBoundingClientRect()
        const scrollTargetX = leftMost - editorRect.x + transcriptEditor.scrollLeft
        const scrollTargetY = topMost - editorRect.y + transcriptEditor.scrollTop
        setTooltipPosition({
            x: scrollTargetX + (rightMost - leftMost) / 2,
            y: scrollTargetY,
        })
    }
    // Click on a context word (outside the clip): seek to it, select it, and show the tooltip
    // centred above it in scroll-container content coordinates.
    const handleClickWord = (
        event: React.MouseEvent<HTMLButtonElement, MouseEvent>,
        word: ITranscriptWord,
    ) => {
        const video = videoRef.current
        if (video) {
            video.currentTime = word.start
        }

        const editor = transcriptEditorRef.current!
        const editorBounds = editor.getBoundingClientRect()
        const wordBounds = event.currentTarget.getBoundingClientRect()
        const tooltipX = wordBounds.x - editorBounds.x + editor.scrollLeft + wordBounds.width / 2
        const tooltipY = wordBounds.y - editorBounds.y + editor.scrollTop

        setHighlightStart(word)
        setHighlightEnd(word)
        setTooltipPosition({ x: tooltipX, y: tooltipY })
        // Selecting a word always abandons any in-progress caption edit.
        setCurrentEditingCaption(null)
    }
    // onChange for the inline edit <input>: stores the draft text and resizes the input
    // to the measured width of that text.
    const handleClickEditCaption = (e: React.ChangeEvent<HTMLInputElement>) => {
        const { value: typedText, style: inputStyle } = e.currentTarget
        const { width: textWidth } = measureTextDimensions({
            text: typedText,
            fontFamily: inputStyle.fontFamily,
            fontSize: inputStyle.fontSize,
        })
        const input = wordToEditRef.current
        if (input) {
            // Reset first, then grow to the measured text width plus 3px of slack.
            input.style.width = "auto"
            input.style.width = `${textWidth + 3}px`
        }
        setCurrentEditingCaption(typedText)
    }
    /**
     * Commits an in-progress caption edit and/or dismisses the selection.
     *
     * Called without an event when Enter is pressed, and passed to useDismissableOverlay
     * (presumably invoked with the triggering event on an outside interaction — confirm in
     * that hook).
     *
     * @param e Optional triggering event; when its target lies outside the tooltip the
     *          selection is dismissed even if no edit is in progress.
     */
    const handleSaveEditedWord = (e?: KeyboardEvent | MouseEvent) => {
        // Save edited word
        if (isEditingWord && clickedWord) {
            onCaptionWordChange({
                start: clickedWord.start,
                end: clickedWord.end,
                text: currentEditingCaption ?? clickedWord.text ?? "",
            })
        }
        // A target that is not a DOM node, or an unmounted tooltip, counts as "outside".
        const target = e?.target
        const isOutsideTooltip =
            !!target && !(target instanceof Node && tooltipRef.current?.contains(target))
        // Close exactly once, whether we just saved or the interaction landed outside the tooltip.
        if (isEditingWord || isOutsideTooltip) {
            handleCloseEditMode()
        }
    }
    // Tooltip action: report the single selected word as the new clip start, then dismiss.
    // The button is only rendered for a single-word selection, hence the non-null assertion.
    const handleClickSetClipStart = (e: React.MouseEvent<HTMLButtonElement>) => {
        e.stopPropagation()
        onSelectStartWord(clickedWord!)
        handleCloseEditMode()
    }
    // Tooltip action: report the single selected word as the new clip end, then dismiss.
    // The button is only rendered for a single-word selection, hence the non-null assertion.
    const handleClickSetClipEnd = (e: React.MouseEvent<HTMLButtonElement>) => {
        e.stopPropagation()
        onSelectEndWord(clickedWord!)
        handleCloseEditMode()
    }
    // Window keydown handler: Enter commits the current edit (if any); the left/right arrow
    // keys have their propagation stopped.
    const handleKeyPress = useCallback(
        (e: KeyboardEvent) => {
            switch (e.key) {
                case "Enter":
                    handleSaveEditedWord()
                    break
                case "ArrowLeft":
                case "ArrowRight":
                    e.stopPropagation()
                    break
                default:
                    break
            }
        },
        [currentEditingCaption, isEditingWord, clickedWord],
    )
    // Hand the edit input and the save handler to the dismissable-overlay hook
    // (presumably fires the handler on interactions outside the input — confirm in the hook).
    useDismissableOverlay(wordToEditRef, handleSaveEditedWord)
    // Register the window-level key and drag listeners. The handlers are recreated on each
    // render and close over the state listed below, so the listeners are re-subscribed
    // whenever any of it changes.
    useEffect(() => {
        window.addEventListener("keydown", handleKeyPress)
        window.addEventListener("mousemove", handleDragMouseMove)
        window.addEventListener("mouseup", handleDragMouseUp)
        return () => {
            window.removeEventListener("keydown", handleKeyPress)
            window.removeEventListener("mousemove", handleDragMouseMove)
            window.removeEventListener("mouseup", handleDragMouseUp)
        }
    }, [
        currentEditingCaption,
        isEditingWord,
        clickedWord,
        highlightStartWord,
        highlightEndWord,
        isDragging,
        currentClipTranscript,
    ])
    // Keep the currently spoken word in view: scroll the transcript proportionally to the
    // word's position within the clip, unless the user is dragging a selection.
    useEffect(() => {
        const transcriptEditor = transcriptEditorRef.current
        const prevScript = prevScriptRef.current
        const video = videoRef.current
        if (isDragging || !transcriptEditor || !currentSpokenWord || !video || !prevScript) {
            return
        }

        const wordIndex = currentClipTranscript.findIndex(
            (word) => word.start >= currentSpokenWord.start && word.end <= currentSpokenWord.end,
        )
        // The spoken word is not part of the clip (or the clip is empty): there is no
        // meaningful target, and computing one would yield a negative or NaN offset.
        if (wordIndex === -1) {
            return
        }

        // Scrollable distance of the container (content height minus viewport height).
        const editorHeight = transcriptEditor.scrollHeight - transcriptEditor.clientHeight
        const initialScroll = (prevScript.clientHeight / editorHeight) * 100
        const targetScrollPosition = (wordIndex / currentClipTranscript.length) * editorHeight

        // Only move when the target is at least 40px away from the current offset.
        const scrollDif = Math.abs(targetScrollPosition - transcriptEditor.scrollTop)
        const hasPassedThreshold = scrollDif >= 40
        const isScrollingWithinClip = targetScrollPosition >= initialScroll
        const isPausedWithinClip = video.paused && video.currentTime > clipStart
        if (hasPassedThreshold && (isScrollingWithinClip || isPausedWithinClip)) {
            transcriptEditor.scrollTo({
                top: targetScrollPosition,
            })
        }
    }, [videoRef, prevScriptRef, currentSpokenWord, currentClipTranscript, highlightStartWord])
    // Sets the spoken word to the first transcript item returned for the range
    // [video.currentTime, clipEnd], falling back to the first clip word when none is returned.
    const maintainCaptionWord = useCallback(
        (video: HTMLVideoElement) => {
            const matches = getTranscriptItemsInTimeRange({
                transcript,
                start: video.currentTime,
                end: clipEnd,
            })
            const firstMatch = matches[0]
            setCurrentSpokenWord(firstMatch ? firstMatch : currentClipTranscript[0])
        },
        [transcript, currentClipTranscript, clipEnd],
    )
    /**
     * Renders the floating action menu for the current selection, or null when there is no
     * tooltip position or the scroll container is not mounted.
     *
     * Edit / set-start / set-end are offered only for a single-word selection; remove and
     * add-back are offered for single words and multi-word sections alike.
     */
    const renderTooltipMenu = () => {
        if (!tooltipPosition || !transcriptEditorRef.current) {
            return null
        }
        // clickedWord is non-null exactly when both selection ends are the same word.
        const isSingleWord = clickedWord !== null
        const rangeName = isSingleWord ? "word" : "section"
        const isClickedWordInPreviousCaptions = clickedWord !== null && clickedWord.start < clipStart
        const isClickedWordInUpcomingCaptions = clickedWord !== null && clickedWord.start > clipEnd
        const showEditCaptionButton =
            !isClickedWordInPreviousCaptions && !isClickedWordInUpcomingCaptions && isSingleWord
        const showSetClipStartButton = isSingleWord && !isClickedWordInUpcomingCaptions
        const showSetClipEndButton = isSingleWord && !isClickedWordInPreviousCaptions

        // Start times of every selected word: the clicked word itself, or the clip words
        // between the two ends of a multi-word selection.
        const selectedWordStarts: number[] = (() => {
            if (clickedWord !== null) {
                return [clickedWord.start]
            }
            const [startWord, endWord] = highlightedWordRange!
            const highlightStartIndex = currentClipTranscript.findIndex(
                (w) => w.start === startWord.start,
            )
            const highlightEndIndex = currentClipTranscript.findIndex(
                (w) => w.start === endWord.start,
            )
            if (highlightStartIndex === -1 || highlightEndIndex === -1) {
                return []
            }
            return currentClipTranscript
                .slice(highlightStartIndex, highlightEndIndex + 1)
                .map((w) => w.start)
        })()
        // "Remove" is offered when any selected word is still active,
        // "Add back" when any selected word is inactive.
        const showDisableCaptionButton = selectedWordStarts.some(
            (start) => !(start in inactiveWords),
        )
        const showEnableCaptionButton = selectedWordStarts.some((start) => start in inactiveWords)

        // The menu is exactly as wide as the buttons it shows.
        const TOOLTIP_OPTION_SIZE = 35
        const tooltipWidth =
            TOOLTIP_OPTION_SIZE *
            [
                showEditCaptionButton,
                showSetClipStartButton,
                showSetClipEndButton,
                showDisableCaptionButton,
                showEnableCaptionButton,
            ].filter(Boolean).length
        return (
            <Flex
                tabIndex={1}
                ref={tooltipRef}
                direction="row"
                position="absolute"
                justifyContent="flex-start"
                alignItems="center"
                borderRadius="5px"
                backgroundColor="var(--ui-separator-color)"
                width={tooltipWidth}
                top={tooltipPosition.y - TOOLTIP_OPTION_SIZE}
                left={tooltipPosition.x - tooltipWidth / 2}
            >
                {showEditCaptionButton && (
                    <Tooltip label="Edit caption" aria-label="edit caption" placement="top">
                        <IconButton
                            position="relative"
                            width={TOOLTIP_OPTION_SIZE}
                            variant="unstyled"
                            aria-label="Edit caption"
                            onMouseUp={handleEditCaptionOnClick}
                            _hover={{ backgroundColor: "var(--secondary-bg-color)" }}
                            icon={
                                <Image
                                    src="/images/snippets/edit_caption.svg"
                                    alt="Edit caption"
                                    width="full"
                                    padding={2}
                                />
                            }
                        />
                    </Tooltip>
                )}
                {showSetClipStartButton && (
                    <Tooltip
                        label="Set as clip start"
                        aria-label="set as clip start"
                        placement="top"
                    >
                        <IconButton
                            position="relative"
                            width={TOOLTIP_OPTION_SIZE}
                            variant="unstyled"
                            aria-label="Text select start"
                            // onClick hijacked by window event listener so we use onMouseUp
                            onMouseUp={handleClickSetClipStart}
                            _hover={{ backgroundColor: "var(--secondary-bg-color)" }}
                            icon={
                                <Image
                                    src="/images/snippets/text_select_start.svg"
                                    alt="Text select start"
                                    width="full"
                                    padding={2}
                                />
                            }
                        />
                    </Tooltip>
                )}
                {showSetClipEndButton && (
                    <Tooltip label="Set as clip end" aria-label="set as clip end" placement="top">
                        <IconButton
                            position="relative"
                            width={TOOLTIP_OPTION_SIZE}
                            variant="unstyled"
                            aria-label="text select end"
                            onMouseUp={handleClickSetClipEnd}
                            _hover={{ backgroundColor: "var(--secondary-bg-color)" }}
                            icon={
                                <Image
                                    alt="Text select end"
                                    src="/images/snippets/text_select_end.svg"
                                    width="full"
                                    padding="4px"
                                />
                            }
                        />
                    </Tooltip>
                )}
                {showDisableCaptionButton && (
                    <Tooltip
                        label={`Remove ${rangeName} from clip`}
                        aria-label={`Remove ${rangeName} from clip`}
                        placement="top"
                    >
                        <IconButton
                            width={TOOLTIP_OPTION_SIZE}
                            variant="unstyled"
                            aria-label={`Remove ${rangeName} from clip`}
                            // For some reason onClick is not being caught
                            onMouseUp={handleClickDisableCaption}
                            _hover={{ backgroundColor: "var(--secondary-bg-color)" }}
                            icon={
                                <Image
                                    alt="trash can"
                                    src="/images/snippets/delete_caption.svg"
                                    width="full"
                                    padding="10px"
                                />
                            }
                        />
                    </Tooltip>
                )}
                {showEnableCaptionButton && (
                    <Tooltip
                        label={`Add ${rangeName} back`}
                        aria-label={`Add ${rangeName} back`}
                        placement="top"
                    >
                        <IconButton
                            position="relative"
                            width={TOOLTIP_OPTION_SIZE}
                            variant="unstyled"
                            aria-label={`Add ${rangeName} back`}
                            onMouseUp={handleClickEnableCaption}
                            _hover={{ backgroundColor: "var(--secondary-bg-color)" }}
                            icon={
                                <Icon
                                    as={MdReplay}
                                    boxSize="full"
                                    color="white"
                                    width="full"
                                    padding="8px"
                                />
                            }
                        />
                    </Tooltip>
                )}
            </Flex>
        )
    }
    // Renders one context word (before or after the clip) as a muted, clickable button.
    const renderInactiveWord = (word: ITranscriptWord) => (
        <Button
            key={word.start}
            onClick={(clickEvent) => handleClickWord(clickEvent, word)}
            height="unset"
            minW="unset"
            padding="2px"
            fontWeight={400}
            color="var(--secondary-text-color)"
            backgroundColor="var(--primary-bg-color)"
            _hover={{ backgroundColor: "var(--ui-separator-color)" }}
        >
            {word.text}
        </Button>
    )
    // Renders one clip word: an <input> while it is being edited, otherwise a button styled by
    // its state (spoken > highlighted > default), struck through when inactive. Words whose
    // text contains "." are followed by a spacer box.
    const renderClipWord = (word: IClipWord, index: number) => {
        const isBeingEdited = isEditingWord && clickedWord?.start === word.start
        const requiresLineBreak = word.text?.includes(".")
        const lineBreak = requiresLineBreak && <Box marginBottom="15px" />

        if (isBeingEdited) {
            return (
                <Fragment key={word.start}>
                    <input
                        ref={wordToEditRef}
                        value={currentEditingCaption ?? clickedWord.text}
                        onChange={handleClickEditCaption}
                        autoFocus
                        size={clickedWord.text.length}
                        style={{
                            height: "unset",
                            minWidth: "unset",
                            backgroundColor: "transparent",
                            color: "white",
                            marginLeft: "2px",
                            padding: 0,
                            lineHeight: "unset",
                        }}
                    />
                    {lineBreak}
                </Fragment>
            )
        }

        const isSpokenWord = currentSpokenWord?.start === word.start
        const isHighlighted = isWordHighlighted(word)

        // Base look for the word's state; the spoken word takes precedence over highlighting.
        let baseLook: ButtonProps
        if (isSpokenWord) {
            baseLook = {
                backgroundColor: "var(--active-word-color)",
                color: "black",
                borderRadius: "2px",
            }
        } else if (isHighlighted) {
            baseLook = { backgroundColor: "#474953", color: "white", borderRadius: "0px" }
        } else {
            baseLook = { backgroundColor: "transparent", color: "white", borderRadius: "2px" }
        }
        // Spoken and highlighted words keep their look on hover; plain words get a hover tint.
        const hoverLook =
            isSpokenWord || isHighlighted
                ? { ...baseLook }
                : { backgroundColor: "var(--ui-separator-color)", color: "white" }
        const buttonStyle: ButtonProps = { ...baseLook, _hover: hoverLook }
        if (!word.active) {
            buttonStyle.textDecoration = "line-through"
        }

        return (
            <Fragment key={word.start}>
                <Button
                    display="inline-block"
                    onMouseDown={(downEvent) => handleMouseDown(downEvent, word)}
                    onDoubleClick={handleEditCaptionOnClick}
                    padding="2px"
                    height="unset"
                    minW="unset"
                    borderRadius="2px"
                    fontWeight={400}
                    transition="none"
                    {...buttonStyle}
                >
                    {word.text}
                </Button>
                {lineBreak}
            </Fragment>
        )
    }
    // Renders the three transcript sections (context before, clip, context after),
    // or a centred spinner while the caption words are still loading.
    const renderTranscript = () => {
        if (!isLoading) {
            return (
                <>
                    <Box ref={prevScriptRef}>{previousWords.map(renderInactiveWord)}</Box>
                    <Box
                        ref={currentTranscriptRef}
                        display="block"
                        marginY="20px"
                        scrollMarginTop="50px"
                        color="white"
                    >
                        {currentClipTranscript.map(renderClipWord)}
                    </Box>
                    <Box>{upcomingWords.map(renderInactiveWord)}</Box>
                </>
            )
        }
        return (
            <Flex width="100%" height="100%" justifyContent="center" alignItems="center">
                <Spinner color="#62D3A3" size="xl" thickness="3px" />
            </Flex>
        )
    }
    // Scroll container (scrollbar hidden, text selection disabled) hosting the transcript
    // and, when visible, the absolutely positioned tooltip menu.
    return (
        <Flex
            ref={transcriptEditorRef}
            flexDirection="column"
            position="relative"
            width="full"
            height="var(--transcript-editor-height)"
            overflowY="scroll"
            padding="24px"
            lineHeight="24px"
            userSelect="none"
            sx={{
                "::-webkit-scrollbar": {
                    display: "none",
                },
            }}
        >
            {renderTranscript()}
            {isTooltipVisible && renderTooltipMenu()}
        </Flex>
    )
}

export default EditorTranscript
