import React, { useCallback, useEffect, useRef, useState} from 'react';
import * as sdk from "microsoft-cognitiveservices-speech-sdk";
import './SpeechMode.scss';
import { WebSocketMessage } from '../../../utilities/transformMessages';
import GradientIcon, { GradientIconTypes } from '../../Icon/GradientIcon';
import Icon from '../../Icon';

/**
 * Props accepted by the SpeechMode overlay.
 */
interface SpeechModeProps {
    /** Whether speech mode is currently switched on. */
    isSpeechMode: boolean;
    /** Callback used to switch speech mode on or off. */
    setIsSpeechMode: (value: boolean) => void;
    /** Conversation messages; the component inspects the last entry. */
    messages: WebSocketMessage[];
    /** Sends `message`; `speech` carries the speech-mode flag at send time. */
    sendMessage: (message: string, speech: boolean) => void;
    /** Selects the 'Processing...' status label instead of 'Listening...'. */
    isProcessing: boolean;
}

/** Number of vertical bars rendered in the speech wave. */
const LINE_NUMBER = 107;
/** Factor applied to a frequency-bin byte value to obtain a bar height in pixels. */
const HEIGHT_COEFFICIENT = 0.33;
/** Factor applied to a frequency-bin byte value to obtain a bar opacity. */
const OPACITY_COEFFICIENT = 0.0067;
/** Bar height in pixels used whenever the scaled height does not exceed it. */
const LINE_HEIGHT_DEFAULT = 4;
/** Lower bound applied to a bar's opacity. */
const OPACITY_MIN = 0.2;

const SpeechMode = ({isSpeechMode, setIsSpeechMode, messages, sendMessage, isProcessing}: SpeechModeProps ) => {
    // ---- Rendered state ----------------------------------------------------
    /** Elapsed seconds shown by the timer label. */
    const [time, setTime] = useState(0);
    /** Transcript collected from the recognizer and not yet sent. */
    const [text, setText] = useState('');
    /** Most recent non-user message already seen by the reply-speaking effect. */
    const [lastMessage, setLastMessage] = useState<WebSocketMessage | null>(null);
    /** Mirrors whether the elapsed-time interval has been started. */
    const [isRunning, setIsRunning] = useState(false);
    /** Language reported by the recognizer for the latest recognized phrase. */
    const [detectedLanguage, setDetectedLanguage] = useState('');

    // ---- Mutable refs ------------------------------------------------------
    // Refs are typed explicitly: `useRef(null)` on its own infers no useful
    // type, which hides mistakes in every `.current` access below.
    /** Latest `text`, readable from callbacks that outlive a render. */
    const textRef = useRef(text);
    /** Speaker destination of the reply currently (or last) being played. */
    const playerRef = useRef<sdk.SpeakerAudioDestination | null>(null);
    /** Interval handle cleared by the visualiser cleanup; never assigned in this component. */
    const microphoneRef = useRef<ReturnType<typeof setInterval> | null>(null);
    /** Recognizer created by the latest call to startContinuousRecognition. */
    const recognizerRef = useRef<sdk.SpeechRecognizer | null>(null);
    /** Handle of the pending silence timeout that auto-sends the transcript. */
    const timeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null);
    /** True while a speech-synthesis request is in flight. */
    const isSynthesizingRef = useRef(false);
    /** True from sending a message until its reply audio has ended. */
    const isPlayingAudioRef = useRef(false);
    /** True while no reply audio has started for the current turn. */
    const isLoadingAudioRef = useRef(true);
    /** Latest `isSpeechMode`, readable from long-lived callbacks. */
    const isUsingSpeechModeRef = useRef(false);
    const language = 'en-US';
    /** Latest `language`, kept in a ref alongside the other mirrored values. */
    const languageRef = useRef(language);
    /** DOM nodes of the wave bars, indexed like the rendered list. */
    const elementsRef = useRef<(HTMLDivElement | null)[]>([]);
    /** Analyser node feeding the wave animation while speech mode is on. */
    const analyserRef = useRef<AnalyserNode | null>(null);
    /** Handle of the one-second elapsed-time interval. */
    const timerRef = useRef<NodeJS.Timeout | null>(null);

    /**
     * Renders a duration given in seconds as a zero-padded `MM:SS` label.
     * Minutes are not folded into hours, so 6000 seconds yields `100:00`.
     *
     * @param timeInSeconds Duration in seconds.
     * @returns The formatted label.
     */
    const formatTime = (timeInSeconds: number) => {
        const twoDigits = (part: number) => String(part).padStart(2, '0');
        const wholeMinutes = Math.floor(timeInSeconds / 60);
        const leftoverSeconds = timeInSeconds % 60;
        return [wholeMinutes, leftoverSeconds].map((part) => twoDigits(part)).join(':');
    };

    /**
     * Starts the one-second elapsed-time counter unless one is already active.
     *
     * The interval handle in `timerRef` is the source of truth rather than the
     * `isRunning` state: this function is captured by long-lived recognizer
     * callbacks, where a state value from an older render would be stale and
     * could start a second interval while the first is still ticking.
     */
    const startTimer = () => {
        if (timerRef.current) {
            return;
        }
        setIsRunning(true);
        timerRef.current = setInterval(() => {
            setTime((prevTime) => prevTime + 1);
        }, 1000);
    };

    /**
     * Stops the elapsed-time counter if it is active.
     *
     * Reads only refs and state setters, so the callback identity is stable and
     * a copy captured by an SDK event handler still clears the live interval.
     */
    const stopTimer = useCallback(() => {
        if (timerRef.current) {
            clearInterval(timerRef.current);
            timerRef.current = null;
        }
        setIsRunning(false);
    }, []);

    // Unmount guard: make sure the elapsed-time interval does not keep firing
    // once the component is gone.
    useEffect(() => {
        const clearTimerOnUnmount = () => {
            const activeInterval = timerRef.current;
            if (activeInterval) {
                clearInterval(activeInterval);
            }
        };
        return clearTimerOnUnmount;
    }, []);
    

    // Mirror the latest render values into refs so that callbacks which outlive
    // a render (timeouts, SDK event handlers) can read current values.
    useEffect(() => { languageRef.current = language; }, [language]);

    useEffect(() => { isUsingSpeechModeRef.current = isSpeechMode; }, [isSpeechMode]);

    useEffect(() => { textRef.current = text; }, [text]);

    /**
     * Loads `/notification.mp3` and plays it at half volume when the browser
     * reports that it can play through. Load and playback errors are logged.
     */
    const playNotificationSound = () => {
        const chime = new Audio('/notification.mp3');
        chime.volume = 0.5;

        const reportPlaybackFailure = (error: unknown) => {
            console.error('Error playing audio', error);
        };

        chime.onerror = (e) => {
            console.error('Error loading audio file:', e);
        };
        chime.oncanplaythrough = () => {
            chime.play().catch(reportPlaybackFailure);
        };
    };

    // Turns off the Speech SDK's telemetry collection. Because the call sits
    // directly in the component body, it is repeated on every render.
    sdk.Recognizer.enableTelemetry(false);
    
    /**
     * Asks the recognizer stored in `recognizerRef` (if any) to stop continuous
     * recognition. On success the timer is stopped and the elapsed time is
     * reset to zero; a failure is only logged.
     */
    const stopContinuousRecognition = useCallback(() => {
        const activeRecognizer = recognizerRef.current;
        if (!activeRecognizer) {
            return;
        }

        const onStopped = () => {
            stopTimer();
            setTime(0);
            console.log('Recognition stopped successfully.');
        };
        const onStopFailed = (err: string) => {
            console.error(`Failed to stop continuous recognition: ${err}`);
        };

        activeRecognizer.stopContinuousRecognitionAsync(onStopped, onStopFailed);
    }, [stopTimer]);

    /**
     * Sends the pending transcript, if there is one.
     *
     * While both audio flags are set (a message was sent and its reply audio
     * has not started), the transcript is discarded instead of sent. Otherwise
     * a non-blank transcript is sent, recognition is stopped, both audio flags
     * are raised and the transcript is cleared.
     */
    const handleSendMessage = useCallback(() => {
        const pendingText = textRef.current;
        const awaitingReplyAudio = isPlayingAudioRef.current && isLoadingAudioRef.current;

        if (awaitingReplyAudio) {
            setText('');
            return;
        }
        if (pendingText.trim() === '') {
            return;
        }

        sendMessage(pendingText, isUsingSpeechModeRef.current);
        stopContinuousRecognition();
        isLoadingAudioRef.current = true;
        isPlayingAudioRef.current = true;
        setText('');
    }, [sendMessage, stopContinuousRecognition]);

    /**
     * (Re)arms the two-second silence timeout. When it elapses without being
     * re-armed, a non-blank transcript is handed to `handleSendMessage`.
     */
    const resetTimeout = useCallback(() => {
        const pendingTimeout = timeoutRef.current;
        if (pendingTimeout) {
            clearTimeout(pendingTimeout);
        }

        const sendIfAnythingWasSaid = () => {
            const hasTranscript = textRef.current.trim() !== '';
            if (hasTranscript) {
                handleSendMessage();
            }
        };
        timeoutRef.current = setTimeout(sendIfAnythingWasSaid, 2000);
    }, [handleSendMessage]);

    /**
     * Starts continuous speech recognition on the default microphone with
     * automatic language detection between "en-US" and "de-DE".
     *
     * Does nothing (beyond a log line) when speech mode is off or while a reply
     * is being synthesized or played. Otherwise it plays the notification
     * sound, starts the elapsed-time counter, replaces the recognizer stored in
     * `recognizerRef` with a fresh one and wires its events:
     * - `recognizing`: re-arms the silence timeout.
     * - `recognized`: records the detected language, appends the phrase to the
     *   pending transcript and re-arms the silence timeout.
     * - `canceled`: resets the timer, logs the reason and stops recognition.
     * - `sessionStopped`: stops recognition.
     *
     * Every path that fails to get recognition going resets the timer again so
     * the UI does not keep counting.
     */
    const startContinuousRecognition = useCallback(() => {
        if (!isUsingSpeechModeRef.current || isSynthesizingRef.current || isPlayingAudioRef.current) {
            console.log('Speech mode is not ready or active.');
            return;
        }

        // Puts the timer display back to its idle state.
        const resetTimer = () => {
            stopTimer();
            setTime(0);
        };

        // Joins a newly recognized phrase onto whatever has been collected so far.
        const appendPhrase = (existing: string, phrase: string) =>
            existing.trim() === '' ? phrase : `${existing} ${phrase}`;

        try {
            // Release the recognizer of the previous turn before replacing it.
            // Once the ref is overwritten nothing could stop or close it any more.
            const previousRecognizer = recognizerRef.current;
            if (previousRecognizer) {
                recognizerRef.current = null;
                try {
                    previousRecognizer.close();
                } catch (closeError) {
                    console.error(`Failed to close previous recognizer: ${closeError}`);
                }
            }

            playNotificationSound();
            startTimer();

            const speechConfig = sdk.SpeechConfig.fromSubscription(
                process.env.REACT_APP_SPEECH_KEY,
                process.env.REACT_APP_SPEECH_REGION
            );
            if (!speechConfig) {
                resetTimer();
                console.error('Speech configuration could not be created.');
                return;
            }

            const autoDetectConfig = sdk.AutoDetectSourceLanguageConfig.fromLanguages([
                "en-US",
                "de-DE",
            ]);

            const audioConfig = sdk.AudioConfig.fromDefaultMicrophoneInput();
            if (!audioConfig) {
                resetTimer();
                console.error('Audio configuration could not be created.');
                return;
            }

            const recognizer = sdk.SpeechRecognizer.FromConfig(speechConfig, autoDetectConfig, audioConfig);
            recognizerRef.current = recognizer;

            // Interim hypothesis: the user is still talking, so push the auto-send back.
            recognizer.recognizing = (_sender, e) => {
                console.log(`RECOGNIZING: Text=${e.result.text}`);
                resetTimeout();
            };

            // Final result for one phrase.
            recognizer.recognized = (_sender, e) => {
                if (e.result.reason === sdk.ResultReason.RecognizedSpeech) {
                    const languageResult = sdk.AutoDetectSourceLanguageResult.fromResult(e.result);
                    setDetectedLanguage(languageResult.language);
                    console.log(`RECOGNIZED: Text=${e.result.text}`);
                    const phrase = e.result.text;
                    // "Play." results are ignored.
                    // NOTE(review): presumably the recognizer's transcription of the
                    // notification sound — confirm.
                    if (phrase !== "Play.") {
                        console.log(`SET: Text=${appendPhrase(textRef.current, phrase)}`);
                        // Append instead of overwrite: an utterance made of several
                        // phrases produces several `recognized` events before the
                        // silence timeout fires, and every phrase must be sent.
                        // The updater form avoids reading a transcript that is one
                        // render behind.
                        setText((existing) => appendPhrase(existing, phrase));
                    }
                    resetTimeout();
                } else if (e.result.reason === sdk.ResultReason.NoMatch) {
                    console.log('NOMATCH: Speech could not be recognized.');
                }
            };

            recognizer.canceled = (_sender, e) => {
                resetTimer();
                console.log(`CANCELED: Reason=${e.reason}`);
                if (e.reason === sdk.CancellationReason.Error) {
                    console.error(`CANCELED: ErrorCode=${e.errorCode}`);
                    console.error(`CANCELED: ErrorDetails=${e.errorDetails}`);
                    console.error(
                        'CANCELED: Did you set the speech resource key and region values?'
                    );
                }
                recognizer.stopContinuousRecognitionAsync(() => {
                    resetTimer();
                    console.log('Recognition stopped due to cancellation.');
                });
            };

            recognizer.sessionStopped = () => {
                console.log('Session stopped.');
                recognizer.stopContinuousRecognitionAsync(() => {
                    console.log('Recognition stopped due to session end.');
                });
            };

            recognizer.startContinuousRecognitionAsync(
                () => {
                    console.log('Continuous recognition started successfully.');
                },
                (err) => {
                    // Nothing is listening, so do not leave the timer counting.
                    resetTimer();
                    console.error(`Failed to start continuous recognition: ${err}`);
                }
            );
        } catch (error) {
            resetTimer();
            console.error(`Error in startContinuousRecognition: ${error}`);
        }
    // `startTimer` and `playNotificationSound` are re-created on every render and
    // are intentionally left out; refs are stable and need no entry.
    // eslint-disable-next-line react-hooks/exhaustive-deps
    }, [resetTimeout, stopTimer]);

    /**
     * Speaks `textToSynthesize` through the default speaker.
     *
     * The voice follows the most recently detected language ("en-US" or
     * "de-DE"); for any other value the SDK default voice is used. While audio
     * plays, the playback flags keep recognition from starting; half a second
     * after playback ends, recognition is started again.
     *
     * If synthesis cannot be set up or does not complete, no audio-end event
     * will arrive, so the flags are reset and recognition is resumed right
     * away instead of leaving the component unable to listen.
     *
     * @param textToSynthesize Text to be spoken.
     */
    const handleButtonClick = useCallback((textToSynthesize) => {
        // Restores the "ready to listen" flag state and resumes recognition.
        const resumeListening = () => {
            isSynthesizingRef.current = false;
            isPlayingAudioRef.current = false;
            isLoadingAudioRef.current = true;
            startContinuousRecognition();
        };

        try {
            const speechConfig = sdk.SpeechConfig.fromSubscription(
                process.env.REACT_APP_SPEECH_KEY,
                process.env.REACT_APP_SPEECH_REGION
            );
            if (detectedLanguage === 'en-US') {
                speechConfig.speechSynthesisVoiceName = 'en-US-AvaMultilingualNeural';
            } else if (detectedLanguage === 'de-DE') {
                speechConfig.speechSynthesisVoiceName = 'de-DE-KatjaNeural';
            }

            const player = new sdk.SpeakerAudioDestination();
            const audioConfig = sdk.AudioConfig.fromSpeakerOutput(player);
            const synthesizer = new sdk.SpeechSynthesizer(speechConfig, audioConfig);

            playerRef.current = player;
            isSynthesizingRef.current = true;

            player.onAudioStart = () => {
                console.log('Audio started.');
                isLoadingAudioRef.current = false;
                isPlayingAudioRef.current = true;
            };

            player.onAudioEnd = () => {
                console.log('Audio ended.');
                isPlayingAudioRef.current = false;
                isLoadingAudioRef.current = true;

                // Wait for half a second before starting recognition
                setTimeout(() => {
                    startContinuousRecognition();
                }, 500);
            };

            synthesizer.speakTextAsync(
                textToSynthesize,
                (result) => {
                    const completed = result.reason === sdk.ResultReason.SynthesizingAudioCompleted;
                    if (completed) {
                        console.log('Synthesis finished.');
                    } else {
                        console.error(
                            'Speech synthesis canceled, ' +
                            result.errorDetails +
                            '\nDid you set the speech resource key and region values?'
                        );
                    }
                    synthesizer.close();
                    isSynthesizingRef.current = false;
                    if (!completed) {
                        resumeListening();
                    }
                },
                (err) => {
                    console.trace('err - ' + err);
                    synthesizer.close();
                    resumeListening();
                }
            );

            console.log('Now synthesizing to default speaker.');
        } catch (error) {
            console.error(`Error in handleButtonClick: ${error}`);
            resumeListening();
        }
    // `detectedLanguage` must be listed: without it the voice is chosen from the
    // value captured when this callback was last re-created.
    }, [detectedLanguage, startContinuousRecognition]);


    /**
     * Interrupts the current turn: pauses reply playback, lowers both audio
     * flags, stops recognition and resets the timer. Does nothing when no
     * player has been created yet.
     */
    const handleInterrupt = () => {
        const activePlayer = playerRef.current;
        if (!activePlayer) {
            return;
        }

        activePlayer.pause();
        isPlayingAudioRef.current = false;
        isLoadingAudioRef.current = false;

        stopContinuousRecognition();
        stopTimer();
        setTime(0);
        clearInterval(timerRef.current);
    };

    // Ref handles to the start/stop callbacks. They let the speech-mode effect
    // call the latest versions without listing the callbacks as dependencies.
    const startRecognitionRef = useRef(startContinuousRecognition);
    const stopRecognitionRef = useRef(stopContinuousRecognition);

    // Re-point both handles whenever either callback is re-created.
    useEffect(() => {
        stopRecognitionRef.current = stopContinuousRecognition;
        startRecognitionRef.current = startContinuousRecognition;
    }, [startContinuousRecognition, stopContinuousRecognition]);

    // React to speech mode being switched: start listening when it turns on;
    // when it turns off, stop listening and silence any reply being played.
    // Recognition is also stopped on cleanup (mode change or unmount).
    useEffect(() => {
        const stopRecognition = () => stopRecognitionRef.current();

        if (!isSpeechMode) {
            stopRecognition();
            playerRef.current?.pause();
            isPlayingAudioRef.current = false;
        } else {
            startRecognitionRef.current();
        }

        return stopRecognition;
    }, [isSpeechMode]);

    // Watch the conversation: remember each new non-user message, and speak it
    // when it is finished, flagged for speech mode and no reply audio has been
    // started for this turn yet.
    useEffect(() => {
        if (!isSpeechMode || messages.length === 0) {
            return;
        }

        const newest = messages[messages.length - 1];
        if (newest === lastMessage || newest.isUser) {
            return;
        }
        setLastMessage(newest);

        const readyToSpeak = newest.finished && newest.speechMode && isLoadingAudioRef.current;
        if (readyToSpeak) {
            handleButtonClick(newest.content);
            isLoadingAudioRef.current = false;
        }
    }, [messages, lastMessage, isSpeechMode, handleButtonClick]);

    // Drives the speech wave: while speech mode is on, the microphone is routed
    // through a gain node into an analyser, and every animation frame maps the
    // analyser's frequency bins onto the height and opacity of the wave bars.
    // Cleanup releases everything this effect acquired: the animation frame,
    // the analyser, the microphone tracks and the audio context.
    useEffect(() => {
        let animationId = 0;
        // Set by cleanup so a microphone grant that arrives late is released
        // immediately instead of building an audio graph nobody tears down.
        let disposed = false;
        let micStream: MediaStream | null = null;
        let audioContext: AudioContext | null = null;

        const releaseStream = (stream: MediaStream) => {
            stream.getTracks().forEach((track) => track.stop());
        };

        const handleSuccess = (stream: MediaStream) => {
            if (disposed) {
                releaseStream(stream);
                return;
            }
            micStream = stream;

            // @ts-ignore
            const context: AudioContext = new (window.AudioContext || window.webkitAudioContext)();
            audioContext = context;
            const source = context.createMediaStreamSource(stream);

            const gainNode = context.createGain();
            gainNode.gain.value = 0.5; // Set initial gain value (volume), you can adjust this value

            const analyser = context.createAnalyser();
            analyser.fftSize = 256;

            source.connect(gainNode);
            gainNode.connect(analyser);
            analyserRef.current = analyser;

            const dataArray = new Uint8Array(analyser.frequencyBinCount);

            const animate = () => {
                const activeAnalyser = analyserRef.current;
                if (!activeAnalyser) {
                    // Analyser was torn down; let the animation loop end.
                    return;
                }
                animationId = requestAnimationFrame(animate);
                activeAnalyser.getByteFrequencyData(dataArray);

                elementsRef.current.forEach((line, index) => {
                    if (!line) {
                        return;
                    }
                    // Bars beyond the available bins are treated as silent.
                    const value = dataArray[index] ?? 0;
                    const scaledHeight = value * HEIGHT_COEFFICIENT;
                    const height = scaledHeight > LINE_HEIGHT_DEFAULT ? scaledHeight : LINE_HEIGHT_DEFAULT;
                    line.style.height = `${height}px`;
                    line.style.opacity = `${Math.max(OPACITY_MIN, value * OPACITY_COEFFICIENT)}`;
                });
            };
            animate();
        };

        if (isSpeechMode) {
            const mediaDevices = navigator.mediaDevices;
            if (mediaDevices?.getUserMedia) {
                mediaDevices
                    .getUserMedia({ audio: true })
                    .then(handleSuccess)
                    .catch((error) => {
                        // Permission denied or no input device: the wave stays flat.
                        console.error('Unable to access the microphone for the speech wave', error);
                    });
            } else {
                console.error('Microphone access is not available in this browser context.');
            }
        }

        return () => {
            disposed = true;
            cancelAnimationFrame(animationId);
            if (microphoneRef.current) {
                // eslint-disable-next-line react-hooks/exhaustive-deps
                clearInterval(microphoneRef.current);
            }
            if (analyserRef.current) {
                analyserRef.current.disconnect();
                analyserRef.current = null;
            }
            // Stop the tracks so the browser releases the microphone.
            if (micStream) {
                releaseStream(micStream);
                micStream = null;
            }
            // Browsers cap the number of live audio contexts; close ours.
            if (audioContext && audioContext.state !== 'closed') {
                audioContext.close().catch((error) => {
                    console.error('Error closing audio context', error);
                });
            }
            audioContext = null;
        };
    }, [isSpeechMode]);

    // Status line shown under the timer.
    const statusLabel = isProcessing ? 'Processing...' : 'Listening...';

    // Close button: interrupt the current turn, then leave speech mode.
    const closeSpeechMode = () => {
        handleInterrupt();
        setIsSpeechMode(false);
    };

    // One bar per wave line; each bar registers its DOM node in `elementsRef`
    // so the visualiser can restyle it without re-rendering.
    const waveLines = Array.from({ length: LINE_NUMBER }, (_, index) => (
        <div
            key={index}
            className="speech-wave-line"
            ref={(el) => {
                elementsRef.current[index] = el;
            }}
        />
    ));

    return (
        <div className="speech-mode-outer-wrapper">
            <div className="speech-mode-shadow"></div>
            <div className="speech-mode-inner-wrapper">
                <div className="speech-mode-timer">{formatTime(time)}</div>
                <div className="speech-mode-status">{statusLabel}</div>
                <div className="speech-wave-wrapper">
                    <div className="speech-wave-mask left"></div>
                    {waveLines}
                    <div className="speech-wave-mask right"></div>
                </div>
                <GradientIcon
                    type={GradientIconTypes.COLORED}
                    icon={<Icon type="cross" className="menu-icon" />}
                    isActive
                    className="text-input__microphone "
                    shadow={true}
                    onClick={closeSpeechMode}
                />
            </div>
        </div>
    );
};

export default SpeechMode;
