import React, { RefObject, useCallback, useEffect, useState } from "react";
import { useMicVAD } from "@ricky0123/vad-react";
import { useLocalStorage } from "@uidotdev/usehooks";

// Global kill switch for the dedicated (useMicVAD-based) algorithm.
// While this is true, useVoiceDetector defaults to the "custom" algorithm and
// never consults browser detection; a caller can still pass `defaultAlgorithm`
// explicitly to override that default.
const DisableDedicatedVAD = true;

/**
 * Samples the first audio track of the given video element's stream for about
 * 100 ms and reports whether any sample deviates from silence by more than
 * `threshold`.
 *
 * Samples are unsigned bytes centred on 128 (silence), so a sample counts as
 * voice when it is above `128 + threshold` or below `128 - threshold`.
 *
 * The returned promise never rejects: if the stream or audio track is missing,
 * or the audio graph cannot be built or read, it resolves to false. The
 * temporary AudioContext is always closed, including on failure.
 *
 * @param videoRef Reference to the HTMLVideoElement containing the video stream.
 * @param threshold Allowed deviation from silence (default: 5) before a sample counts as voice.
 * @returns Promise that resolves to true if active voice is detected, false otherwise.
 */
export const checkIsVoiceActive = (
  videoRef: React.RefObject<HTMLVideoElement>,
  threshold = 5
): Promise<boolean> =>
  new Promise((resolve) => {
    // Assigned once the context exists so both failure paths can release it
    let audioContext: AudioContext | undefined;

    // Closing the context tears down the whole audio graph; errors while
    // closing are ignored because there is nothing left to recover
    const closeContext = () => {
      try {
        audioContext?.close().catch(() => undefined);
      } catch {
        // Best effort only
      }
    };

    try {
      const mediaStream = videoRef.current?.srcObject as MediaStream;
      if (!mediaStream) {
        resolve(false);
        return;
      }

      const mainAudioTrack = mediaStream.getAudioTracks()[0];
      if (!mainAudioTrack) {
        resolve(false);
        return;
      }

      // Older WebKit builds only expose the prefixed constructor
      const legacyWindow = window as Window & {
        webkitAudioContext?: typeof AudioContext;
      };
      const AudioContextCtor =
        window.AudioContext || legacyWindow.webkitAudioContext;
      const context = new AudioContextCtor();
      audioContext = context;

      if (context.state === "suspended") {
        // A failed resume only means the analyser reads silence
        context.resume().catch(() => undefined);
      }

      const mediaStreamSource = context.createMediaStreamSource(
        new MediaStream([mainAudioTrack])
      );

      const analyser = context.createAnalyser();
      analyser.fftSize = 2048;

      mediaStreamSource.connect(analyser);

      const dataArray = new Uint8Array(analyser.fftSize);

      const checkData = () => {
        let isActive = false;

        try {
          analyser.getByteTimeDomainData(dataArray);

          isActive = dataArray.some(
            (value) => value > 128 + threshold || value < 128 - threshold
          );

          // Clean up
          mediaStreamSource.disconnect();
          analyser.disconnect();
        } catch {
          // Keep whatever was determined before the failure
        } finally {
          closeContext();
        }

        resolve(isActive);
      };

      setTimeout(checkData, 100); // Give some time for the analyser to gather data
    } catch {
      // Setup failed (e.g. no AudioContext support or an unusable track)
      closeContext();
      resolve(false);
    }
  });

/**
 * CDN locations of the assets needed by the dedicated voice activity detector.
 *
 * Non-React applications load these packages via script tags, and applications
 * that are not built with create-react-app can consume them as npm packages.
 * For create-react-app projects the Webpack config would have to be modified,
 * which is not easily doable without "npm run eject", so the assets are loaded
 * from a CDN instead (the same thing a script tag would do, just more
 * convenient to use in React).
 *
 * Because of this, these packages have to be checked for updates manually
 * (or until create-react-app support is available).
 *
 * NOTE(review): the vad-web URLs are pinned to 0.0.7 while the onnxruntime-web
 * URL carries no version - confirm that an unpinned runtime is intended.
 */
const micVADUrls = {
  workletURL:
    "https://cdn.jsdelivr.net/npm/@ricky0123/vad-web@0.0.7/dist/vad.worklet.bundle.min.js",
  modelURL:
    "https://cdn.jsdelivr.net/npm/@ricky0123/vad-web@0.0.7/dist/silero_vad.onnx",
  ortWasmURL: "https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/",
};

// Reports whether the user agent string contains the given browser name
// (case-insensitive). Always false when no `navigator` global exists.
const isBrowser = (name: string): boolean => {
  if (typeof navigator === "undefined") {
    return false;
  }

  const userAgent = navigator.userAgent.toLowerCase();
  const needle = name.toLowerCase();
  return userAgent.includes(needle);
};

// Browsers where the dedicated algorithm does not work properly.
// Entries are matched against the user agent by getAlgorithmForBrowser.
// Firefox: sometimes fails to detect voice with 2 people, unless the tab is muted
const BuggedBrowsers: string[] = ["Firefox"];

// Available voice detection strategies:
// - "dedicated": the useMicVAD hook from @ricky0123/vad-react
// - "custom": periodic sampling of the video element's audio via checkIsVoiceActive
type VoiceDetectionAlgorithm = "dedicated" | "custom";

// Picks the algorithm for the current browser: "custom" when the browser is
// listed in BuggedBrowsers, "dedicated" for every other browser
const getAlgorithmForBrowser = (): VoiceDetectionAlgorithm => {
  const dedicatedIsBugged = BuggedBrowsers.some((browserName) =>
    isBrowser(browserName)
  );

  return dedicatedIsBugged ? "custom" : "dedicated";
};

/**
 * React hook that reports whether voice is currently detected on the audio of
 * the given video element's stream.
 *
 * With the "dedicated" algorithm the result comes from the useMicVAD hook; with
 * the "custom" algorithm the stream is sampled every 100 ms through
 * checkIsVoiceActive. The custom algorithm reports false while the microphone
 * is disabled or missing.
 *
 * @param videoRef Reference to the HTMLVideoElement containing the stream.
 * @param micEnabled Whether the microphone is enabled (default: true).
 * @param micMissing Whether the microphone is missing (default: false).
 * @param defaultAlgorithm Algorithm to use unless the local user's microphone
 *   is missing; defaults to "custom" while DisableDedicatedVAD is set,
 *   otherwise to the per-browser choice.
 * @returns true while voice is detected, false otherwise.
 */
export const useVoiceDetector = (
  videoRef: RefObject<HTMLVideoElement>,
  micEnabled = true,
  micMissing = false,
  defaultAlgorithm: VoiceDetectionAlgorithm = DisableDedicatedVAD
    ? "custom"
    : getAlgorithmForBrowser()
): boolean => {
  // The dedicated algorithm won't start if it detects a missing microphone for the local user
  // We have to use the custom algorithm in this case (to detect other users' voice)
  const [localUserMicMissing] = useLocalStorage<boolean>("micMissing");
  const algorithm = localUserMicMissing ? "custom" : defaultAlgorithm;

  // This is the dedicated voice activity detector for the browser
  // The hook is always called (rules of hooks); it only starts on load when selected
  const vad = useMicVAD({
    startOnLoad: algorithm === "dedicated",
    workletURL: micVADUrls.workletURL,
    modelURL: micVADUrls.modelURL,
    ortConfig: (ort) => {
      ort.env.wasm.wasmPaths = micVADUrls.ortWasmURL;
    },
    onSpeechStart: () => {
      /* This stops spam in the console */
    },
    onSpeechEnd: () => {
      /* This stops spam in the console */
    },
    stream: videoRef.current?.srcObject as MediaStream,
  });

  // This is the custom voice activity detector for the browser
  const [isVoiceActive, setIsVoiceActive] = useState<boolean>(false);

  // Resolves to the current voice state and never rejects, so it is safe to
  // call from a timer. A disabled or missing microphone always yields false,
  // which clears any stale "active" state left from before the mic was muted.
  const checkVoice = useCallback(async (): Promise<boolean> => {
    if (micMissing || !micEnabled) {
      return false;
    }

    try {
      return await checkIsVoiceActive(videoRef);
    } catch {
      return false;
    }
  }, [micEnabled, micMissing, videoRef]);

  useEffect(() => {
    if (algorithm === "dedicated") {
      return undefined;
    }

    // Set on cleanup so that checks still in flight cannot update the state
    // after unmount or after the inputs changed
    let cancelled = false;

    const poll = async () => {
      const active = await checkVoice();
      if (!cancelled) {
        setIsVoiceActive(active);
      }
    };

    // Check every 100 ms
    const intervalId = setInterval(poll, 100);

    return () => {
      cancelled = true;
      clearInterval(intervalId);
    };
  }, [algorithm, checkVoice]);

  return algorithm === "dedicated" ? vad.userSpeaking : isVoiceActive;
};
