/*
 * Copyright © 2018-2025, GlobalVET AB
 *
 * All rights reserved. No part or the whole of this source code and the compiled program
 * may be reproduced, copied, distributed, disseminated to the public, adapted or transmitted
 * in any form or by any means, including photocopying, recording, or other electronic or
 * mechanical methods, without the prior written permission of GlobalVET AB. This source code
 * and the compiled program may only be used for the purposes of GlobalVET AB. This source code
 * and the compiled program shall be kept confidential and shall not be made public or made
 * available or disclosed to any unauthorized person. Any dispute or claim arising out of the
 * breach of these provisions shall be governed by and construed in accordance with the
 * laws of Sweden.
 */

import React, { useCallback, useEffect, useState } from "react";
import { useMicVAD } from "@ricky0123/vad-react";
import { useLocalStorage } from "@uidotdev/usehooks";
import Bowser from "bowser";
import { getAudioContext } from "./AudioContext";

// Global kill switch for the dedicated algorithm: while this is true,
// getAlgorithmForBrowser never returns "dedicated"
const DisableDedicatedVAD = true;

// User-agent parser for the current browser, created once at module load and queried by isBrowser
const browser = Bowser.getParser(window.navigator.userAgent);

/**
 * Takes one short measurement of the first audio track of the given media element's stream
 * and reports whether its level is above a threshold.
 *
 * The track is routed into a temporary analyser node, sampled once after 100 ms, and the
 * temporary nodes are disconnected again. The level is the RMS of the analyser's byte
 * time-domain samples, centred around 128.
 *
 * The returned promise never rejects: a missing element, stream or audio track, as well as any
 * error raised while building or reading the audio graph, is reported as `false`.
 *
 * @param mediaElement Media element whose `srcObject` is the media stream carrying the audio track.
 * @param threshold RMS level (default: 3) above which the audio counts as active voice.
 * @returns Promise that resolves to true if the measured level exceeds the threshold, false otherwise.
 */
export const checkIsVoiceActive = (mediaElement: HTMLMediaElement | null, threshold = 3): Promise<boolean> =>
  new Promise((resolve) => {
    // Nodes created for this measurement, tracked so they are always detached again
    const nodes: AudioNode[] = [];

    // Detaches the temporary nodes and reports the result
    const settle = (isActive: boolean): void => {
      for (const node of nodes) {
        try {
          node.disconnect();
        } catch {
          // Cleanup is best effort; the result is still reported
        }
      }

      resolve(isActive);
    };

    try {
      if (!mediaElement) {
        settle(false);
        return;
      }

      const mediaStream = mediaElement.srcObject as MediaStream | null;
      if (!mediaStream) {
        settle(false);
        return;
      }

      const mainAudioTrack = mediaStream.getAudioTracks()[0];
      if (!mainAudioTrack) {
        settle(false);
        return;
      }

      const audioContext = getAudioContext();

      // Wrap the single track in its own stream so only that track is analysed
      const mediaStreamSource = audioContext.createMediaStreamSource(new MediaStream([mainAudioTrack]));
      nodes.push(mediaStreamSource);

      const analyser = audioContext.createAnalyser();
      nodes.push(analyser);
      analyser.fftSize = 512;

      mediaStreamSource.connect(analyser);

      const dataArray = new Uint8Array(analyser.fftSize);

      const checkData = (): void => {
        try {
          analyser.getByteTimeDomainData(dataArray);

          let sum = 0;
          let sampleCount = 0;
          const step = 4;

          // Process every 4th sample to reduce CPU load
          for (let i = 0; i < dataArray.length; i += step) {
            const amplitude = dataArray[i] - 128; // Normalize around 0
            sum += amplitude * amplitude;
            sampleCount += 1;
          }

          // Divide by the number of samples actually read, so the mean stays correct
          // even if the buffer length is not a multiple of the step
          const rms = Math.sqrt(sum / sampleCount);

          settle(rms > threshold);
        } catch {
          // Without this the promise would stay pending forever and the nodes would stay connected
          settle(false);
        }
      };

      setTimeout(checkData, 100); // Give some time for the analyser to gather data
    } catch {
      // The audio graph could not be built (or srcObject is not a media stream): report silence
      settle(false);
    }
  });

/**
 * Resources required by the dedicated voice activity detector (passed to useMicVAD).
 *
 * In non-React applications these are loaded via script tags, and in apps not created by
 * create-react-app they come from npm packages. For create-react-app apps the Webpack config would
 * have to be modified, which is not easily doable without "npm run eject".
 * Pointing at the CDN does essentially the same as a script tag, just more conveniently in React.
 * As a consequence, these packages have to be checked for updates manually
 * (or until create-react-app supports them).
 *
 * NOTE(review): ortWasmURL has no version in its path, unlike the two vad-web URLs pinned to 0.0.7.
 * Presumably it resolves to whatever version the CDN serves by default - confirm this is intended.
 */
const micVADUrls = {
  workletURL: "https://cdn.jsdelivr.net/npm/@ricky0123/vad-web@0.0.7/dist/vad.worklet.bundle.min.js",
  modelURL: "https://cdn.jsdelivr.net/npm/@ricky0123/vad-web@0.0.7/dist/silero_vad.onnx",
  ortWasmURL: "https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/",
};

// Tells whether the browser parsed from the user agent has the given name
// (the given name is lowercased before the comparison)
const isBrowser = (name: string): boolean => {
  const currentName = browser.getBrowserName(true);
  const wantedName = name.toLowerCase();

  return currentName === wantedName;
};

// List of browsers where the voice detector does not work properly
// (getAlgorithmForBrowser returns "none" for these, i.e. no detection at all)
// Safari: Voice detection sometimes causes a crash
const BuggedBrowsers: string[] = ["Safari"];

// List of browsers where the dedicated algorithm does not work properly
// (getAlgorithmForBrowser falls back to "custom" for these)
// Firefox: Sometimes does not detect voice for 2 people, unless the tab is muted
const BuggedBrowsersDedicated: string[] = ["Firefox"];

/**
 * Voice detection strategy used by useVoiceDetector:
 * - "dedicated": the useMicVAD hook decides whether the user is speaking
 * - "custom": checkIsVoiceActive is polled periodically
 * - "none": no detection is performed
 */
type VoiceDetectionAlgorithm = "dedicated" | "custom" | "none";

// Picks the voice detection algorithm for the current browser:
// - "none" when voice detection is bugged in this browser
// - "dedicated" when it is neither bugged in this browser nor globally disabled
// - "custom" in every other case
export const getAlgorithmForBrowser = (): VoiceDetectionAlgorithm => {
  const isCurrentBrowser = (browserName: string): boolean => isBrowser(browserName);

  const detectionBugged = BuggedBrowsers.some(isCurrentBrowser);
  if (detectionBugged) {
    return "none";
  }

  const dedicatedUsable = !BuggedBrowsersDedicated.some(isCurrentBrowser) && !DisableDedicatedVAD;

  return dedicatedUsable ? "dedicated" : "custom";
};

/**
 * React hook that reports whether voice is currently detected on the media element's audio.
 *
 * Depending on the algorithm in effect:
 * - "dedicated": returns the `userSpeaking` flag of `useMicVAD`, fed with the element's stream.
 * - "custom": polls `checkIsVoiceActive` every 100 ms while the microphone is present and enabled,
 *   and reports no voice while it is missing or disabled.
 * - "none": never polls, so the result stays false.
 *
 * If the "micMissing" local storage entry is truthy, the "custom" algorithm is used regardless of
 * `defaultAlgorithm`.
 *
 * @param mediaElement Media element whose `srcObject` stream is analysed.
 * @param micEnabled Whether the microphone is enabled (default: true); the custom algorithm reports no voice while false.
 * @param micMissing Whether the microphone is missing (default: false); the custom algorithm reports no voice while true.
 * @param defaultAlgorithm Algorithm to use (default: the result of `getAlgorithmForBrowser()`).
 * @returns true while voice is detected, false otherwise.
 */
export const useVoiceDetector = (
  mediaElement: HTMLMediaElement | null,
  micEnabled = true,
  micMissing = false,
  defaultAlgorithm: VoiceDetectionAlgorithm = getAlgorithmForBrowser()
): boolean => {
  // The dedicated algorithm won't start if it detects a missing microphone for the local user
  // We have to use the custom algorithm in this case (to detect other users' voices)
  const [localUserMicMissing] = useLocalStorage<boolean>("micMissing");
  const algorithm = localUserMicMissing ? "custom" : defaultAlgorithm;

  let vad = null;

  if (algorithm === "dedicated") {
    // This is the dedicated voice activity detector for the browser
    // The hook is called conditionally on the assumption that the algorithm stays the same for the
    // whole lifetime of the component
    // NOTE(review): `algorithm` depends on the "micMissing" local storage state and on the
    // `defaultAlgorithm` argument; if either changes between renders, the hook order changes,
    // which React does not allow - confirm that this cannot happen in practice
    // eslint-disable-next-line react-hooks/rules-of-hooks
    vad = useMicVAD({
      startOnLoad: true,
      workletURL: micVADUrls.workletURL,
      modelURL: micVADUrls.modelURL,
      ortConfig: (ort) => {
        ort.env.wasm.wasmPaths = micVADUrls.ortWasmURL;
      },
      onSpeechStart: () => {
        // This stops spam in the console
      },
      onSpeechEnd: () => {
        // This stops spam in the console
      },
      stream: (mediaElement?.srcObject as MediaStream) || undefined,
    });
  }

  // This is the custom voice activity detector for the browser
  const [isVoiceActive, setIsVoiceActive] = useState<boolean>(false);

  // Takes a single measurement; a failed measurement counts as silence instead of
  // surfacing as an unhandled rejection on every poll
  const checkVoice = useCallback(async (): Promise<boolean> => {
    try {
      return await checkIsVoiceActive(mediaElement);
    } catch {
      return false;
    }
  }, [mediaElement]);

  useEffect(() => {
    if (algorithm !== "custom") {
      return undefined;
    }

    if (micMissing || !micEnabled) {
      // Without a usable microphone nobody can be speaking: clear the last measurement,
      // otherwise a "true" from before the microphone was muted would stick forever
      setIsVoiceActive(false);
      return undefined;
    }

    // Set by the cleanup, so a measurement still in flight cannot write a stale result
    // after the inputs changed or the component unmounted
    let cancelled = false;

    const poll = async (): Promise<void> => {
      const isActive = await checkVoice();

      if (!cancelled) {
        setIsVoiceActive(isActive);
      }
    };

    // Check every 100 ms
    const intervalId = setInterval(() => {
      void poll();
    }, 100);

    return () => {
      cancelled = true;
      clearInterval(intervalId);
    };
  }, [algorithm, checkVoice, micEnabled, micMissing]);

  return algorithm === "dedicated" && !!vad ? vad.userSpeaking : isVoiceActive;
};
