feat: Enhance audio capture and monitoring features

- Added "audioCapture" permission to manifest for microphone access.
- Introduced DeepSeek as a new AI provider option in the side panel.
- Implemented a capture mode selection (tab-only, mic-only, mixed) in the side panel.
- Added options to enable/disable the extension and auto-open the assistant window.
- Integrated a mic monitor feature with live input level visualization.
- Included buttons for requesting microphone permission and granting tab access.
- Updated styles for new sections and mic level visualization.
- Enhanced model fetching logic to support DeepSeek and improved error handling (see the sketch below).
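
The DeepSeek model-fetch path lives in the side panel and is not shown in this file's diff. A minimal sketch of what that logic could look like, assuming DeepSeek's OpenAI-compatible REST API (the base URL and response shape are assumptions, not taken from this commit):

async function fetchDeepSeekModels(apiKey) {
  // Assumed endpoint: DeepSeek exposes an OpenAI-compatible /models route.
  const response = await fetch('https://api.deepseek.com/models', {
    headers: { 'Authorization': `Bearer ${apiKey}` }
  });
  if (!response.ok) {
    throw new Error(`Model fetch failed: ${response.status}`);
  }
  const payload = await response.json();
  // OpenAI-compatible list responses look like { data: [{ id: 'deepseek-chat' }, ...] }.
  return payload.data.map(model => model.id);
}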
2026-01-31 21:55:09 +01:00
parent 246506b177
commit 56d56395ee
11 changed files with 1651 additions and 276 deletions

@@ -1,19 +1,89 @@
let audioContext;
let mediaStream;
let recognition;
let isCapturing = false;
let overlayInitialized = false;
let activeCaptureMode = 'tab';
let overlayListening = false;
let overlayHidden = false;
let analyserNode = null;
let meterSource = null;
let meterRaf = null;
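// Message routing convention: handlers that respond synchronously call
// sendResponse and return false so the port closes right away; only
// requestMicPermission returns true to keep the channel open for its
// async response.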
chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
if (request.action === 'startCapture') {
activeCaptureMode = 'tab';
startCapture(request.streamId);
sendResponse({ success: true });
return false;
}
if (request.action === 'startMicCapture') {
activeCaptureMode = 'mic';
startMicCapture();
sendResponse({ success: true });
return false;
}
if (request.action === 'startMixedCapture') {
activeCaptureMode = 'mixed';
startMixedCapture(request.streamId);
sendResponse({ success: true });
return false;
}
if (request.action === 'stopCapture') {
stopCapture();
sendResponse({ success: true });
return false;
}
if (request.action === 'requestMicPermission') {
requestMicPermission().then(sendResponse);
return true;
}
if (request.action === 'updateTranscript') {
updateOverlay('transcript', request.transcript);
return false;
}
if (request.action === 'updateAIResponse') {
updateOverlay('response', request.response);
return false;
}
if (request.action === 'showOverlay') {
setOverlayHidden(false);
return false;
}
if (request.action === 'hideOverlay') {
setOverlayHidden(true);
return false;
}
return false;
});
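// The streamId used by 'startCapture' and 'startMixedCapture' is expected to
// come from the caller, e.g. the side panel (hypothetical sketch, assuming
// chrome.tabCapture.getMediaStreamId is available there):
//   chrome.tabCapture.getMediaStreamId({ targetTabId: tab.id }, (streamId) => {
//     chrome.tabs.sendMessage(tab.id, { action: 'startCapture', streamId });
//   });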
async function requestMicPermission() {
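// Acquiring an audio track and stopping it immediately is enough to trigger
// the browser's permission prompt without holding the microphone open.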
try {
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
stream.getTracks().forEach(track => track.stop());
return { success: true };
} catch (error) {
let errorMessage;
if (error.name === 'NotAllowedError') {
errorMessage = 'Microphone permission denied.';
} else if (error.name === 'NotFoundError') {
errorMessage = 'No microphone found.';
} else {
errorMessage = error.message || 'Unknown error occurred.';
}
return { success: false, error: errorMessage };
}
}
function startCapture(streamId) {
isCapturing = true;
overlayListening = true;
ensureOverlay();
updateOverlayIndicator();
updateOverlay(
'response',
'Tab audio is captured, but speech recognition uses the microphone. Use mic or mixed mode if you want transcription.'
);
navigator.mediaDevices.getUserMedia({
audio: {
mandatory: {
chromeMediaSource: 'tab',
chromeMediaSourceId: streamId
}
}
@@ -22,37 +92,10 @@ function startCapture(streamId) {
}).then((stream) => {
mediaStream = stream;
audioContext = new AudioContext();
createAudioMeter(stream);
if (ensureSpeechRecognitionAvailable()) {
startRecognition();
}
}).catch((error) => {
console.error('Error starting capture:', error);
let errorMessage = 'Failed to start audio capture. ';
@@ -64,23 +107,473 @@ function startCapture(streamId) {
errorMessage += error.message || 'Unknown error occurred.';
}
chrome.runtime.sendMessage({action: 'updateAIResponse', response: errorMessage});
updateOverlay('response', errorMessage);
overlayListening = false;
updateOverlayIndicator();
});
}
function startMicCapture() {
isCapturing = true;
overlayListening = true;
ensureOverlay();
updateOverlayIndicator();
navigator.mediaDevices.getUserMedia({ audio: true }).then((stream) => {
mediaStream = stream;
audioContext = new AudioContext();
createAudioMeter(stream);
if (ensureSpeechRecognitionAvailable()) {
startRecognition();
}
}).catch((error) => {
console.error('Error starting mic capture:', error);
let errorMessage = 'Failed to start microphone capture. ';
if (error.name === 'NotAllowedError') {
errorMessage += 'Please allow microphone access and try again.';
} else if (error.name === 'NotFoundError') {
errorMessage += 'No microphone found.';
} else {
errorMessage += error.message || 'Unknown error occurred.';
}
chrome.runtime.sendMessage({action: 'updateAIResponse', response: errorMessage});
updateOverlay('response', errorMessage);
overlayListening = false;
updateOverlayIndicator();
});
}
function startMixedCapture(streamId) {
isCapturing = true;
overlayListening = true;
ensureOverlay();
updateOverlayIndicator();
navigator.mediaDevices.getUserMedia({
audio: {
mandatory: {
chromeMediaSource: 'tab',
chromeMediaSourceId: streamId
}
}
}).then((stream) => {
mediaStream = stream;
audioContext = new AudioContext();
createAudioMeter(stream);
if (ensureSpeechRecognitionAvailable()) {
startRecognition();
}
}).catch((error) => {
console.error('Error starting mixed capture:', error);
chrome.runtime.sendMessage({action: 'updateAIResponse', response: 'Failed to start mixed capture.'});
updateOverlay('response', 'Failed to start mixed capture.');
overlayListening = false;
updateOverlayIndicator();
});
}
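// The Web Speech API always listens on the default microphone; it cannot be
// pointed at a captured MediaStream, which is why tab-only mode warns that
// transcription requires mic or mixed mode.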
function startRecognition() {
if (recognition) {
try {
recognition.stop();
} catch (error) {
console.warn('Failed to stop previous recognition:', error);
}
}
const SpeechRecognitionCtor = window.SpeechRecognition || window.webkitSpeechRecognition;
recognition = new SpeechRecognitionCtor();
recognition.continuous = true;
recognition.interimResults = true;
recognition.onresult = function(event) {
let finalTranscript = '';
for (let i = event.resultIndex; i < event.results.length; ++i) {
if (event.results[i].isFinal) {
finalTranscript += event.results[i][0].transcript;
}
}
if (finalTranscript.trim() !== '') {
chrome.runtime.sendMessage({action: 'updateTranscript', transcript: finalTranscript});
updateOverlay('transcript', finalTranscript);
chrome.runtime.sendMessage({action: 'getAIResponse', question: finalTranscript});
}
};
recognition.onerror = function(event) {
console.error('Speech recognition error:', event.error);
if (event.error === 'no-speech' && isCapturing) {
try {
recognition.start();
} catch (error) {
console.warn('Failed to restart recognition after no-speech:', error);
}
return;
}
chrome.runtime.sendMessage({action: 'updateAIResponse', response: `Speech recognition error: ${event.error}. Please try again.`});
updateOverlay('response', `Speech recognition error: ${event.error}. Please try again.`);
};
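// Continuous recognition still stops on its own after silence or service
// timeouts, so restart it for as long as a capture session is active.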
recognition.onend = function() {
if (!isCapturing) return;
try {
recognition.start();
} catch (error) {
console.warn('Failed to restart recognition:', error);
}
};
recognition.start();
}
function ensureSpeechRecognitionAvailable() {
const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
if (!SpeechRecognition) {
const message = 'Speech recognition is not available in this browser context. Try Chrome, which supports the Web Speech API.';
chrome.runtime.sendMessage({ action: 'updateAIResponse', response: message });
updateOverlay('response', message);
overlayListening = false;
updateOverlayIndicator();
return false;
}
return true;
}
function stopCapture() {
isCapturing = false;
overlayListening = false;
updateOverlayIndicator();
stopAudioMeter();
if (mediaStream) {
mediaStream.getTracks().forEach(track => track.stop());
}
if (audioContext) {
audioContext.close();
audioContext = null;
}
if (recognition) {
recognition.stop();
}
}
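// Legacy heuristic from the previous flow; startRecognition now forwards every
// final transcript to the AI instead of gating on question detection.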
function isQuestion(text) {
const questionWords = ['what', 'when', 'where', 'who', 'why', 'how'];
const lowerText = text.toLowerCase();
return questionWords.some(word => lowerText.includes(word)) || text.includes('?');
}
function ensureOverlay() {
if (overlayInitialized) return;
overlayInitialized = true;
if (document.getElementById('ai-interview-overlay')) {
return;
}
const style = document.createElement('style');
style.textContent = `
#ai-interview-overlay {
position: fixed;
top: 24px;
right: 24px;
width: 420px;
min-width: 280px;
min-height: 240px;
background: rgba(20, 20, 20, 0.35);
color: #f5f5f5;
border: 1px solid rgba(255, 255, 255, 0.15);
border-radius: 12px;
backdrop-filter: blur(10px);
z-index: 2147483647;
font-family: "Helvetica Neue", Arial, sans-serif;
box-shadow: 0 10px 30px rgba(0, 0, 0, 0.35);
user-select: none;
resize: both;
overflow: auto;
}
#ai-interview-resize {
position: absolute;
right: 6px;
bottom: 6px;
width: 14px;
height: 14px;
cursor: se-resize;
background: radial-gradient(circle at center, rgba(255, 255, 255, 0.8) 0 2px, transparent 2px);
opacity: 0.6;
}
#ai-interview-overlay.minimized #ai-interview-body {
display: none;
}
#ai-interview-header {
display: flex;
align-items: center;
justify-content: space-between;
padding: 10px 12px;
cursor: move;
font-weight: 600;
font-size: 13px;
letter-spacing: 0.02em;
border-bottom: 1px solid rgba(255, 255, 255, 0.1);
}
#ai-interview-title {
display: flex;
align-items: center;
gap: 8px;
}
#ai-interview-indicator {
width: 10px;
height: 10px;
border-radius: 50%;
background: rgba(255, 255, 255, 0.25);
box-shadow: 0 0 0 rgba(255, 255, 255, 0.3);
}
#ai-interview-indicator.active {
background: #41f59a;
animation: aiPulse 1.2s ease-in-out infinite;
box-shadow: 0 0 8px rgba(65, 245, 154, 0.7);
}
@keyframes aiPulse {
0% { transform: scale(0.9); opacity: 0.6; }
50% { transform: scale(1.3); opacity: 1; }
100% { transform: scale(0.9); opacity: 0.6; }
}
#ai-interview-controls {
display: flex;
gap: 6px;
}
.ai-interview-btn {
background: rgba(255, 255, 255, 0.12);
border: none;
color: #f5f5f5;
font-size: 12px;
padding: 4px 8px;
border-radius: 6px;
cursor: pointer;
}
.ai-interview-btn:hover {
background: rgba(255, 255, 255, 0.22);
}
#ai-interview-body {
padding: 12px;
font-size: 12px;
line-height: 1.4;
}
#ai-interview-mode {
font-size: 11px;
opacity: 0.8;
margin-bottom: 6px;
}
#ai-interview-meter {
height: 6px;
background: rgba(255, 255, 255, 0.12);
border-radius: 999px;
overflow: hidden;
margin-bottom: 10px;
}
#ai-interview-meter-bar {
height: 100%;
width: 0%;
background: linear-gradient(90deg, #41f59a, #48c5ff);
transition: width 80ms linear;
}
#ai-interview-transcript,
#ai-interview-response {
background: rgba(0, 0, 0, 0.35);
border-radius: 8px;
padding: 8px;
margin-bottom: 8px;
max-height: 200px;
overflow: auto;
user-select: text;
}
`;
document.head.appendChild(style);
const overlay = document.createElement('div');
overlay.id = 'ai-interview-overlay';
overlay.innerHTML = `
<div id="ai-interview-header">
<div id="ai-interview-title">
<span id="ai-interview-indicator"></span>
<span>AI Interview Assistant</span>
</div>
<div id="ai-interview-controls">
<button class="ai-interview-btn" id="ai-interview-detach">Detach</button>
<button class="ai-interview-btn" id="ai-interview-minimize">Minimize</button>
<button class="ai-interview-btn" id="ai-interview-hide">Hide</button>
</div>
</div>
<div id="ai-interview-body">
<div id="ai-interview-mode">Mode: ${activeCaptureMode}</div>
<div id="ai-interview-meter"><div id="ai-interview-meter-bar"></div></div>
<div id="ai-interview-transcript">Transcript will appear here.</div>
<div id="ai-interview-response">Answer will appear here.</div>
</div>
<div id="ai-interview-resize" title="Resize"></div>
`;
document.body.appendChild(overlay);
const header = overlay.querySelector('#ai-interview-header');
const minimizeBtn = overlay.querySelector('#ai-interview-minimize');
const detachBtn = overlay.querySelector('#ai-interview-detach');
const hideBtn = overlay.querySelector('#ai-interview-hide');
const resizeHandle = overlay.querySelector('#ai-interview-resize');
let isDragging = false;
let startX = 0;
let startY = 0;
let startLeft = 0;
let startTop = 0;
header.addEventListener('mousedown', (event) => {
isDragging = true;
startX = event.clientX;
startY = event.clientY;
const rect = overlay.getBoundingClientRect();
startLeft = rect.left;
startTop = rect.top;
overlay.style.right = 'auto';
});
document.addEventListener('mousemove', (event) => {
if (!isDragging) return;
const nextLeft = startLeft + (event.clientX - startX);
const nextTop = startTop + (event.clientY - startY);
overlay.style.left = `${Math.max(8, nextLeft)}px`;
overlay.style.top = `${Math.max(8, nextTop)}px`;
});
document.addEventListener('mouseup', () => {
isDragging = false;
});
resizeHandle.addEventListener('mousedown', (event) => {
event.preventDefault();
event.stopPropagation();
const startWidth = overlay.offsetWidth;
const startHeight = overlay.offsetHeight;
const startMouseX = event.clientX;
const startMouseY = event.clientY;
const onMove = (moveEvent) => {
const nextWidth = Math.max(280, startWidth + (moveEvent.clientX - startMouseX));
const nextHeight = Math.max(240, startHeight + (moveEvent.clientY - startMouseY));
overlay.style.width = `${nextWidth}px`;
overlay.style.height = `${nextHeight}px`;
};
const onUp = () => {
document.removeEventListener('mousemove', onMove);
document.removeEventListener('mouseup', onUp);
};
document.addEventListener('mousemove', onMove);
document.addEventListener('mouseup', onUp);
});
minimizeBtn.addEventListener('click', () => {
overlay.classList.toggle('minimized');
minimizeBtn.textContent = overlay.classList.contains('minimized') ? 'Expand' : 'Minimize';
});
detachBtn.addEventListener('click', () => {
chrome.runtime.sendMessage({ action: 'openAssistantWindow' });
});
hideBtn.addEventListener('click', () => {
setOverlayHidden(true);
});
updateOverlayIndicator();
}
function updateOverlay(type, text) {
ensureOverlay();
applyOverlayHiddenState();
const modeEl = document.getElementById('ai-interview-mode');
if (modeEl) {
modeEl.textContent = `Mode: ${activeCaptureMode}`;
}
if (type === 'transcript') {
const transcriptEl = document.getElementById('ai-interview-transcript');
if (transcriptEl) transcriptEl.textContent = text;
}
if (type === 'response') {
const responseEl = document.getElementById('ai-interview-response');
if (responseEl) responseEl.textContent = text;
}
}
function updateOverlayIndicator() {
const indicator = document.getElementById('ai-interview-indicator');
if (!indicator) return;
indicator.classList.toggle('active', overlayListening);
if (!overlayListening) {
const bar = document.getElementById('ai-interview-meter-bar');
if (bar) bar.style.width = '0%';
}
}
function setOverlayHidden(hidden) {
overlayHidden = hidden;
applyOverlayHiddenState();
}
function applyOverlayHiddenState() {
const overlay = document.getElementById('ai-interview-overlay');
if (!overlay) return;
overlay.style.display = overlayHidden ? 'none' : '';
}
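// Drives the overlay level bar: samples time-domain audio once per animation
// frame, computes the RMS amplitude, and scales it (x2.5, clamped to 100%) so
// typical speech levels fill a visible portion of the meter.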
function createAudioMeter(stream) {
if (!audioContext) {
audioContext = new AudioContext();
}
stopAudioMeter();
analyserNode = audioContext.createAnalyser();
analyserNode.fftSize = 512;
analyserNode.smoothingTimeConstant = 0.8;
meterSource = audioContext.createMediaStreamSource(stream);
meterSource.connect(analyserNode);
const data = new Uint8Array(analyserNode.fftSize);
const tick = () => {
if (!analyserNode) return;
analyserNode.getByteTimeDomainData(data);
let sum = 0;
for (let i = 0; i < data.length; i++) {
const v = (data[i] - 128) / 128;
sum += v * v;
}
const rms = Math.sqrt(sum / data.length);
const normalized = Math.min(1, rms * 2.5);
const bar = document.getElementById('ai-interview-meter-bar');
if (bar) {
bar.style.width = `${Math.round(normalized * 100)}%`;
}
meterRaf = requestAnimationFrame(tick);
};
meterRaf = requestAnimationFrame(tick);
}
function stopAudioMeter() {
if (meterRaf) {
cancelAnimationFrame(meterRaf);
meterRaf = null;
}
if (meterSource) {
try {
meterSource.disconnect();
} catch (error) {
console.warn('Failed to disconnect meter source:', error);
}
meterSource = null;
}
if (analyserNode) {
try {
analyserNode.disconnect();
} catch (error) {
console.warn('Failed to disconnect analyser:', error);
}
analyserNode = null;
}
}