feat: Enhance audio capture and monitoring features

- Added "audioCapture" permission to manifest for microphone access.
- Introduced DeepSeek as a new AI provider option in the side panel.
- Implemented a capture mode selection (tab-only, mic-only, mixed) in the side panel.
- Added options to enable/disable the extension and auto-open the assistant window.
- Integrated a mic monitor feature with live input level visualization.
- Included buttons for requesting microphone permission and granting tab access.
- Updated styles for new sections and mic level visualization.
- Enhanced model fetching logic to support DeepSeek and improved error handling (see the sketch below).
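
The DeepSeek model-fetch path lives in the side panel and is not shown in this file's diff. A minimal sketch of what that logic could look like, assuming DeepSeek's OpenAI-compatible REST API (the base URL and response shape are assumptions, not taken from this commit):

async function fetchDeepSeekModels(apiKey) {
  // Assumed endpoint: DeepSeek exposes an OpenAI-compatible /models route.
  const response = await fetch('https://api.deepseek.com/models', {
    headers: { 'Authorization': `Bearer ${apiKey}` }
  });
  if (!response.ok) {
    throw new Error(`Model fetch failed: ${response.status}`);
  }
  const payload = await response.json();
  // OpenAI-compatible list responses look like { data: [{ id: 'deepseek-chat' }, ...] }.
  return payload.data.map(model => model.id);
}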
2026-01-31 21:55:09 +01:00
parent 246506b177
commit 56d56395ee
11 changed files with 1651 additions and 276 deletions

@@ -1,19 +1,89 @@
let audioContext;
let mediaStream;
let recognition;
let isCapturing = false;
let overlayInitialized = false;
let activeCaptureMode = 'tab';
let overlayListening = false;
let overlayHidden = false;
let analyserNode = null;
let meterSource = null;
let meterRaf = null;
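// Message routing convention: handlers that respond synchronously call
// sendResponse and return false so the port closes right away; only
// requestMicPermission returns true to keep the channel open for its
// async response.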
chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
if (request.action === 'startCapture') {
activeCaptureMode = 'tab';
startCapture(request.streamId);
sendResponse({ success: true });
return false;
}
if (request.action === 'startMicCapture') {
activeCaptureMode = 'mic';
startMicCapture();
sendResponse({ success: true });
return false;
}
if (request.action === 'startMixedCapture') {
activeCaptureMode = 'mixed';
startMixedCapture(request.streamId);
sendResponse({ success: true });
return false;
}
if (request.action === 'stopCapture') {
stopCapture();
sendResponse({ success: true });
return false;
}
if (request.action === 'requestMicPermission') {
requestMicPermission().then(sendResponse);
return true;
}
if (request.action === 'updateTranscript') {
updateOverlay('transcript', request.transcript);
return false;
}
if (request.action === 'updateAIResponse') {
updateOverlay('response', request.response);
return false;
}
if (request.action === 'showOverlay') {
setOverlayHidden(false);
return false;
}
if (request.action === 'hideOverlay') {
setOverlayHidden(true);
return false;
}
return false;
});
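// The streamId used by 'startCapture' and 'startMixedCapture' is expected to
// come from the caller, e.g. the side panel (hypothetical sketch, assuming
// chrome.tabCapture.getMediaStreamId is available there):
//   chrome.tabCapture.getMediaStreamId({ targetTabId: tab.id }, (streamId) => {
//     chrome.tabs.sendMessage(tab.id, { action: 'startCapture', streamId });
//   });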
async function requestMicPermission() {
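// Acquiring an audio track and stopping it immediately is enough to trigger
// the browser's permission prompt without holding the microphone open.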
try {
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
stream.getTracks().forEach(track => track.stop());
return { success: true };
} catch (error) {
let errorMessage;
if (error.name === 'NotAllowedError') {
errorMessage = 'Microphone permission denied.';
} else if (error.name === 'NotFoundError') {
errorMessage = 'No microphone found.';
} else {
errorMessage = error.message || 'Unknown error occurred.';
}
return { success: false, error: errorMessage };
}
}
function startCapture(streamId) {
isCapturing = true;
overlayListening = true;
ensureOverlay();
updateOverlayIndicator();
updateOverlay(
'response',
'Tab audio is captured, but speech recognition uses the microphone. Use mic or mixed mode if you want transcription.'
);
navigator.mediaDevices.getUserMedia({
audio: {
mandatory: {
chromeMediaSource: 'tab',
chromeMediaSourceId: streamId
}
}
@@ -22,37 +92,10 @@ function startCapture(streamId) {
}).then((stream) => {
mediaStream = stream;
audioContext = new AudioContext();
createAudioMeter(stream);
if (ensureSpeechRecognitionAvailable()) {
startRecognition();
}
}).catch((error) => {
console.error('Error starting capture:', error);
let errorMessage = 'Failed to start audio capture. ';
@@ -64,23 +107,473 @@ function startCapture(streamId) {
errorMessage += error.message || 'Unknown error occurred.';
}
chrome.runtime.sendMessage({action: 'updateAIResponse', response: errorMessage});
updateOverlay('response', errorMessage);
overlayListening = false;
updateOverlayIndicator();
});
}
function startMicCapture() {
isCapturing = true;
overlayListening = true;
ensureOverlay();
updateOverlayIndicator();
navigator.mediaDevices.getUserMedia({ audio: true }).then((stream) => {
mediaStream = stream;
audioContext = new AudioContext();
createAudioMeter(stream);
if (ensureSpeechRecognitionAvailable()) {
startRecognition();
}
}).catch((error) => {
console.error('Error starting mic capture:', error);
let errorMessage = 'Failed to start microphone capture. ';
if (error.name === 'NotAllowedError') {
errorMessage += 'Please allow microphone access and try again.';
} else if (error.name === 'NotFoundError') {
errorMessage += 'No microphone found.';
} else {
errorMessage += error.message || 'Unknown error occurred.';
}
chrome.runtime.sendMessage({action: 'updateAIResponse', response: errorMessage});
updateOverlay('response', errorMessage);
overlayListening = false;
updateOverlayIndicator();
});
}
function startMixedCapture(streamId) {
isCapturing = true;
overlayListening = true;
ensureOverlay();
updateOverlayIndicator();
navigator.mediaDevices.getUserMedia({
audio: {
mandatory: {
chromeMediaSource: 'tab',
chromeMediaSourceId: streamId
}
}
}).then((stream) => {
mediaStream = stream;
audioContext = new AudioContext();
createAudioMeter(stream);
if (ensureSpeechRecognitionAvailable()) {
startRecognition();
}
}).catch((error) => {
console.error('Error starting mixed capture:', error);
chrome.runtime.sendMessage({action: 'updateAIResponse', response: 'Failed to start mixed capture.'});
updateOverlay('response', 'Failed to start mixed capture.');
overlayListening = false;
updateOverlayIndicator();
});
}
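// The Web Speech API always listens on the default microphone; it cannot be
// pointed at a captured MediaStream, which is why tab-only mode warns that
// transcription requires mic or mixed mode.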
function startRecognition() {
if (recognition) {
try {
recognition.stop();
} catch (error) {
console.warn('Failed to stop previous recognition:', error);
}
}
const SpeechRecognitionCtor = window.SpeechRecognition || window.webkitSpeechRecognition;
recognition = new SpeechRecognitionCtor();
recognition.continuous = true;
recognition.interimResults = true;
recognition.onresult = function(event) {
let finalTranscript = '';
for (let i = event.resultIndex; i < event.results.length; ++i) {
if (event.results[i].isFinal) {
finalTranscript += event.results[i][0].transcript;
}
}
if (finalTranscript.trim() !== '') {
chrome.runtime.sendMessage({action: 'updateTranscript', transcript: finalTranscript});
updateOverlay('transcript', finalTranscript);
chrome.runtime.sendMessage({action: 'getAIResponse', question: finalTranscript});
}
};
recognition.onerror = function(event) {
console.error('Speech recognition error:', event.error);
if (event.error === 'no-speech' && isCapturing) {
try {
recognition.start();
} catch (error) {
console.warn('Failed to restart recognition after no-speech:', error);
}
return;
}
chrome.runtime.sendMessage({action: 'updateAIResponse', response: `Speech recognition error: ${event.error}. Please try again.`});
updateOverlay('response', `Speech recognition error: ${event.error}. Please try again.`);
};
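// Continuous recognition still stops on its own after silence or service
// timeouts, so restart it for as long as a capture session is active.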
recognition.onend = function() {
if (!isCapturing) return;
try {
recognition.start();
} catch (error) {
console.warn('Failed to restart recognition:', error);
}
};
recognition.start();
}
function ensureSpeechRecognitionAvailable() {
const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
if (!SpeechRecognition) {
const message = 'Speech recognition is not available in this browser context. Try Chrome, which supports the Web Speech API.';
chrome.runtime.sendMessage({ action: 'updateAIResponse', response: message });
updateOverlay('response', message);
overlayListening = false;
updateOverlayIndicator();
return false;
}
return true;
}
function stopCapture() {
isCapturing = false;
overlayListening = false;
updateOverlayIndicator();
stopAudioMeter();
if (mediaStream) {
mediaStream.getTracks().forEach(track => track.stop());
}
if (audioContext) {
audioContext.close();
audioContext = null;
}
if (recognition) {
recognition.stop();
}
}
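// Legacy heuristic from the previous flow; startRecognition now forwards every
// final transcript to the AI instead of gating on question detection.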
function isQuestion(text) {
const questionWords = ['what', 'when', 'where', 'who', 'why', 'how'];
const lowerText = text.toLowerCase();
return questionWords.some(word => lowerText.includes(word)) || text.includes('?');
}
function ensureOverlay() {
if (overlayInitialized) return;
overlayInitialized = true;
if (document.getElementById('ai-interview-overlay')) {
return;
}
const style = document.createElement('style');
style.textContent = `
#ai-interview-overlay {
position: fixed;
top: 24px;
right: 24px;
width: 420px;
min-width: 280px;
min-height: 240px;
background: rgba(20, 20, 20, 0.35);
color: #f5f5f5;
border: 1px solid rgba(255, 255, 255, 0.15);
border-radius: 12px;
backdrop-filter: blur(10px);
z-index: 2147483647;
font-family: "Helvetica Neue", Arial, sans-serif;
box-shadow: 0 10px 30px rgba(0, 0, 0, 0.35);
user-select: none;
resize: both;
overflow: auto;
}
#ai-interview-resize {
position: absolute;
right: 6px;
bottom: 6px;
width: 14px;
height: 14px;
cursor: se-resize;
background: radial-gradient(circle at center, rgba(255, 255, 255, 0.8) 0 2px, transparent 2px);
opacity: 0.6;
}
#ai-interview-overlay.minimized #ai-interview-body {
display: none;
}
#ai-interview-header {
display: flex;
align-items: center;
justify-content: space-between;
padding: 10px 12px;
cursor: move;
font-weight: 600;
font-size: 13px;
letter-spacing: 0.02em;
border-bottom: 1px solid rgba(255, 255, 255, 0.1);
}
#ai-interview-title {
display: flex;
align-items: center;
gap: 8px;
}
#ai-interview-indicator {
width: 10px;
height: 10px;
border-radius: 50%;
background: rgba(255, 255, 255, 0.25);
box-shadow: 0 0 0 rgba(255, 255, 255, 0.3);
}
#ai-interview-indicator.active {
background: #41f59a;
animation: aiPulse 1.2s ease-in-out infinite;
box-shadow: 0 0 8px rgba(65, 245, 154, 0.7);
}
@keyframes aiPulse {
0% { transform: scale(0.9); opacity: 0.6; }
50% { transform: scale(1.3); opacity: 1; }
100% { transform: scale(0.9); opacity: 0.6; }
}
#ai-interview-controls {
display: flex;
gap: 6px;
}
.ai-interview-btn {
background: rgba(255, 255, 255, 0.12);
border: none;
color: #f5f5f5;
font-size: 12px;
padding: 4px 8px;
border-radius: 6px;
cursor: pointer;
}
.ai-interview-btn:hover {
background: rgba(255, 255, 255, 0.22);
}
#ai-interview-body {
padding: 12px;
font-size: 12px;
line-height: 1.4;
}
#ai-interview-mode {
font-size: 11px;
opacity: 0.8;
margin-bottom: 6px;
}
#ai-interview-meter {
height: 6px;
background: rgba(255, 255, 255, 0.12);
border-radius: 999px;
overflow: hidden;
margin-bottom: 10px;
}
#ai-interview-meter-bar {
height: 100%;
width: 0%;
background: linear-gradient(90deg, #41f59a, #48c5ff);
transition: width 80ms linear;
}
#ai-interview-transcript,
#ai-interview-response {
background: rgba(0, 0, 0, 0.35);
border-radius: 8px;
padding: 8px;
margin-bottom: 8px;
max-height: 200px;
overflow: auto;
user-select: text;
}
`;
document.head.appendChild(style);
const overlay = document.createElement('div');
overlay.id = 'ai-interview-overlay';
overlay.innerHTML = `
<div id="ai-interview-header">
<div id="ai-interview-title">
<span id="ai-interview-indicator"></span>
<span>AI Interview Assistant</span>
</div>
<div id="ai-interview-controls">
<button class="ai-interview-btn" id="ai-interview-detach">Detach</button>
<button class="ai-interview-btn" id="ai-interview-minimize">Minimize</button>
<button class="ai-interview-btn" id="ai-interview-hide">Hide</button>
</div>
</div>
<div id="ai-interview-body">
<div id="ai-interview-mode">Mode: ${activeCaptureMode}</div>
<div id="ai-interview-meter"><div id="ai-interview-meter-bar"></div></div>
<div id="ai-interview-transcript">Transcript will appear here.</div>
<div id="ai-interview-response">Answer will appear here.</div>
</div>
<div id="ai-interview-resize" title="Resize"></div>
`;
document.body.appendChild(overlay);
const header = overlay.querySelector('#ai-interview-header');
const minimizeBtn = overlay.querySelector('#ai-interview-minimize');
const detachBtn = overlay.querySelector('#ai-interview-detach');
const hideBtn = overlay.querySelector('#ai-interview-hide');
const resizeHandle = overlay.querySelector('#ai-interview-resize');
let isDragging = false;
let startX = 0;
let startY = 0;
let startLeft = 0;
let startTop = 0;
header.addEventListener('mousedown', (event) => {
isDragging = true;
startX = event.clientX;
startY = event.clientY;
const rect = overlay.getBoundingClientRect();
startLeft = rect.left;
startTop = rect.top;
overlay.style.right = 'auto';
});
document.addEventListener('mousemove', (event) => {
if (!isDragging) return;
const nextLeft = startLeft + (event.clientX - startX);
const nextTop = startTop + (event.clientY - startY);
overlay.style.left = `${Math.max(8, nextLeft)}px`;
overlay.style.top = `${Math.max(8, nextTop)}px`;
});
document.addEventListener('mouseup', () => {
isDragging = false;
});
resizeHandle.addEventListener('mousedown', (event) => {
event.preventDefault();
event.stopPropagation();
const startWidth = overlay.offsetWidth;
const startHeight = overlay.offsetHeight;
const startMouseX = event.clientX;
const startMouseY = event.clientY;
const onMove = (moveEvent) => {
const nextWidth = Math.max(280, startWidth + (moveEvent.clientX - startMouseX));
const nextHeight = Math.max(240, startHeight + (moveEvent.clientY - startMouseY));
overlay.style.width = `${nextWidth}px`;
overlay.style.height = `${nextHeight}px`;
};
const onUp = () => {
document.removeEventListener('mousemove', onMove);
document.removeEventListener('mouseup', onUp);
};
document.addEventListener('mousemove', onMove);
document.addEventListener('mouseup', onUp);
});
minimizeBtn.addEventListener('click', () => {
overlay.classList.toggle('minimized');
minimizeBtn.textContent = overlay.classList.contains('minimized') ? 'Expand' : 'Minimize';
});
detachBtn.addEventListener('click', () => {
chrome.runtime.sendMessage({ action: 'openAssistantWindow' });
});
hideBtn.addEventListener('click', () => {
setOverlayHidden(true);
});
updateOverlayIndicator();
}
function updateOverlay(type, text) {
ensureOverlay();
applyOverlayHiddenState();
const modeEl = document.getElementById('ai-interview-mode');
if (modeEl) {
modeEl.textContent = `Mode: ${activeCaptureMode}`;
}
if (type === 'transcript') {
const transcriptEl = document.getElementById('ai-interview-transcript');
if (transcriptEl) transcriptEl.textContent = text;
}
if (type === 'response') {
const responseEl = document.getElementById('ai-interview-response');
if (responseEl) responseEl.textContent = text;
}
}
function updateOverlayIndicator() {
const indicator = document.getElementById('ai-interview-indicator');
if (!indicator) return;
indicator.classList.toggle('active', overlayListening);
if (!overlayListening) {
const bar = document.getElementById('ai-interview-meter-bar');
if (bar) bar.style.width = '0%';
}
}
function setOverlayHidden(hidden) {
overlayHidden = hidden;
applyOverlayHiddenState();
}
function applyOverlayHiddenState() {
const overlay = document.getElementById('ai-interview-overlay');
if (!overlay) return;
overlay.style.display = overlayHidden ? 'none' : '';
}
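// Drives the overlay level bar: samples time-domain audio once per animation
// frame, computes the RMS amplitude, and scales it (x2.5, clamped to 100%) so
// typical speech levels fill a visible portion of the meter.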
function createAudioMeter(stream) {
if (!audioContext) {
audioContext = new AudioContext();
}
stopAudioMeter();
analyserNode = audioContext.createAnalyser();
analyserNode.fftSize = 512;
analyserNode.smoothingTimeConstant = 0.8;
meterSource = audioContext.createMediaStreamSource(stream);
meterSource.connect(analyserNode);
const data = new Uint8Array(analyserNode.fftSize);
const tick = () => {
if (!analyserNode) return;
analyserNode.getByteTimeDomainData(data);
let sum = 0;
for (let i = 0; i < data.length; i++) {
const v = (data[i] - 128) / 128;
sum += v * v;
}
const rms = Math.sqrt(sum / data.length);
const normalized = Math.min(1, rms * 2.5);
const bar = document.getElementById('ai-interview-meter-bar');
if (bar) {
bar.style.width = `${Math.round(normalized * 100)}%`;
}
meterRaf = requestAnimationFrame(tick);
};
meterRaf = requestAnimationFrame(tick);
}
function stopAudioMeter() {
if (meterRaf) {
cancelAnimationFrame(meterRaf);
meterRaf = null;
}
if (meterSource) {
try {
meterSource.disconnect();
} catch (error) {
console.warn('Failed to disconnect meter source:', error);
}
meterSource = null;
}
if (analyserNode) {
try {
analyserNode.disconnect();
} catch (error) {
console.warn('Failed to disconnect analyser:', error);
}
analyserNode = null;
}
}