feat: Enhance audio capture and monitoring features

- Added "audioCapture" permission to manifest for microphone access.
- Introduced DeepSeek as a new AI provider option in the side panel.
- Implemented a capture mode selection (tab-only, mic-only, mixed) in the side panel.
- Added options to enable/disable the extension and auto-open the assistant window.
- Integrated a mic monitor feature with live input level visualization.
- Included buttons for requesting microphone permission and granting tab access.
- Updated styles for new sections and mic level visualization.
- Enhanced model fetching logic to support DeepSeek and improved error handling.
This commit is contained in:
2026-01-31 21:55:09 +01:00
parent 246506b177
commit 56d56395ee
11 changed files with 1651 additions and 276 deletions

View File

@@ -7,6 +7,19 @@ document.addEventListener('DOMContentLoaded', function() {
const aiProviderSelect = document.getElementById('aiProvider');
const modelSelect = document.getElementById('modelSelect');
const apiKeyStatus = document.getElementById('apiKeyStatus');
const requestMicPermissionBtn = document.getElementById('requestMicPermission');
const showOverlayBtn = document.getElementById('showOverlay');
const micPermissionStatus = document.getElementById('micPermissionStatus');
const grantTabAccessBtn = document.getElementById('grantTabAccess');
const tabAccessStatus = document.getElementById('tabAccessStatus');
const speedModeToggle = document.getElementById('speedModeToggle');
const captureModeSelect = document.getElementById('captureModeSelect');
const autoOpenAssistantWindowToggle = document.getElementById('autoOpenAssistantWindow');
const extensionActiveToggle = document.getElementById('extensionActiveToggle');
const inputDeviceSelect = document.getElementById('inputDeviceSelect');
const inputDeviceStatus = document.getElementById('inputDeviceStatus');
const micLevelBar = document.getElementById('micLevelBar');
const startMicMonitorBtn = document.getElementById('startMicMonitor');
// Context management elements
const contextFileInput = document.getElementById('contextFileInput');
@@ -28,6 +41,11 @@ document.addEventListener('DOMContentLoaded', function() {
let isListening = false;
let remoteServerActive = false;
let micMonitorStream = null;
let micMonitorCtx = null;
let micMonitorSource = null;
let micMonitorAnalyser = null;
let micMonitorRaf = null;
// AI Provider configurations
const aiProviders = {
@@ -52,6 +70,13 @@ document.addEventListener('DOMContentLoaded', function() {
apiKeyPlaceholder: 'Enter your Google AI API Key',
requiresKey: true
},
deepseek: {
name: 'DeepSeek',
models: ['deepseek-chat', 'deepseek-reasoner'],
defaultModel: 'deepseek-chat',
apiKeyPlaceholder: 'Enter your DeepSeek API Key',
requiresKey: true
},
ollama: {
name: 'Ollama',
models: ['llama3.2', 'llama3.1', 'mistral', 'codellama', 'phi3'],
@@ -60,15 +85,26 @@ document.addEventListener('DOMContentLoaded', function() {
requiresKey: false
}
};
const modelCache = {};
const modelFetchState = {};
// Load saved settings
chrome.storage.sync.get(['aiProvider', 'selectedModel', 'apiKeys'], (result) => {
chrome.storage.sync.get(['aiProvider', 'selectedModel', 'apiKeys', 'speedMode', 'captureMode', 'autoOpenAssistantWindow', 'inputDeviceId', 'extensionActive'], (result) => {
const savedProvider = result.aiProvider || 'openai';
const savedModel = result.selectedModel || aiProviders[savedProvider].defaultModel;
const savedApiKeys = result.apiKeys || {};
const speedMode = Boolean(result.speedMode);
const captureMode = result.captureMode || 'tab';
const autoOpenAssistantWindow = Boolean(result.autoOpenAssistantWindow);
const savedInputDeviceId = result.inputDeviceId || '';
const extensionActive = result.extensionActive !== false;
aiProviderSelect.value = savedProvider;
updateModelOptions(savedProvider, savedModel);
if (captureModeSelect) captureModeSelect.value = captureMode;
if (speedModeToggle) speedModeToggle.checked = speedMode;
if (autoOpenAssistantWindowToggle) autoOpenAssistantWindowToggle.checked = autoOpenAssistantWindow;
if (extensionActiveToggle) extensionActiveToggle.checked = extensionActive;
refreshModelOptions(savedProvider, savedModel, savedApiKeys[savedProvider]);
updateApiKeyInput(savedProvider);
if (savedApiKeys[savedProvider] && aiProviders[savedProvider].requiresKey) {
@@ -77,14 +113,18 @@ document.addEventListener('DOMContentLoaded', function() {
saveApiKeyButton.textContent = 'API Key Saved';
saveApiKeyButton.disabled = true;
}
if (inputDeviceSelect) {
loadInputDevices(savedInputDeviceId);
}
});
// Load and display saved contexts
loadContexts();
// Helper functions
function updateModelOptions(provider, selectedModel = null) {
const models = aiProviders[provider].models;
function updateModelOptions(provider, selectedModel = null, modelsOverride = null) {
const models = modelsOverride || modelCache[provider] || aiProviders[provider].models;
modelSelect.innerHTML = '';
models.forEach(model => {
@@ -117,6 +157,286 @@ document.addEventListener('DOMContentLoaded', function() {
apiKeyStatus.className = `status-message ${type}`;
}
// Render a status line in the mic-permission area; no-op when the element is absent.
function updateMicPermissionStatus(message, type) {
  if (micPermissionStatus) {
    micPermissionStatus.textContent = message;
    micPermissionStatus.className = `status-message ${type}`;
  }
}
// Render a status line in the input-device area; no-op when the element is absent.
function updateInputDeviceStatus(message, type) {
  if (inputDeviceStatus) {
    inputDeviceStatus.textContent = message;
    inputDeviceStatus.className = `status-message ${type}`;
  }
}
// Render a status line in the tab-access area; no-op when the element is absent.
function updateTabAccessStatus(message, type) {
  if (tabAccessStatus) {
    tabAccessStatus.textContent = message;
    tabAccessStatus.className = `status-message ${type}`;
  }
}
// Decide which model to select for a provider: the preferred model when it is
// offered, else the provider's default when offered, else the first entry.
function pickModel(provider, preferredModel, models) {
  const offered = new Set(models);
  if (preferredModel && offered.has(preferredModel)) {
    return preferredModel;
  }
  const fallback = aiProviders[provider].defaultModel;
  return fallback && offered.has(fallback) ? fallback : models[0];
}
// Refresh the model <select> for a provider: fetch the live model list
// (local Ollama daemon, or the provider's API when a key is available),
// cache a non-empty result, then repopulate the dropdown with the best
// available list. A per-provider in-flight flag suppresses overlapping
// refreshes; the finally block guarantees the select is re-enabled.
async function refreshModelOptions(provider, preferredModel, apiKey) {
  if (modelFetchState[provider]) {
    return;
  }
  modelFetchState[provider] = true;
  modelSelect.disabled = true;
  modelSelect.innerHTML = '<option>Loading models...</option>';
  try {
    let fetched = null;
    if (provider === 'ollama') {
      fetched = await fetchOllamaModels();
    } else if (apiKey && aiProviders[provider].requiresKey) {
      fetched = await fetchRemoteModels(provider, apiKey);
    }
    if (fetched && fetched.length) {
      modelCache[provider] = fetched;
    }
  } catch (error) {
    console.warn(`Failed to fetch models for ${provider}:`, error);
  } finally {
    modelFetchState[provider] = false;
    const available = modelCache[provider] || aiProviders[provider].models;
    const chosen = pickModel(provider, preferredModel, available);
    updateModelOptions(provider, chosen, available);
    chrome.storage.sync.set({ selectedModel: chosen });
    modelSelect.disabled = false;
  }
}
// Populate the input-device <select> with the available microphones,
// preselecting `preferredDeviceId` when it is still present. Device labels
// only become visible after the user grants mic permission, so the status
// line hints at that when labels are missing.
async function loadInputDevices(preferredDeviceId = '') {
  if (!navigator.mediaDevices || !navigator.mediaDevices.enumerateDevices) {
    updateInputDeviceStatus('Device enumeration is not supported in this browser.', 'error');
    return;
  }
  try {
    const allDevices = await navigator.mediaDevices.enumerateDevices();
    const mics = allDevices.filter((device) => device.kind === 'audioinput');
    const hasLabels = mics.some((device) => device.label);
    inputDeviceSelect.innerHTML = '';
    if (mics.length === 0) {
      const placeholder = document.createElement('option');
      placeholder.value = '';
      placeholder.textContent = 'No input devices found';
      inputDeviceSelect.appendChild(placeholder);
      inputDeviceSelect.disabled = true;
      updateInputDeviceStatus('No microphone devices detected.', 'error');
      return;
    }
    for (const [index, device] of mics.entries()) {
      const option = document.createElement('option');
      option.value = device.deviceId;
      // Fall back to a positional name when permission hasn't unlocked labels.
      option.textContent = device.label || `Microphone ${index + 1}`;
      if (device.deviceId === preferredDeviceId) {
        option.selected = true;
      }
      inputDeviceSelect.appendChild(option);
    }
    inputDeviceSelect.disabled = false;
    const current = inputDeviceSelect.options[inputDeviceSelect.selectedIndex];
    if (hasLabels) {
      updateInputDeviceStatus(`Selected: ${current ? current.textContent : 'Unknown'}`, '');
    } else {
      updateInputDeviceStatus('Grant mic permission to see device names.', '');
    }
  } catch (error) {
    console.warn('Failed to enumerate devices:', error);
    updateInputDeviceStatus('Failed to list input devices.', 'error');
  }
}
// Tear down the live mic monitor: cancel the animation loop, disconnect the
// audio graph, close the AudioContext, release the stream's tracks, and reset
// the level bar. Safe to call repeatedly or when nothing is running.
function stopMicMonitor() {
  if (micMonitorRaf) {
    cancelAnimationFrame(micMonitorRaf);
    micMonitorRaf = null;
  }
  if (micMonitorSource) {
    try {
      micMonitorSource.disconnect();
    } catch (error) {
      console.warn('Failed to disconnect mic monitor source:', error);
    }
    micMonitorSource = null;
  }
  if (micMonitorAnalyser) {
    try {
      micMonitorAnalyser.disconnect();
    } catch (error) {
      console.warn('Failed to disconnect mic monitor analyser:', error);
    }
    micMonitorAnalyser = null;
  }
  if (micMonitorCtx) {
    // Fix: AudioContext.close() returns a promise and rejects when the context
    // is already closed; previously that surfaced as an unhandled rejection.
    // Guard both the synchronous call and the returned promise.
    try {
      const closing = micMonitorCtx.close();
      if (closing && typeof closing.catch === 'function') {
        closing.catch((error) => console.warn('Failed to close mic monitor context:', error));
      }
    } catch (error) {
      console.warn('Failed to close mic monitor context:', error);
    }
    micMonitorCtx = null;
  }
  if (micMonitorStream) {
    micMonitorStream.getTracks().forEach(track => track.stop());
    micMonitorStream = null;
  }
  if (micLevelBar) {
    micLevelBar.style.width = '0%';
  }
}
// Start a live microphone level monitor: capture the currently selected input
// device, feed it through an AnalyserNode, and animate #micLevelBar with the
// RMS level on every animation frame. Any previous monitor session is torn
// down first via stopMicMonitor(). The stream/context/analyser/RAF handles are
// kept in module-level state so stopMicMonitor() can release them later.
async function startMicMonitor() {
if (!micLevelBar || !inputDeviceSelect) return;
stopMicMonitor();
updateInputDeviceStatus('Requesting microphone access...', '');
const deviceId = inputDeviceSelect.value;
// Pin capture to the chosen device when one is selected; otherwise let the
// browser pick its default microphone.
const constraints = deviceId ? { audio: { deviceId: { exact: deviceId } } } : { audio: true };
try {
micMonitorStream = await navigator.mediaDevices.getUserMedia(constraints);
micMonitorCtx = new AudioContext();
micMonitorAnalyser = micMonitorCtx.createAnalyser();
micMonitorAnalyser.fftSize = 512;
micMonitorAnalyser.smoothingTimeConstant = 0.8;
micMonitorSource = micMonitorCtx.createMediaStreamSource(micMonitorStream);
micMonitorSource.connect(micMonitorAnalyser);
// Time-domain byte samples (0..255, silence centred on 128); the analyser
// exposes fftSize samples per frame.
const data = new Uint8Array(micMonitorAnalyser.fftSize);
const tick = () => {
// stopMicMonitor() nulls the analyser, which cleanly ends this loop.
if (!micMonitorAnalyser) return;
micMonitorAnalyser.getByteTimeDomainData(data);
let sum = 0;
for (let i = 0; i < data.length; i++) {
// Re-centre each byte sample to [-1, 1] before accumulating power.
const v = (data[i] - 128) / 128;
sum += v * v;
}
const rms = Math.sqrt(sum / data.length);
// 2.5x gain so typical speech reaches a visible bar level; clamp at 100%.
const normalized = Math.min(1, rms * 2.5);
micLevelBar.style.width = `${Math.round(normalized * 100)}%`;
micMonitorRaf = requestAnimationFrame(tick);
};
micMonitorRaf = requestAnimationFrame(tick);
const selectedOption = inputDeviceSelect.options[inputDeviceSelect.selectedIndex];
updateInputDeviceStatus(`Mic monitor active: ${selectedOption ? selectedOption.textContent : 'Unknown'}`, 'success');
} catch (error) {
console.warn('Failed to start mic monitor:', error);
// Map the common getUserMedia failure names to actionable messages.
if (error && error.name === 'NotAllowedError') {
updateInputDeviceStatus('Microphone permission denied. Click "Request Microphone Permission".', 'error');
} else if (error && error.name === 'NotFoundError') {
updateInputDeviceStatus('No microphone found for the selected device.', 'error');
} else {
updateInputDeviceStatus('Microphone permission denied or unavailable.', 'error');
}
}
}
// Route a model-list request to the fetcher for the given provider.
// Unknown providers resolve to an empty list.
async function fetchRemoteModels(provider, apiKey) {
  switch (provider) {
    case 'openai':
      return fetchOpenAIModels(apiKey);
    case 'anthropic':
      return fetchAnthropicModels(apiKey);
    case 'google':
      return fetchGoogleModels(apiKey);
    case 'deepseek':
      return fetchDeepSeekModels(apiKey);
    default:
      return [];
  }
}
// Retrieve the model list from the OpenAI API, narrowed to chat-capable
// families (gpt-*, o1/o3/o4/o5). When the prefix filter matches nothing,
// the full id list is returned instead. Result is deduplicated and sorted.
// Throws when the HTTP response is not OK.
async function fetchOpenAIModels(apiKey) {
  const response = await fetch('https://api.openai.com/v1/models', {
    headers: { 'Authorization': `Bearer ${apiKey}` }
  });
  if (!response.ok) {
    throw new Error(`OpenAI models request failed: ${response.status}`);
  }
  const payload = await response.json();
  const ids = [];
  for (const item of payload.data || []) {
    if (item.id) ids.push(item.id);
  }
  const chatPrefixes = ['gpt-', 'o1', 'o3', 'o4', 'o5'];
  const chatModels = ids.filter((id) => chatPrefixes.some((prefix) => id.startsWith(prefix)));
  const models = chatModels.length ? chatModels : ids;
  return [...new Set(models)].sort();
}
// Retrieve the model list from the Anthropic API. Tolerates either `data` or
// `models` as the payload array, and either `id` or `name` per entry.
// Result is deduplicated and sorted; throws when the response is not OK.
async function fetchAnthropicModels(apiKey) {
  const response = await fetch('https://api.anthropic.com/v1/models', {
    headers: {
      'Content-Type': 'application/json',
      'x-api-key': apiKey,
      'anthropic-version': '2023-06-01'
    }
  });
  if (!response.ok) {
    throw new Error(`Anthropic models request failed: ${response.status}`);
  }
  const payload = await response.json();
  const entries = payload.data || payload.models || [];
  const ids = entries
    .map((entry) => entry.id || entry.name)
    .filter(Boolean);
  return [...new Set(ids)].sort();
}
// Retrieve Gemini model names that support generateContent from the Google
// Generative Language API, stripping the `models/` prefix. Result is
// deduplicated and sorted; throws when the response is not OK.
// Fix: the API key is now set via URL.searchParams so it is percent-encoded,
// instead of being concatenated raw into the query string (keys containing
// reserved characters previously produced a malformed request URL).
async function fetchGoogleModels(apiKey) {
  const url = new URL('https://generativelanguage.googleapis.com/v1beta/models');
  url.searchParams.set('key', apiKey);
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Google models request failed: ${response.status}`);
  }
  const data = await response.json();
  const models = (data.models || [])
    .filter((model) => (model.supportedGenerationMethods || []).includes('generateContent'))
    .map((model) => model.name || '')
    .map((name) => name.replace(/^models\//, ''))
    .filter(Boolean);
  return Array.from(new Set(models)).sort();
}
// Retrieve the available model ids from the DeepSeek API (OpenAI-compatible
// /v1/models endpoint). Result is deduplicated and sorted; throws on non-OK.
async function fetchDeepSeekModels(apiKey) {
  const response = await fetch('https://api.deepseek.com/v1/models', {
    headers: { 'Authorization': `Bearer ${apiKey}` }
  });
  if (!response.ok) {
    throw new Error(`DeepSeek models request failed: ${response.status}`);
  }
  const payload = await response.json();
  const ids = (payload.data || [])
    .map((item) => item.id)
    .filter(Boolean);
  return [...new Set(ids)].sort();
}
// Query the local Ollama daemon for its installed models (no auth needed).
// Result is deduplicated and sorted; throws when the daemon responds non-OK.
async function fetchOllamaModels() {
  const response = await fetch('http://localhost:11434/api/tags');
  if (!response.ok) {
    throw new Error(`Ollama models request failed: ${response.status}`);
  }
  const payload = await response.json();
  const names = (payload.models || [])
    .map(({ name }) => name)
    .filter(Boolean);
  return [...new Set(names)].sort();
}
// Context Management Functions
async function loadContexts() {
const result = await chrome.storage.local.get('contexts');
@@ -326,15 +646,8 @@ document.addEventListener('DOMContentLoaded', function() {
// Event listeners
aiProviderSelect.addEventListener('change', function() {
const selectedProvider = this.value;
updateModelOptions(selectedProvider);
updateApiKeyInput(selectedProvider);
// Save provider selection
chrome.storage.sync.set({
aiProvider: selectedProvider,
selectedModel: aiProviders[selectedProvider].defaultModel
});
// Load saved API key for this provider
chrome.storage.sync.get('apiKeys', (result) => {
const apiKeys = result.apiKeys || {};
@@ -348,6 +661,13 @@ document.addEventListener('DOMContentLoaded', function() {
saveApiKeyButton.textContent = 'Save API Key';
saveApiKeyButton.disabled = !aiProviders[selectedProvider].requiresKey;
}
refreshModelOptions(selectedProvider, aiProviders[selectedProvider].defaultModel, apiKeys[selectedProvider]);
});
// Save provider selection
chrome.storage.sync.set({
aiProvider: selectedProvider
});
});
@@ -355,6 +675,57 @@ document.addEventListener('DOMContentLoaded', function() {
chrome.storage.sync.set({ selectedModel: this.value });
});
// Persist the capture mode (tab / mic / mixed) whenever it changes.
if (captureModeSelect) {
  captureModeSelect.addEventListener('change', () => {
    chrome.storage.sync.set({ captureMode: captureModeSelect.value });
  });
}
// Persist the auto-open-assistant-window preference.
if (autoOpenAssistantWindowToggle) {
  autoOpenAssistantWindowToggle.addEventListener('change', () => {
    chrome.storage.sync.set({ autoOpenAssistantWindow: autoOpenAssistantWindowToggle.checked });
  });
}
// Ask the background script to flip the global active state, then sync the
// checkbox back to whatever state the background actually applied.
if (extensionActiveToggle) {
  extensionActiveToggle.addEventListener('change', () => {
    const isActive = extensionActiveToggle.checked;
    chrome.runtime.sendMessage({ action: 'setActiveState', isActive }, (response) => {
      if (chrome.runtime.lastError) {
        return;
      }
      if (response && response.success) {
        extensionActiveToggle.checked = response.isActive;
      }
    });
  });
}
// Remember the chosen input device; if a monitor is already running, restart
// it so the level bar reflects the newly selected microphone.
if (inputDeviceSelect) {
  inputDeviceSelect.addEventListener('change', () => {
    const deviceId = inputDeviceSelect.value;
    chrome.storage.sync.set({ inputDeviceId: deviceId });
    const selectedOption = inputDeviceSelect.options[inputDeviceSelect.selectedIndex];
    updateInputDeviceStatus(`Selected: ${selectedOption ? selectedOption.textContent : 'Unknown'}`, '');
    if (micMonitorStream) {
      startMicMonitor();
    }
  });
}
// Wire up the mic-monitor button and show the initial usage hint.
if (startMicMonitorBtn) {
  startMicMonitorBtn.addEventListener('click', () => startMicMonitor());
  updateInputDeviceStatus('Click "Enable Mic Monitor" to see live input level.', '');
}
// Persist the speed-mode preference.
if (speedModeToggle) {
  speedModeToggle.addEventListener('change', () => {
    chrome.storage.sync.set({ speedMode: speedModeToggle.checked });
  });
}
apiKeyInput.addEventListener('input', function() {
if (aiProviders[aiProviderSelect.value].requiresKey) {
saveApiKeyButton.textContent = 'Save API Key';
@@ -381,6 +752,7 @@ document.addEventListener('DOMContentLoaded', function() {
saveApiKeyButton.textContent = 'API Key Saved';
saveApiKeyButton.disabled = true;
updateApiKeyStatus('API Key Saved', 'success');
refreshModelOptions(provider, modelSelect.value, apiKey);
});
});
} else {
@@ -445,23 +817,93 @@ document.addEventListener('DOMContentLoaded', function() {
toggleButton.textContent = isListening ? 'Stop Listening' : 'Start Listening';
if (isListening) {
if (extensionActiveToggle && !extensionActiveToggle.checked) {
isListening = false;
toggleButton.textContent = 'Start Listening';
aiResponseDiv.textContent = 'Extension is inactive. Turn it on to start listening.';
return;
}
// Send current AI configuration with start listening
const currentProvider = aiProviderSelect.value;
const currentModel = modelSelect.value;
const captureMode = captureModeSelect ? captureModeSelect.value : 'tab';
chrome.runtime.sendMessage({
action: 'startListening',
aiProvider: currentProvider,
model: currentModel
model: currentModel,
captureMode: captureMode
});
transcriptDiv.textContent = 'Listening for questions...';
aiResponseDiv.textContent = `Using ${aiProviders[currentProvider].name} (${currentModel}). The answer will appear here.`;
chrome.storage.sync.get(['autoOpenAssistantWindow'], (result) => {
if (result.autoOpenAssistantWindow) {
chrome.runtime.sendMessage({ action: 'openAssistantWindow' });
}
});
} else {
chrome.runtime.sendMessage({action: 'stopListening'});
transcriptDiv.textContent = '';
aiResponseDiv.textContent = '';
}
});
// Ask the content script in the active tab to show the on-page overlay.
if (showOverlayBtn) {
  showOverlayBtn.addEventListener('click', () => {
    chrome.tabs.query({ active: true, currentWindow: true }, (tabs) => {
      if (chrome.runtime.lastError || !tabs.length) {
        return;
      }
      chrome.tabs.sendMessage(tabs[0].id, { action: 'showOverlay' });
    });
  });
}
// Trigger the browser's mic-permission prompt. The captured stream is released
// immediately: we only need the grant (and the device labels it unlocks).
if (requestMicPermissionBtn) {
  requestMicPermissionBtn.addEventListener('click', async () => {
    updateMicPermissionStatus('Requesting microphone permission...', '');
    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      stream.getTracks().forEach((track) => track.stop());
      updateMicPermissionStatus('Microphone permission granted.', 'success');
      if (inputDeviceSelect) {
        loadInputDevices(inputDeviceSelect.value);
      }
    } catch (error) {
      if (error && error.name === 'NotAllowedError') {
        updateMicPermissionStatus('Microphone permission denied. Please allow access for the extension.', 'error');
      } else if (error && error.name === 'NotFoundError') {
        updateMicPermissionStatus('No microphone found.', 'error');
      } else {
        updateMicPermissionStatus(error && error.message ? error.message : 'Failed to request microphone permission.', 'error');
      }
    }
  });
}
// Ask the background script for tab-capture access and surface the outcome.
if (grantTabAccessBtn) {
  grantTabAccessBtn.addEventListener('click', () => {
    updateTabAccessStatus('Requesting tab access...', '');
    chrome.runtime.sendMessage({ action: 'grantTabAccess' }, (response) => {
      if (chrome.runtime.lastError) {
        updateTabAccessStatus('Failed to request tab access. Click the extension icon on the target tab.', 'error');
        return;
      }
      if (response && response.success) {
        updateTabAccessStatus('Tab access granted. You can start listening now.', 'success');
      } else {
        updateTabAccessStatus(response && response.error ? response.error : 'Click the extension icon on the target tab to grant access.', 'error');
      }
    });
  });
}
// Re-enumerate microphones whenever the hardware set changes (plug/unplug).
if (navigator.mediaDevices && navigator.mediaDevices.addEventListener) {
  navigator.mediaDevices.addEventListener('devicechange', () => {
    if (inputDeviceSelect) {
      loadInputDevices(inputDeviceSelect.value);
    }
  });
}
chrome.runtime.onMessage.addListener(function(request, sender, sendResponse) {
if (request.action === 'updateTranscript') {
@@ -470,4 +912,4 @@ document.addEventListener('DOMContentLoaded', function() {
aiResponseDiv.textContent = request.response;
}
});
});
});