<!-- voicechatai/index.html -->

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>VoiceChat AI Assistant</title>
    <style>
        /* Design tokens: colours, corner radius and shadow reused by the rules below. */
        :root {
            --primary-color: #4361ee; /* header background, headings, icons, button fill */
            --secondary-color: #3f37c9; /* feature headings, button hover fill */
            --success-color: #4cc9f0; /* fill of the .pulse indicator */
            --light-color: #f8f9fa; /* NOTE(review): not referenced by any rule visible in this stylesheet */
            --dark-color: #212529; /* default text colour on body */
            --border-radius: 8px; /* corner radius of cards and boxes */
            --box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); /* resting shadow of cards */
        }

        /* Page canvas: no default spacing, light grey backdrop, system font stack. */
        body {
            margin: 0;
            padding: 0;
            background-color: #f0f2f5;
            color: var(--dark-color);
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            line-height: 1.6;
        }

        /* Horizontally centred content column, at most 900px wide. */
        .container {
            margin: 0 auto;
            max-width: 900px;
            padding: 20px;
        }

        /* Banner at the top of the page. */
        header {
            margin-bottom: 30px;
            padding: 20px 0;
            border-radius: var(--border-radius);
            background-color: var(--primary-color);
            box-shadow: var(--box-shadow);
            color: white;
            text-align: center;
        }

        h1 {
            margin: 0;
            font-size: 2.5rem;
        }

        /* Both banner captions are slightly translucent; they differ only in size and spacing. */
        .tagline,
        .subtext {
            opacity: 0.9;
        }

        .tagline {
            margin-top: 10px;
            font-size: 1.2rem;
        }

        .subtext {
            margin-top: 0;
            font-size: 1rem;
        }

        /* White card that holds the introductory copy. */
        .app-description {
            margin-bottom: 30px;
            padding: 25px;
            border-radius: var(--border-radius);
            background-color: white;
            box-shadow: var(--box-shadow);
        }

        /* Card heading sits flush with the top padding. */
        .app-description h2 {
            margin-top: 0;
            color: var(--primary-color);
        }

        /* Row of feature cards. The row wraps, so on narrow viewports the cards
           stack instead of being squeezed into three very thin columns. */
        .features {
            display: flex;
            flex-wrap: wrap;
            justify-content: space-between;
            gap: 20px;
            margin-bottom: 30px;
        }

        /* Cards grow equally to fill a row. 220px is the preferred width: a card
           that does not fit at that width moves to the next row. */
        .feature {
            background-color: white;
            border-radius: var(--border-radius);
            padding: 20px;
            flex: 1 1 220px;
            box-shadow: var(--box-shadow);
            text-align: center;
        }

        .feature h3 {
            color: var(--secondary-color);
            margin-top: 0;
        }

        /* Large emoji shown above each card heading. */
        .feature-icon {
            font-size: 2.5rem;
            margin-bottom: 15px;
            color: var(--primary-color);
        }

        /* Card around the start button and the status read-out. */
        .controls {
            margin-bottom: 30px;
            padding: 25px;
            border-radius: var(--border-radius);
            background-color: white;
            box-shadow: var(--box-shadow);
            text-align: center;
        }

        /* Vertical stack of the two transcript panels. */
        .conversation {
            display: flex;
            flex-direction: column;
            gap: 20px;
        }

        /* One transcript panel (voice input or AI response). */
        .speech-container {
            padding: 25px;
            border-radius: var(--border-radius);
            background-color: white;
            box-shadow: var(--box-shadow);
        }

        /* Panel heading: icon and label laid out on one row. */
        .speech-container h3 {
            display: flex;
            align-items: center;
            gap: 10px;
            margin-top: 0;
            color: var(--primary-color);
        }

        .speech-icon {
            font-size: 1.2rem;
        }

        /* Bordered area that displays the transcript or reply text. */
        .result-box {
            min-height: 50px;
            padding: 15px;
            border: 1px solid #e9ecef;
            border-radius: var(--border-radius);
            background-color: #f8f9fa;
        }

        /* Round indicator dot animated by the "pulse" keyframes below. */
        .pulse {
            display: inline-block;
            width: 20px;
            height: 20px;
            margin-right: 10px;
            border-radius: 50%;
            background: var(--success-color);
            animation: pulse 1.5s infinite;
        }

        /* Slight grow/shrink combined with a ring that expands and fades out. */
        @keyframes pulse {
            from {
                transform: scale(0.95);
                box-shadow: 0 0 0 0 rgba(76, 201, 240, 0.7);
            }

            70% {
                transform: scale(1);
                box-shadow: 0 0 0 10px rgba(76, 201, 240, 0);
            }

            to {
                transform: scale(0.95);
                box-shadow: 0 0 0 0 rgba(76, 201, 240, 0);
            }
        }

        /* Pill-shaped primary button. */
        button {
            display: inline-block;
            margin: 10px 0;
            padding: 12px 25px;
            border: none;
            border-radius: 50px;
            background-color: var(--primary-color);
            color: white;
            font-size: 16px;
            text-align: center;
            text-decoration: none;
            cursor: pointer;
            transition: all 0.3s ease;
        }

        /* Hover: darker fill, lifted 2px, stronger shadow. */
        button:hover {
            background-color: var(--secondary-color);
            box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
            transform: translateY(-2px);
        }

        /* Pressed: back to the resting position. */
        button:active {
            transform: translateY(0);
        }

        /* Status row: coloured light followed by the status text, centred. */
        .status-indicator {
            display: flex;
            justify-content: center;
            align-items: center;
            margin: 10px 0;
        }

        /* Neutral grey dot; a state modifier class below recolours it. */
        .status-light {
            width: 12px;
            height: 12px;
            margin-right: 10px;
            border-radius: 50%;
            background-color: #6c757d;
        }

        /* One colour per state. */
        .status-light.listening {
            background-color: #dc3545;
        }

        .status-light.processing {
            background-color: #ffc107;
        }

        .status-light.ready {
            background-color: #28a745;
        }

        .status-light.speaking {
            background-color: #17a2b8;
        }

        /* The listening and speaking states also blink. */
        .status-light.listening,
        .status-light.speaking {
            animation: blink 1s infinite;
        }

        /* Fade to 40% opacity at the midpoint and back. */
        @keyframes blink {
            0%,
            100% {
                opacity: 1;
            }

            50% {
                opacity: 0.4;
            }
        }

        /* Small, muted, centred footer text. */
        footer {
            margin-top: 30px;
            padding: 20px 0;
            color: #6c757d;
            font-size: 0.9rem;
            text-align: center;
        }
    </style>
</head>
<body>
    <div class="container">
        <!-- Banner: product name, demo label and tagline. -->
        <header>
            <h1>VoiceChat AI Assistant</h1>
            <p class="subtext"><i>Demo</i></p>
            <p class="tagline">Your hands-free AI companion - just speak and let AI respond</p>
        </header>

        <!-- Introductory copy; a section because it is a thematic group with its own heading. -->
        <section class="app-description">
            <h2>How It Works</h2>
            <p>
                VoiceChat AI enables completely hands-free interaction with advanced AI models.
                Simply speak your question or command, pause when you're done, and listen to the AI's spoken response.
                Perfect for cooking, driving, working out, or whenever your hands are busy.
            </p>
        </section>

        <!-- Feature cards. The emoji are decorative, so they are hidden from assistive
             technology; each card's heading already names the feature. -->
        <div class="features">
            <div class="feature">
                <div class="feature-icon" aria-hidden="true">🎤</div>
                <h3>Voice Recognition</h3>
                <p>Automatically detects when you've finished speaking and processes your request</p>
            </div>
            <div class="feature">
                <div class="feature-icon" aria-hidden="true">🧠</div>
                <h3>AI Processing</h3>
                <p>Connects to powerful language models to generate intelligent responses</p>
            </div>
            <div class="feature">
                <div class="feature-icon" aria-hidden="true">🔊</div>
                <h3>Voice Response</h3>
                <p>Speaks the AI's answer back to you with natural-sounding speech</p>
            </div>
        </div>

        <!-- Start button and status read-out. -->
        <section class="controls">
            <h2>Start a Conversation</h2>
            <p>Click the button below and start speaking after you see "Listening..."</p>
            <button id="startButton" type="button">Start Listening</button>

            <!-- role="status" makes this a polite live region, so changes to the
                 status text are announced by screen readers as well as shown. -->
            <div class="status-indicator" role="status">
                <div id="statusLight" class="status-light ready"></div>
                <span>Status: <span id="status">Ready</span></span>
            </div>
        </section>

        <!-- Transcript panels. The heading emoji are decorative and hidden from
             assistive technology so each heading is read as its label only. -->
        <div class="conversation">
            <div class="speech-container">
                <h3><span class="speech-icon" aria-hidden="true">🗣️</span> Your Voice Input</h3>
                <p>This shows what the system hears you saying in real-time:</p>
                <div id="output" class="result-box"></div>
            </div>

            <div class="speech-container">
                <h3><span class="speech-icon" aria-hidden="true">🤖</span> AI Response</h3>
                <p>The AI's answer will appear here and be spoken aloud:</p>
                <div id="finalResult" class="result-box"></div>
            </div>
        </div>

        <!-- Technology and privacy notes. Browser speech recognition can be server-based
             (for example in Chrome), so the page must not promise that audio never leaves the browser. -->
        <footer>
            <p>VoiceChat AI Assistant uses your browser's Speech Recognition API and connects to language models via WebSockets</p>
            <p>Speech is transcribed by your browser's speech recognition service, which may send audio to the browser vendor's servers - only the transcribed text is sent to the AI service</p>
        </footer>
    </div>

    <script>
        // Runtime settings
        const CONFIG = {
            // Delay in ms after the latest recognition result before recognition is stopped (3 seconds)
            silenceTimeout: 3000,
            // URL the WebSocket connects to
            wsEndpoint: 'ws://localhost:9898',
            // Value assigned to recognition.lang
            language: 'en-US'
        };

        // Element references, keyed by element id
        const DOM = ['startButton', 'status', 'statusLight', 'output', 'finalResult']
            .reduce((refs, id) => {
                refs[id] = document.getElementById(id);
                return refs;
            }, {});

        // Mutable state shared by the handlers below
        const STATE = {
            silenceTimer: null,      // id of the pending silence timeout, if any
            finalSpeechResult: '',   // most recent transcript received from the recognizer
            speaking: false          // true between an utterance's start and its end/error
        };

        /**
         * Show a status message and switch the status light to the matching state.
         * @param {string} status - Text written into the status label
         * @param {string} statusClass - Modifier class appended after "status-light"; may be empty
         */
        function setStatus(status, statusClass) {
            const { status: label, statusLight: light } = DOM;
            label.textContent = status;
            light.className = `status-light ${statusClass}`;
        }

        /**
         * Initialize the application: verify browser support, make sure a WebSocket
         * connection exists on window.socket, then wire up speech recognition.
         *
         * checkBrowserSupport() throws when the Web Speech API is missing, which
         * aborts the remaining steps.
         */
        function initApp() {
            checkBrowserSupport();

            // Reuse a connection that is already stored on window.socket. Calling
            // setupWebSocket() unconditionally would open a second connection whose
            // handlers also write to the page while nothing ever sends through it.
            if (!window.socket) {
                window.socket = setupWebSocket();
            }

            setupSpeechRecognition();
        }

        /**
         * Verify that the prefixed Web Speech recognition constructor exists.
         * When it does not, the page body is replaced with an explanatory message
         * and an Error is thrown so the caller stops initializing.
         * @throws {Error} If window.webkitSpeechRecognition is not available
         */
        function checkBrowserSupport() {
            if ('webkitSpeechRecognition' in window) {
                return;
            }

            const fallbackMarkup = [
                '<div class="container"><header><h1>Browser Not Supported</h1></header>',
                '<div class="app-description"><h2>Error</h2><p>Your browser does not support the Web Speech API. ',
                'Please try using Google Chrome or another compatible browser.</p></div></div>'
            ].join('');

            document.body.innerHTML = fallbackMarkup;
            throw new Error('Web Speech API not supported');
        }

        /**
         * Open the WebSocket connection to CONFIG.wsEndpoint and attach its handlers.
         * Connection state changes are reported in the AI response box.
         * @returns {WebSocket|null} The WebSocket instance, or null when the browser
         *     has no WebSocket support
         */
        function setupWebSocket() {
            if (!("WebSocket" in window)) {
                DOM.finalResult.textContent = 'WebSocket is not supported by your browser';
                console.error('WebSocket is not supported by your browser');
                return null;
            }

            const socket = new WebSocket(CONFIG.wsEndpoint);

            // A failed connection fires "error" followed by "close". Remember the
            // failure so the close handler does not overwrite the error message.
            let connectionFailed = false;

            socket.onmessage = handleWebSocketMessage;
            socket.onopen = () => {
                console.log('Connected to the server');
                DOM.finalResult.textContent = 'Connected to AI service - ready to chat!';
            };
            socket.onerror = () => {
                connectionFailed = true;
                console.error('WebSocket error occurred');
                DOM.finalResult.textContent = 'Error connecting to AI service. Please try again later.';
            };
            socket.onclose = () => {
                console.log('Disconnected from the server');
                if (!connectionFailed) {
                    DOM.finalResult.textContent = 'Disconnected from AI service.';
                }
            };

            return socket;
        }

        /**
         * WebSocket "message" handler: display the payload in the AI response box
         * and read it aloud.
         * @param {MessageEvent} event - WebSocket message event
         */
        function handleWebSocketMessage(event) {
            const { data: reply } = event;
            console.log('Received message from server =>', reply);
            DOM.finalResult.textContent = reply;
            speakText(reply);
        }

	var recognition = null; // shared recognizer; assigned by setupSpeechRecognition(), read by speakText()
        /**
         * Create the speech recognizer, configure it, attach the event handlers and
         * bind the start button to it. The instance is also stored in the
         * file-level `recognition` variable.
         * @returns {SpeechRecognition} The speech recognition instance
         */
        function setupSpeechRecognition() {
            const Recognizer = window.webkitSpeechRecognition;
            recognition = new Recognizer();

            Object.assign(recognition, {
                // Recognition options
                continuous: false,
                interimResults: true,
                lang: CONFIG.language,

                // Event handlers
                onresult: handleSpeechResult,
                onerror: handleSpeechError,
                onend: handleSpeechEnd,
                onstart: handleSpeechStart
            });

            // Clicking the start button begins a recognition session
            DOM.startButton.addEventListener('click', function onStartClick() {
                startListening(recognition);
            });

            return recognition;
        }

        /**
         * Recognizer "start" handler: show in the UI that the app is listening.
         */
        function handleSpeechStart() {
            const label = 'Listening...';
            const lightClass = 'listening';
            setStatus(label, lightClass);
        }

        /**
         * Start a recognition session and clear the previous transcript and reply.
         *
         * If the recognizer is already running, start() throws InvalidStateError.
         * That is not a failure (the current session keeps going), so it is logged
         * and the UI and stored transcript are left untouched. Any other exception
         * is reported in the status line.
         * @param {SpeechRecognition} recognition - The speech recognition instance
         */
        function startListening(recognition) {
            try {
                recognition.start();
            } catch (error) {
                if (error && error.name === 'InvalidStateError') {
                    // Already listening (e.g. a second click, or an automatic restart
                    // while a session is active): nothing to reset.
                    console.warn('Speech recognition is already running');
                    return;
                }
                console.error('Speech recognition error:', error);
                setStatus('Error starting recognition', '');
                return;
            }

            console.log('Listening started...');
            DOM.output.textContent = '';
            DOM.finalResult.textContent = '';
            STATE.finalSpeechResult = '';
        }

        /**
         * Recognizer "result" handler: show the latest transcript, remember it, and
         * (re)arm the silence timer.
         *
         * When the timer fires, recognition is stopped. The transcript is then sent
         * by the recognizer's "end" handler (handleSpeechEnd); sending it from the
         * timer as well would deliver the same text twice.
         * @param {SpeechRecognitionEvent} event - Speech recognition event
         */
        function handleSpeechResult(event) {
            // Keep a reference to the recognizer for the timer callback, which runs
            // after this event has finished dispatching.
            const recognizer = event.target || this;

            // Get the latest result
            const lastResultIndex = event.results.length - 1;
            const transcript = event.results[lastResultIndex][0].transcript;
            DOM.output.textContent = transcript;

            // Store the current result
            STATE.finalSpeechResult = transcript;

            // Reset the silence timer
            if (STATE.silenceTimer) clearTimeout(STATE.silenceTimer);

            // Start a new silence timer
            STATE.silenceTimer = setTimeout(() => {
                STATE.silenceTimer = null;
                console.log('Stopped listening due to silence');
                setStatus('Processing...', 'processing');

                // stop() ends the session; the resulting "end" event processes
                // STATE.finalSpeechResult exactly once.
                recognizer.stop();
            }, CONFIG.silenceTimeout);
        }

        /**
         * Recognizer "error" handler: log the error code and show it in the status
         * line with no state class on the status light.
         * @param {SpeechRecognitionErrorEvent} event - Speech recognition error event
         */
        function handleSpeechError(event) {
            const { error: reason } = event;
            console.error('Speech recognition error:', reason);
            setStatus(`Error: ${reason}`, '');
        }

        /**
         * Recognizer "end" handler: cancel the silence timer and hand the stored
         * transcript to processFinalSpeech().
         *
         * If the session ended while the UI still showed "Listening..." and nothing
         * was recognised, the empty transcript is passed on so the UI returns to
         * "Ready" with a retry prompt instead of staying on "Processing...".
         */
        function handleSpeechEnd() {
            // Clear any remaining silence timer
            if (STATE.silenceTimer) {
                clearTimeout(STATE.silenceTimer);
                STATE.silenceTimer = null;
            }
            console.log('Recognition ended');

            // Still "Listening..." means recognition ended on its own rather than
            // through the silence timer or an error (both change the status first).
            const wasListening = DOM.status.textContent === 'Listening...';
            const transcript = STATE.finalSpeechResult;

            if (!transcript) {
                // Nothing heard: processFinalSpeech() resets the status for empty text.
                if (wasListening) processFinalSpeech(transcript);
                return;
            }

            if (wasListening) {
                setStatus('Processing...', 'processing');
            }

            // Make sure we process the final result (skipped when the response box
            // already holds text)
            if (DOM.finalResult.textContent === '') {
                processFinalSpeech(transcript);
            }
        }

        /**
         * Send the recognised text to the AI service over window.socket.
         *
         * Empty or whitespace-only text is not sent; the UI returns to "Ready" with
         * a retry prompt. If the socket is missing or not open, the text cannot be
         * delivered, so the UI returns to "Ready" with a connection message instead
         * of throwing or waiting for a reply that will never arrive.
         * @param {string} text - The recognized speech text
         */
        function processFinalSpeech(text) {
            console.log('Processing final speech:', text);

            if (!text || text.trim() === '') {
                setStatus('Ready', 'ready');
                DOM.finalResult.textContent = "I didn't catch that. Please try speaking again.";
                return;
            }

            // window.socket is null when WebSocket is unsupported; send() throws while
            // the socket is still connecting and silently discards data once closed.
            const socket = window.socket;
            if (!socket || socket.readyState !== WebSocket.OPEN) {
                console.error('Cannot send speech: WebSocket is not open');
                setStatus('Ready', 'ready');
                DOM.finalResult.textContent = 'Not connected to AI service. Please reload the page and try again.';
                return;
            }

            socket.send(text);
        }

        /**
         * Read text aloud with the browser's speech synthesis and keep the status
         * line in sync. When the utterance finishes, listening is started again.
         * @param {string} text - Text to speak
         */
        function speakText(text) {
            const synth = window.speechSynthesis;

            // Guard: no synthesis support
            if (!synth) {
                console.error('Speech synthesis is not supported');
                DOM.finalResult.textContent = 'Speech synthesis is not supported in this browser';
                return;
            }

            // Guard: nothing to say
            if (!text) {
                setStatus('No text to speak', 'ready');
                return;
            }

            // Cancel any ongoing speech
            if (STATE.speaking) {
                synth.cancel();
            }

            const utterance = new SpeechSynthesisUtterance(text);
            const markSpeaking = isSpeaking => {
                STATE.speaking = isSpeaking;
            };

            Object.assign(utterance, {
                onstart: () => {
                    markSpeaking(true);
                    setStatus('Speaking...', 'speaking');
                },
                onend: () => {
                    markSpeaking(false);
                    setStatus('Ready', 'ready');
                    // Go straight back to listening for the next utterance
                    startListening(recognition);
                },
                onerror: event => {
                    markSpeaking(false);
                    setStatus('Error speaking', '');
                    console.error('Speech synthesis error:', event);
                }
            });

            synth.speak(utterance);
        }

        // Store WebSocket globally for access from other functions
        // (processFinalSpeech() sends the recognised text through window.socket).
        // setupWebSocket() returns null when the browser has no WebSocket support.
        window.socket = setupWebSocket();

        // Initialize the application (see initApp). This runs after the connection
        // above has been created, and throws if the Web Speech API is unavailable.
        initApp();
    </script>
</body>
</html>