From d867d479351d2c900b1d78912d79edddb6db7281 Mon Sep 17 00:00:00 2001 From: root Date: Mon, 26 May 2025 11:14:15 +0000 Subject: [PATCH] Initial commit of my voice chat AI project --- .dockerignore | 16 + .env.example | 1 + .gitattributes | 2 + .gitignore | 22 ++ Dockerfile | 28 ++ LICENSE | 9 + README.md | 16 + blog/blog-post.md | 349 +++++++++++++++++++ blog/images/VoiceChatWebapp.jpg | 3 + blog/images/clientserver.png | 3 + blog/images/headphones.png | 3 + config.yaml | 5 + docker-compose.yaml | 42 +++ index.html | 572 ++++++++++++++++++++++++++++++++ index.js | 137 ++++++++ package-lock.json | 229 +++++++++++++ package.json | 28 ++ 17 files changed, 1465 insertions(+) create mode 100644 .dockerignore create mode 100644 .env.example create mode 100644 .gitattributes create mode 100644 .gitignore create mode 100644 Dockerfile create mode 100644 LICENSE create mode 100644 README.md create mode 100644 blog/blog-post.md create mode 100644 blog/images/VoiceChatWebapp.jpg create mode 100644 blog/images/clientserver.png create mode 100644 blog/images/headphones.png create mode 100644 config.yaml create mode 100644 docker-compose.yaml create mode 100644 index.html create mode 100644 index.js create mode 100644 package-lock.json create mode 100644 package.json diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..4fe351f --- /dev/null +++ b/.dockerignore @@ -0,0 +1,16 @@ +node_modules +npm-debug.log +.git +.gitignore +README.md +.env.example +.nyc_output +coverage +.coverage +.cache +Dockerfile +.dockerignore +docker-compose.yml +.DS_Store +*.log +blog diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..3fa462e --- /dev/null +++ b/.env.example @@ -0,0 +1 @@ +API_KEY='your_litellm_secret_key' diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..1691f22 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +*.png filter=lfs diff=lfs merge=lfs -text +*.jpg filter=lfs diff=lfs merge=lfs -text diff 
--git a/.gitignore b/.gitignore new file mode 100644 index 0000000..77d74fb --- /dev/null +++ b/.gitignore @@ -0,0 +1,22 @@ +# Node.js ignore file + +# Ignore log files +logs +*.log + +# Ignore runtime data +pids +*.pid +*.seed + +# Ignore OS-specific files +.DS_Store + Thumbs.db + +# Ignore environment files +.env +.env.local +.env.prod + +# Ignore node_modules +node_modules/ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..7e35d3a --- /dev/null +++ b/Dockerfile @@ -0,0 +1,28 @@ +# Use official Node.js runtime as base image +FROM node:18-alpine + +# Set working directory in container +WORKDIR /app + +# Copy package.json and package-lock.json (if available) +COPY package*.json ./ + +# Install dependencies +RUN npm ci --only=production + +# Copy application code +COPY . . + +# Create non-root user for security +RUN addgroup -g 1001 -S nodejs +RUN adduser -S nextjs -u 1001 + +# Change ownership of the app directory +RUN chown -R nextjs:nodejs /app +USER nextjs + +# Expose port (adjust if your app uses different port) +EXPOSE 9898 + +# Command to run the application +CMD ["node", "index.js"] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..0d21b9e --- /dev/null +++ b/LICENSE @@ -0,0 +1,9 @@ +MIT License + +Copyright (c) 2025 Public + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..17e9a7b --- /dev/null +++ b/README.md @@ -0,0 +1,16 @@ +# voicechat + +VoiceChat AI Assistant - Demo + +Your hands-free AI companion - just speak and let AI respond + +# Docker Commands + +# Build and start the application +docker-compose up --build -d + +# View logs +docker-compose logs -f + +# Stop the application +docker-compose down diff --git a/blog/blog-post.md b/blog/blog-post.md new file mode 100644 index 0000000..e5bfbe7 --- /dev/null +++ b/blog/blog-post.md @@ -0,0 +1,349 @@ +# Building a Hands-Free AI Assistant: Speech Recognition Meets LLMs + +*Posted on May 20, 2025 by David H Sells* + +![A robot with headphones and a microphone](images/headphones.png) + +## TL;DR +I built a hands-free AI assistant that lets you talk to an LLM without touching your keyboard. Speak, wait for silence, and let the AI respond with its synthesized voice. All using JavaScript, WebSockets, and the Web Speech API. Code included! + +## The "Why?" + +Ever had that moment when you're elbow-deep in cookie dough and suddenly need to convert tablespoons to milliliters? Or maybe you're changing a tire and need to remember the proper torque settings? + +I found myself constantly wanting to talk to AI assistants **without having to touch anything**. Sure, there are commercial solutions like Alexa and Google Assistant, but I wanted something: + +1. That I could customize completely +2. That would use my choice of language model +3. 
That wouldn't constantly listen and send audio to the cloud +4. That I could host on my own hardware + +So I built this hands-free LLM interface that uses speech recognition to understand you, sends your question to any LLM, and then speaks the response back to you. + +## The Magic Ingredients + +Our speech-powered AI assistant requires four main components: + +1. **Speech Recognition** - To understand what you're saying +2. **LiteLLM Proxy** - A unified API gateway that interfaces with multiple LLM providers +3. **LLM API Communication** - To get intelligent responses (via GROQ's Llama3-70B) +4. **Speech Synthesis** - To speak those responses back to you + +Let's dive into how we built each part! + +## The Architecture: A Three-Tier Symphony + +Our application consists of three main components working in harmony: + +1. **Client (index.html)** - Handles speech recognition and synthesis in the browser +2. **Node.js Server (index.js)** - WebSocket server that manages client connections +3. **LiteLLM Proxy** - API gateway that communicates with GROQ's Llama3-70B model + +![Client-Server Architecture Diagram](images/clientserver.png) + +The LiteLLM proxy acts as a unified interface to various LLM providers, allowing us to easily switch between different models and providers without changing our application code. In our setup, it's configured to use GROQ's powerful Llama3-70B model for fast, high-quality responses. + +## The Client: Teaching Your Browser to Listen and Speak + +Our client code (in `index.html`) does two critical things: +- Listens for your voice input until you stop talking +- Speaks the AI's response back to you + +### Speech Recognition: It's All About the Silence + +The challenge with speech recognition isn't getting the words—it's knowing when you're done talking! Our solution uses a **silence detection** approach that automatically stops listening once you've been quiet for a configurable timeout (15 seconds in this setup). 
+ +```javascript +function handleSpeechResult(event) { + // Get the text you've spoken so far + const result = event.results[event.results.length - 1][0].transcript; + + // Reset our silence timer + if (silenceTimer) clearTimeout(silenceTimer); + + // Start a new silence timer - if you stop talking, this will trigger + silenceTimer = setTimeout(() => { + // You've been quiet long enough, stop listening + this.stop(); + processFinalSpeech(result); + }, CONFIG.silenceTimeout); +} +``` + +This is genius in its simplicity. Every time you say something, we reset the timer. When you stop talking, the timer counts down and then triggers our processing function. + +### Speech Synthesis: Making Your Computer Talk Back + +Once we get the AI's response, we use the browser's built-in speech synthesis to read it aloud: + +```javascript +function speakText(text) { + const utterance = new SpeechSynthesisUtterance(text); + speechSynthesis.speak(utterance); +} +``` + +Browser speech synthesis might not sound like Morgan Freeman, but it's surprisingly good these days. And unlike recorded audio, it can say literally anything our AI responds with! + +## The Server: WebSocket Orchestrator + +The Node.js server (in `index.js`) acts as the communication hub between your voice and the AI's brain. It: +1. Hosts the HTML interface +2. Handles WebSocket connections for real-time communication +3. Forwards your spoken text to the LiteLLM proxy +4. Relays the AI responses back to your browser + +The most interesting part is how we communicate with LiteLLM: + +```javascript +async function queryLLM(ws, message) { + try { + const got = (await import('got')).default; + const response = await got(process.env.LLM_API_ENDPOINT, { + method: 'POST', + headers: { + 'content-type': 'application/json', + 'Authorization': `Bearer ${API_KEY}`, + }, + json: { + model: LLM_MODEL, + messages: [ + { + "role": "system", + "content": "Keep response to 4 lines of text." 
+ }, + { role: 'user', content: message } + ], + max_tokens: 1000 + } + }); + + const data = JSON.parse(response.body); + if (data?.choices?.[0]?.message?.content) { + const content = data.choices[0].message.content; + ws.send(content); + } + } catch (error) { + console.error('Error querying LLM:', error); + ws.send('Sorry, there was an error processing your request.'); + } +} +``` + +This function takes what you said, packages it for the LiteLLM API, and sends the response back to your browser via WebSockets. Notice how we include a system message to keep responses concise - perfect for voice interaction! The beauty of using LiteLLM is that it provides a unified interface to dozens of different LLM providers. + +## The Whole Conversation Flow + +![Voice Chat Webapp](images/VoiceChatWebapp.jpg) + + +Here's what happens when you use this application: + +1. You click "Start Listening" +2. Your browser asks for microphone permission +3. You speak your question or command +4. You stop talking and wait (for about 15 seconds) +5. The browser detects silence and sends your speech text via WebSocket to the Node.js server +6. The Node.js server forwards your text to the LiteLLM proxy +7. LiteLLM routes the request to GROQ's Llama3-70B model +8. The AI model generates a response (limited to 4 lines for voice-friendly delivery) +9. The response travels back through LiteLLM → Node.js server → your browser +10. Your browser speaks the response aloud using speech synthesis + +It's like a digital game of telephone with three stops, except nothing gets lost in translation and it's blazingly fast thanks to GROQ's inference speed! 
+ +## The Code: A Masterpiece of Modular Design + +After some refactoring (because my first version looked like it was written during a caffeine overdose), both files now follow clean code principles: + +### The Server (index.js) + +```javascript +// Configuration constants (only the LiteLLM endpoint is read from an environment variable, see below) +const PORT = 9898; +const API_KEY = 'your_secret_key_xx'; // Replace with actual key +const LLM_MODEL = 'llama3-70b'; +const HTML_FILE = 'index.html'; + +// Single-purpose functions with clear names +function createHttpServer() { + return http.createServer(handleHttpRequest); +} + +function handleWebSocketConnection(ws) { + console.log('Client connected'); + ws.on('message', (message) => handleIncomingMessage(ws, message)); + ws.on('close', () => console.log('Client disconnected')); +} + +// Uses environment variable for LiteLLM endpoint +// process.env.LLM_API_ENDPOINT points to LiteLLM proxy +``` + +### The Client (index.html) + +```javascript +// Organized into configuration, DOM elements, and state +const CONFIG = { + silenceTimeout: 15000, + wsEndpoint: 'wss://openui.davidsells.today', + language: 'en-US' +}; + +const DOM = { + startButton: document.getElementById('startButton'), + status: document.getElementById('status'), + output: document.getElementById('output'), + finalResult: document.getElementById('finalResult') +}; + +const STATE = { + silenceTimer: null, + finalSpeechResult: '', + speaking: false +}; + +// Clear initialization flow +function initApp() { + checkBrowserSupport(); + setupWebSocket(); + setupSpeechRecognition(); +} +``` + +## The LiteLLM Magic: One API to Rule Them All + +One of the coolest parts of this setup is LiteLLM, which acts as a universal translator for different LLM APIs. Instead of writing separate code for OpenAI, Anthropic, GROQ, or dozens of other providers, LiteLLM provides a single, consistent interface. 
+ +Our `config.yaml` file tells LiteLLM how to route requests: + +```yaml +model_list: + - model_name: 'llama3-70b' + litellm_params: + model: 'groq/llama3-70b-8192' + api_key: your_groq_api_key_here +``` + +This configuration maps our friendly model name `llama3-70b` to GROQ's specific endpoint. Want to switch to OpenAI's GPT-4? Just change the configuration file - no code changes needed! + +## Running the Application: Docker-Powered Deployment + +The modern way to run this application is with Docker Compose, which orchestrates both our Node.js application and the LiteLLM proxy. Here's how to get started: + +### Prerequisites + +1. Install Docker and Docker Compose +2. Get a GROQ API key from [groq.com](https://groq.com) +3. Clone the repository: `git clone [repository-url]` + +### Quick Start with Docker Compose + +1. **Create your environment file** (`.env`): + ```bash + API_KEY=your_litellm_master_key_here + GROQ_API_KEY=your_groq_api_key_here + ``` + +2. **Update the config.yaml** with your GROQ API key (replace the placeholder) + +3. **Launch everything with one command**: + ```bash + docker-compose up -d + ``` + +That's it! Docker Compose will: +- Build the Node.js application container +- Pull and configure the LiteLLM container +- Set up networking between the containers +- Expose the web interface on port 9898 + +4. **Access your voice assistant**: + - Open your browser to `http://localhost:9898` + - Click "Start Listening" and start talking! + +### Manual Setup (if you prefer the old-school way) + +If you want to run things manually without Docker: + +1. **Set up LiteLLM**: + ```bash + pip install litellm + litellm --config config.yaml --port 4000 + ``` + +2. **Set up the Node.js app**: + ```bash + npm install + export LLM_API_ENDPOINT=http://localhost:4000/v1/chat/completions + node index.js + ``` + +3. 
**Access the application** at `http://localhost:9898` + +## Customization Ideas + +The beauty of this modular architecture is how easily you can customize it: + +### LLM Provider Changes +- **Switch to OpenAI**: Update `config.yaml` to use `openai/gpt-4` +- **Try Claude**: Change to `anthropic/claude-3-sonnet-20240229` +- **Use local models**: Point to Ollama, LM Studio, or other local endpoints +- **Multiple models**: Configure different models for different purposes + +### Application Tweaks +- Adjust the silence timeout (currently 15 seconds) in the client code +- Modify the system prompt to change AI personality or response style +- Add conversation history and context memory +- Implement voice authentication +- Add wake word detection +- Create custom UI themes + +### Docker Deployment Options +- **Production deployment**: Use Docker Swarm or Kubernetes +- **HTTPS/SSL**: Add Nginx reverse proxy for secure connections +- **Scaling**: Run multiple app instances behind a load balancer +- **Monitoring**: Add health checks and logging containers + +## The Technical Challenges I Faced + +Building this wasn't all sunshine and JavaScript. Here are some hurdles I overcame: + +1. **Browser Compatibility**: The Web Speech API isn't universally supported (I'm looking at you, Firefox) +2. **Silence Detection**: Finding the right timeout value that doesn't cut you off mid-sentence but also doesn't wait forever +3. **WebSocket Stability**: Ensuring connections remain stable and reconnect if broken +4. **Container Networking**: Getting the Node.js app to communicate with LiteLLM inside Docker +5. **API Response Formatting**: Ensuring voice-friendly responses that aren't too long or technical +6. **Environment Configuration**: Managing API keys and endpoints across development and production +7. 
**Voice Synthesis Quality**: Working with the limitations of browser-based speech synthesis + +## Why This Matters: The Future of Human-Computer Interaction + +Voice interfaces are becoming increasingly important. They're not just convenient—they're essential for: + +- Accessibility for those with mobility impairments +- Hands-free operation in industrial, medical, or culinary settings +- Reducing screen time while maintaining productivity +- Creating more natural human-computer interactions + +## Conclusion: Talk Is No Longer Cheap—It's Valuable! + +This project demonstrates how modern web technologies, containerization, and AI APIs can work together to create a sophisticated hands-free AI assistant. The combination of speech recognition, LiteLLM's universal API gateway, GROQ's lightning-fast inference, and speech synthesis creates an entirely new way to interact with artificial intelligence. + +By containerizing the application with Docker, we've made it incredibly easy to deploy and scale. The LiteLLM proxy adds flexibility that would have required significant engineering effort to build from scratch. And with GROQ's blazing-fast Llama3-70B, responses come back so quickly you'll forget you're talking to a machine. + +The three-tier architecture (Client → Node.js → LiteLLM, which in turn calls GROQ as the upstream model provider) might seem complex, but each component has a clear responsibility, making the system both maintainable and extensible. + +So next time you're up to your elbows in engine grease, bread dough, or finger paint, just run `docker-compose up -d` and remember that your AI assistant is just a few spoken words away! + +--- + +## Code Download + +Full code is available on my GitHub: [https://home.davidhsells.ca/Public/voicechat.git](https://home.davidhsells.ca/Public/voicechat.git/) + +--- + +*Have you built something similar or have ideas for improvements? 
Let me know in the comments below!* + +*Tags: #JavaScript #AI #SpeechRecognition #LLM #WebDevelopment #Accessibility* diff --git a/blog/images/VoiceChatWebapp.jpg b/blog/images/VoiceChatWebapp.jpg new file mode 100644 index 0000000..50bbdda --- /dev/null +++ b/blog/images/VoiceChatWebapp.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40f14246077a9ec6387a9e41172a4410029e19c13dcd0f2b1636a17489f391e4 +size 199320 diff --git a/blog/images/clientserver.png b/blog/images/clientserver.png new file mode 100644 index 0000000..ef0553b --- /dev/null +++ b/blog/images/clientserver.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54bb0f80bc88a14374062bc3cd4718f20d625d1f06e205166b0b4184bc2bb673 +size 1782688 diff --git a/blog/images/headphones.png b/blog/images/headphones.png new file mode 100644 index 0000000..943cada --- /dev/null +++ b/blog/images/headphones.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08e1635d43b1c56efe78218a5976c10b8753e19e2e38394ba973a4c5e5d3cee6 +size 1600790 diff --git a/config.yaml b/config.yaml new file mode 100644 index 0000000..bd54c02 --- /dev/null +++ b/config.yaml @@ -0,0 +1,5 @@ +model_list: + - model_name: 'llama3-70b' + litellm_params: + model: 'groq/llama3-70b-8192' + api_key: YOUR_GROQ_API_KEY diff --git a/docker-compose.yaml b/docker-compose.yaml new file mode 100644 index 0000000..0919d11 --- /dev/null +++ b/docker-compose.yaml @@ -0,0 +1,42 @@ +services: + litellm: + image: ghcr.io/berriai/litellm:main-latest + container_name: litellm + restart: always + ports: + - "4000:4000" + environment: + - LITELLM_MASTER_KEY=${API_KEY} + - GROQ_API_KEY=${GROQ_API_KEY} + env_file: + - .env + volumes: + - ./config.yaml:/app/config.yaml + command: --config /app/config.yaml --port 4000 + networks: + - app-network + app: + build: . 
+ container_name: nodejs-app + ports: + - "9898:9898" # Adjust port if needed + environment: + - NODE_ENV=production + - LLM_API_ENDPOINT=http://litellm:4000/v1/chat/completions + env_file: + - .env + volumes: + # Optional: mount for development + - .:/app + # - /app/node_modules + restart: unless-stopped + networks: + - app-network + depends_on: + - litellm + +networks: + app-network: + driver: bridge + + diff --git a/index.html b/index.html new file mode 100644 index 0000000..d098527 --- /dev/null +++ b/index.html @@ -0,0 +1,572 @@ + + + + + + VoiceChat AI Assistant + + + +
+
+

VoiceChat AI Assistant

+

Demo

+

Your hands-free AI companion - just speak and let AI respond

+
+ +
+

How It Works

+

VoiceChat AI enables completely hands-free interaction with advanced AI models. Simply speak your question or command, pause when you're done, and listen to the AI's spoken response. Perfect for cooking, driving, working out, or whenever your hands are busy.

+
+ +
+
+
🎤
+

Voice Recognition

+

Automatically detects when you've finished speaking and processes your request

+
+
+
🧠
+

AI Processing

+

Connects to powerful language models to generate intelligent responses

+
+
+
🔊
+

Voice Response

+

Speaks the AI's answer back to you with natural-sounding speech

+
+
+ +
+

Start a Conversation

+

Click the button below and start speaking after you see "Listening..."

+ + +
+
+ Status: Ready +
+
+ +
+
+

🗣️ Your Voice Input

+

This shows what the system hears you saying in real-time:

+
+
+ +
+

🤖 AI Response

+

The AI's answer will appear here and be spoken aloud:

+
+
+
+ +
+

VoiceChat AI Assistant uses your browser's Speech Recognition API and connects to language models via WebSockets

+

Your audio stays in your browser - only the text is sent for processing

+
+
+ + + +
diff --git a/index.js b/index.js
new file mode 100644
index 0000000..b97ab82
--- /dev/null
+++ b/index.js
@@ -0,0 +1,137 @@
+const http = require('http');
+const fs = require('fs');
+const WebSocket = require('ws');
+require('dotenv').config();
+
+
+// Configuration
+const PORT = 9898;
+const API_KEY = process.env.API_KEY || 'your_litellm_secret_key'; // LiteLLM key from the environment/.env; falls back to the .env.example placeholder
+const LLM_MODEL = 'llama3-70b';
+const HTML_FILE = 'index.html';
+
+/**
+ * Create the HTTP server; every request is routed to handleHttpRequest
+ * @returns {http.Server} HTTP server (not yet listening)
+ */
+function createHttpServer() {
+  return http.createServer(handleHttpRequest);
+}
+
+/**
+ * Handle HTTP requests by serving the HTML file (the request path is not inspected)
+ * @param {http.IncomingMessage} req - HTTP request object (unused)
+ * @param {http.ServerResponse} res - HTTP response object
+ */
+function handleHttpRequest(req, res) {
+  fs.readFile(HTML_FILE, (err, data) => {
+    if (err) {
+      console.error('Error reading HTML file:', err);
+      // Write the status only once the outcome is known; after writeHead(200) a 500 can no longer be sent.
+      res.writeHead(500, {'Content-Type': 'text/plain'});
+      res.end('Error getting the file');
+      return;
+    }
+    res.writeHead(200, {'Content-Type': 'text/html'});
+    res.end(data);
+  });
+}
+
+/**
+ * Set up WebSocket server on top of the existing HTTP server (same port)
+ * @param {http.Server} server - HTTP server to attach WebSocket to
+ * @returns {WebSocket.Server} Configured WebSocket server
+ */
+function setupWebSocketServer(server) {
+  const wss = new WebSocket.Server({ server });
+
+  wss.on('connection', handleWebSocketConnection);
+
+  return wss;
+}
+
+/**
+ * Handle new WebSocket connections: log them and wire up message/close handlers
+ * @param {WebSocket} ws - WebSocket connection
+ */
+function handleWebSocketConnection(ws) {
+  console.log('Client connected');
+
+  ws.on('message', (message) => handleIncomingMessage(ws, message));
+
+  ws.on('close', () => {
+    console.log('Client disconnected');
+  });
+}
+
+/**
+ * Process incoming WebSocket messages by forwarding their text to the LLM
+ * @param {WebSocket} ws - WebSocket connection
+ * @param {Buffer|string} message - Received message (converted with toString())
+ */
+function handleIncomingMessage(ws, message) {
+  console.log(`Received message => ${message}`);
+  queryLLM(ws, message.toString());
+}
+
+/**
+ * Query the language model API and send the answer (or an apology) to the client
+ * @param {WebSocket} ws - WebSocket connection to send the response
+ * @param {string} message - User message to process
+ * @returns {Promise<string|null>} The LLM response or null on error
+ */
+async function queryLLM(ws, message) {
+  try {
+    // Deliberately no logging of API_KEY here: it is a credential and must not end up in server logs.
+    const got = (await import('got')).default;
+    const response = await got(process.env.LLM_API_ENDPOINT, {
+      method: 'POST',
+      headers: {
+        'content-type': 'application/json',
+        'Authorization': `Bearer ${API_KEY}`,
+      },
+      json: {
+        model: LLM_MODEL,
+        messages: [
+          {
+            "role": "system",
+            "content": "Keep response to 4 lines of text."
+          },
+          { role: 'user', content: message }
+        ],
+        max_tokens: 1000
+      }
+    });
+
+    const data = JSON.parse(response.body);
+
+    if (data?.choices?.[0]?.message?.content) {
+      const content = data.choices[0].message.content;
+      ws.send(content);
+      return content;
+    } else {
+      console.error('Invalid or empty response from LLM API');
+      ws.send('Sorry, I received an invalid response from the language model.');
+      return null;
+    }
+  } catch (error) {
+    console.error('Error querying LLM:', error);
+    ws.send('Sorry, there was an error processing your request.');
+    return null;
+  }
+}
+
+/**
+ * Initialize and start the server (HTTP + WebSocket on PORT)
+ */
+function startServer() {
+  const server = createHttpServer();
+  setupWebSocketServer(server);
+
+  server.listen(PORT, () => {
+    console.log(`Server listening on port ${PORT}`);
+  });
+}
+
+// Start the server when this file is run
+startServer();
diff --git a/package-lock.json b/package-lock.json
new file mode 100644
index 0000000..fcbd0e8
--- /dev/null
+++ b/package-lock.json
@@ -0,0 +1,229 @@
+{
+  "name": "talk",
+  "version": "1.0.0",
+  "lockfileVersion": 1,
+  "requires": true,
+  "dependencies": {
+    "@sec-ant/readable-stream": {
+      "version": "0.4.1",
+      "resolved": 
"https://registry.npmjs.org/@sec-ant/readable-stream/-/readable-stream-0.4.1.tgz", + "integrity": "sha512-831qok9r2t8AlxLko40y2ebgSDhenenCatLVeW/uBtnHPyhHOvG0C7TvfgecV+wHzIm5KUICgzmVpWS+IMEAeg==" + }, + "@sindresorhus/is": { + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/@sindresorhus/is/-/is-7.0.1.tgz", + "integrity": "sha512-QWLl2P+rsCJeofkDNIT3WFmb6NrRud1SUYW8dIhXK/46XFV8Q/g7Bsvib0Askb0reRLe+WYPeeE+l5cH7SlkuQ==" + }, + "@szmarczak/http-timer": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/@szmarczak/http-timer/-/http-timer-5.0.1.tgz", + "integrity": "sha512-+PmQX0PiAYPMeVYe237LJAYvOMYW1j2rH5YROyS3b4CTVJum34HfRvKvAzozHAQG0TnHNdUfY9nCeUyRAs//cw==", + "requires": { + "defer-to-connect": "^2.0.1" + } + }, + "@types/http-cache-semantics": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/@types/http-cache-semantics/-/http-cache-semantics-4.0.4.tgz", + "integrity": "sha512-1m0bIFVc7eJWyve9S0RnuRgcQqF/Xd5QsUZAZeQFr1Q3/p9JWoQQEqmVy+DPTNpGXwhgIetAoYF8JSc33q29QA==" + }, + "cacheable-lookup": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/cacheable-lookup/-/cacheable-lookup-7.0.0.tgz", + "integrity": "sha512-+qJyx4xiKra8mZrcwhjMRMUhD5NR1R8esPkzIYxX96JiecFoxAXFuz/GpR3+ev4PE1WamHip78wV0vcmPQtp8w==" + }, + "cacheable-request": { + "version": "12.0.1", + "resolved": "https://registry.npmjs.org/cacheable-request/-/cacheable-request-12.0.1.tgz", + "integrity": "sha512-Yo9wGIQUaAfIbk+qY0X4cDQgCosecfBe3V9NSyeY4qPC2SAkbCS4Xj79VP8WOzitpJUZKc/wsRCYF5ariDIwkg==", + "requires": { + "@types/http-cache-semantics": "^4.0.4", + "get-stream": "^9.0.1", + "http-cache-semantics": "^4.1.1", + "keyv": "^4.5.4", + "mimic-response": "^4.0.0", + "normalize-url": "^8.0.1", + "responselike": "^3.0.0" + } + }, + "data-uri-to-buffer": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-4.0.1.tgz", + "integrity": 
"sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A==" + }, + "decompress-response": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-6.0.0.tgz", + "integrity": "sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ==", + "requires": { + "mimic-response": "^3.1.0" + }, + "dependencies": { + "mimic-response": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-3.1.0.tgz", + "integrity": "sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ==" + } + } + }, + "defer-to-connect": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/defer-to-connect/-/defer-to-connect-2.0.1.tgz", + "integrity": "sha512-4tvttepXG1VaYGrRibk5EwJd1t4udunSOVMdLSAL6mId1ix438oPwPZMALY41FCijukO1L0twNcGsdzS7dHgDg==" + }, + "dotenv": { + "version": "16.5.0", + "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.5.0.tgz", + "integrity": "sha512-m/C+AwOAr9/W1UOIZUo232ejMNnJAJtYQjUbHoNTBNTJSvqzzDh7vnrei3o3r3m9blf6ZoDkvcw0VmozNRFJxg==" + }, + "fetch-blob": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/fetch-blob/-/fetch-blob-3.2.0.tgz", + "integrity": "sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ==", + "requires": { + "node-domexception": "^1.0.0", + "web-streams-polyfill": "^3.0.3" + } + }, + "form-data-encoder": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/form-data-encoder/-/form-data-encoder-4.0.2.tgz", + "integrity": "sha512-KQVhvhK8ZkWzxKxOr56CPulAhH3dobtuQ4+hNQ+HekH/Wp5gSOafqRAeTphQUJAIk0GBvHZgJ2ZGRWd5kphMuw==" + }, + "formdata-polyfill": { + "version": "4.0.10", + "resolved": "https://registry.npmjs.org/formdata-polyfill/-/formdata-polyfill-4.0.10.tgz", + "integrity": 
"sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==", + "requires": { + "fetch-blob": "^3.1.2" + } + }, + "get-stream": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-9.0.1.tgz", + "integrity": "sha512-kVCxPF3vQM/N0B1PmoqVUqgHP+EeVjmZSQn+1oCRPxd2P21P2F19lIgbR3HBosbB1PUhOAoctJnfEn2GbN2eZA==", + "requires": { + "@sec-ant/readable-stream": "^0.4.1", + "is-stream": "^4.0.1" + } + }, + "got": { + "version": "14.4.7", + "resolved": "https://registry.npmjs.org/got/-/got-14.4.7.tgz", + "integrity": "sha512-DI8zV1231tqiGzOiOzQWDhsBmncFW7oQDH6Zgy6pDPrqJuVZMtoSgPLLsBZQj8Jg4JFfwoOsDA8NGtLQLnIx2g==", + "requires": { + "@sindresorhus/is": "^7.0.1", + "@szmarczak/http-timer": "^5.0.1", + "cacheable-lookup": "^7.0.0", + "cacheable-request": "^12.0.1", + "decompress-response": "^6.0.0", + "form-data-encoder": "^4.0.2", + "http2-wrapper": "^2.2.1", + "lowercase-keys": "^3.0.0", + "p-cancelable": "^4.0.1", + "responselike": "^3.0.0", + "type-fest": "^4.26.1" + } + }, + "http-cache-semantics": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/http-cache-semantics/-/http-cache-semantics-4.2.0.tgz", + "integrity": "sha512-dTxcvPXqPvXBQpq5dUr6mEMJX4oIEFv6bwom3FDwKRDsuIjjJGANqhBuoAn9c1RQJIdAKav33ED65E2ys+87QQ==" + }, + "http2-wrapper": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/http2-wrapper/-/http2-wrapper-2.2.1.tgz", + "integrity": "sha512-V5nVw1PAOgfI3Lmeaj2Exmeg7fenjhRUgz1lPSezy1CuhPYbgQtbQj4jZfEAEMlaL+vupsvhjqCyjzob0yxsmQ==", + "requires": { + "quick-lru": "^5.1.1", + "resolve-alpn": "^1.2.0" + } + }, + "is-stream": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/is-stream/-/is-stream-4.0.1.tgz", + "integrity": "sha512-Dnz92NInDqYckGEUJv689RbRiTSEHCQ7wOVeALbkOz999YpqT46yMRIGtSNl2iCL1waAZSx40+h59NV/EwzV/A==" + }, + "json-buffer": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/json-buffer/-/json-buffer-3.0.1.tgz", + 
"integrity": "sha512-4bV5BfR2mqfQTJm+V5tPPdf+ZpuhiIvTuAB5g8kcrXOZpTT/QwwVRWBywX1ozr6lEuPdbHxwaJlm9G6mI2sfSQ==" + }, + "keyv": { + "version": "4.5.4", + "resolved": "https://registry.npmjs.org/keyv/-/keyv-4.5.4.tgz", + "integrity": "sha512-oxVHkHR/EJf2CNXnWxRLW6mg7JyCCUcG0DtEGmL2ctUo1PNTin1PUil+r/+4r5MpVgC/fn1kjsx7mjSujKqIpw==", + "requires": { + "json-buffer": "3.0.1" + } + }, + "lowercase-keys": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/lowercase-keys/-/lowercase-keys-3.0.0.tgz", + "integrity": "sha512-ozCC6gdQ+glXOQsveKD0YsDy8DSQFjDTz4zyzEHNV5+JP5D62LmfDZ6o1cycFx9ouG940M5dE8C8CTewdj2YWQ==" + }, + "mimic-response": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-4.0.0.tgz", + "integrity": "sha512-e5ISH9xMYU0DzrT+jl8q2ze9D6eWBto+I8CNpe+VI+K2J/F/k3PdkdTdz4wvGVH4NTpo+NRYTVIuMQEMMcsLqg==" + }, + "node-domexception": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz", + "integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==" + }, + "node-fetch": { + "version": "3.3.2", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-3.3.2.tgz", + "integrity": "sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA==", + "requires": { + "data-uri-to-buffer": "^4.0.0", + "fetch-blob": "^3.1.4", + "formdata-polyfill": "^4.0.10" + } + }, + "normalize-url": { + "version": "8.0.1", + "resolved": "https://registry.npmjs.org/normalize-url/-/normalize-url-8.0.1.tgz", + "integrity": "sha512-IO9QvjUMWxPQQhs60oOu10CRkWCiZzSUkzbXGGV9pviYl1fXYcvkzQ5jV9z8Y6un8ARoVRl4EtC6v6jNqbaJ/w==" + }, + "p-cancelable": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/p-cancelable/-/p-cancelable-4.0.1.tgz", + "integrity": "sha512-wBowNApzd45EIKdO1LaU+LrMBwAcjfPaYtVzV3lmfM3gf8Z4CHZsiIqlM8TZZ8okYvh5A1cP6gTfCRQtwUpaUg==" + }, + "quick-lru": { + "version": 
"5.1.1", + "resolved": "https://registry.npmjs.org/quick-lru/-/quick-lru-5.1.1.tgz", + "integrity": "sha512-WuyALRjWPDGtt/wzJiadO5AXY+8hZ80hVpe6MyivgraREW751X3SbhRvG3eLKOYN+8VEvqLcf3wdnt44Z4S4SA==" + }, + "resolve-alpn": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/resolve-alpn/-/resolve-alpn-1.2.1.tgz", + "integrity": "sha512-0a1F4l73/ZFZOakJnQ3FvkJ2+gSTQWz/r2KE5OdDY0TxPm5h4GkqkWWfM47T7HsbnOtcJVEF4epCVy6u7Q3K+g==" + }, + "responselike": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/responselike/-/responselike-3.0.0.tgz", + "integrity": "sha512-40yHxbNcl2+rzXvZuVkrYohathsSJlMTXKryG5y8uciHv1+xDLHQpgjG64JUO9nrEq2jGLH6IZ8BcZyw3wrweg==", + "requires": { + "lowercase-keys": "^3.0.0" + } + }, + "type-fest": { + "version": "4.41.0", + "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-4.41.0.tgz", + "integrity": "sha512-TeTSQ6H5YHvpqVwBRcnLDCBnDOHWYu7IvGbHT6N8AOymcr9PJGjc1GTtiWZTYg0NCgYwvnYWEkVChQAr9bjfwA==" + }, + "web-streams-polyfill": { + "version": "3.3.3", + "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.3.3.tgz", + "integrity": "sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==" + }, + "ws": { + "version": "8.18.2", + "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.2.tgz", + "integrity": "sha512-DMricUmwGZUVr++AEAe2uiVM7UoO9MAVZMDu05UQOaUII0lp+zOzLLU4Xqh/JvTqklB1T4uELaaPBKyjE1r4fQ==" + } + } +} diff --git a/package.json b/package.json new file mode 100644 index 0000000..093689f --- /dev/null +++ b/package.json @@ -0,0 +1,28 @@ +{ + "name": "talk", + "version": "1.0.0", + "description": "", + "main": "index.js", + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1", + "start": "node index.js" + }, + "keywords": [ + "chatbot", + "voicebot", + "nodejs", + "ai", + "conversational", + "Speech Recognition", + "Speech Synthesis", + "messaging" + ], + "author": "David Howard Sells", + "license": "ISC", + 
"dependencies": { + "dotenv": "^16.5.0", + "got": "^14.4.7", + "node-fetch": "^3.3.2", + "ws": "^8.18.2" + } +}