From c2469c0e009ca83b10272c1104cdef6476bdb2dc Mon Sep 17 00:00:00 2001 From: Jaye Heffernan Date: Sun, 21 Jan 2024 06:29:34 +1100 Subject: [PATCH 1/2] feat: configurable whisper recording command (#91) * Add option to default config * Override automatic detection * chore: deprecate whisper_max_time * feat: fully configurable whisper_rec_cmd * chore: formatting * fix: typo --- lua/gp/config.lua | 13 ++++++++-- lua/gp/init.lua | 60 ++++++++++++++++++++++++++++++----------------- 2 files changed, 50 insertions(+), 23 deletions(-) diff --git a/lua/gp/config.lua b/lua/gp/config.lua index 0caa00d4..2c426905 100644 --- a/lua/gp/config.lua +++ b/lua/gp/config.lua @@ -179,12 +179,21 @@ local config = { -- decrease this number to pick up only louder sounds as possible speech -- you can disable silence trimming by setting this a very high number (like 1000.0) whisper_silence = "1.75", - -- whisper max recording time (mm:ss) - whisper_max_time = "05:00", -- whisper tempo (1.0 is normal speed) whisper_tempo = "1.75", -- The language of the input audio, in ISO-639-1 format. 
whisper_language = "en", + -- command to use for recording: nil (unset) for automatic selection, + -- a string ("sox", "arecord", "ffmpeg"), or a table with the command and its arguments: + -- sox is the most universal, but can have start/end cropping issues caused by latency + -- arecord is Linux only, but has no cropping issues and is faster + -- ffmpeg in the default configuration is macOS only, but can be used on any platform + -- (see https://trac.ffmpeg.org/wiki/Capture/Desktop for more info) + -- below is the default configuration for all three commands: + -- whisper_rec_cmd = {"sox", "-c", "1", "--buffer", "32", "-d", "rec.wav", "trim", "0", "60:00"}, + -- whisper_rec_cmd = {"arecord", "-c", "1", "-f", "S16_LE", "-r", "48000", "-d", "3600", "rec.wav"}, + -- whisper_rec_cmd = {"ffmpeg", "-y", "-f", "avfoundation", "-i", ":0", "-t", "3600", "rec.wav"}, + whisper_rec_cmd = nil, -- image generation settings -- image prompt prefix for asking user for input (supports {{agent}} template variable) diff --git a/lua/gp/init.lua b/lua/gp/init.lua index a9690fba..567e95ff 100644 --- a/lua/gp/init.lua +++ b/lua/gp/init.lua @@ -17,6 +17,7 @@ local deprecated = { chat_system_prompt = "`chat_system_prompt`\n" .. switch_to_agent, command_prompt_prefix = "`command_prompt_prefix`\nPlease use `command_prompt_prefix_template`" .. " with support for \n`{{agent}}` variable so you know which agent is currently active", + whisper_max_time = "`whisper_max_time`\nPlease use fully customizable `whisper_rec_cmd`", } -------------------------------------------------------------------------------- @@ -2784,41 +2785,35 @@ M.Whisper = function(callback) return end + local rec_file = M.config.whisper_dir .. "/rec.wav" local rec_options = { sox = { cmd = "sox", opts = { - -- single channel "-c", "1", - -- small buffer "--buffer", "32", "-d", - -- output file - M.config.whisper_dir .. 
"/rec.wav", - -- max recording time + "rec.wav", "trim", "0", - M.config.whisper_max_time, + "3600", }, exit_code = 0, }, arecord = { cmd = "arecord", opts = { - -- single channel "-c", "1", "-f", "S16_LE", "-r", "48000", - -- max recording time "-d", 3600, - -- output file - M.config.whisper_dir .. "/rec.wav", + "rec.wav", }, exit_code = 1, }, @@ -2832,7 +2827,7 @@ M.Whisper = function(callback) ":0", "-t", "3600", - M.config.whisper_dir .. "/rec.wav", + "rec.wav", }, exit_code = 255, }, @@ -2968,25 +2963,48 @@ M.Whisper = function(callback) end) end - local rec_cmd = "sox" - if vim.fn.executable("ffmpeg") == 1 then - local devices = vim.fn.system("ffmpeg -devices -v quiet | grep -i avfoundation | wc -l") - devices = string.gsub(devices, "^%s*(.-)%s*$", "%1") - if devices == "1" then - rec_cmd = "ffmpeg" + local cmd = {} + + local rec_cmd = M.config.whisper_rec_cmd + -- if rec_cmd not set explicitly, try to autodetect + if not rec_cmd then + rec_cmd = "sox" + if vim.fn.executable("ffmpeg") == 1 then + local devices = vim.fn.system("ffmpeg -devices -v quiet | grep -i avfoundation | wc -l") + devices = string.gsub(devices, "^%s*(.-)%s*$", "%1") + if devices == "1" then + rec_cmd = "ffmpeg" + end end + if vim.fn.executable("arecord") == 1 then + rec_cmd = "arecord" + end + end + + if type(rec_cmd) == "table" and rec_cmd[1] and rec_options[rec_cmd[1]] then + rec_cmd = vim.deepcopy(rec_cmd) + cmd.cmd = table.remove(rec_cmd, 1) + cmd.exit_code = rec_options[cmd.cmd].exit_code + cmd.opts = rec_cmd + elseif type(rec_cmd) == "string" and rec_options[rec_cmd] then + cmd = rec_options[rec_cmd] + else + M.error(string.format("Whisper got invalid recording command: %s", rec_cmd)) + close() + return end - if vim.fn.executable("arecord") == 1 then - rec_cmd = "arecord" + for i, v in ipairs(cmd.opts) do + if v == "rec.wav" then + cmd.opts[i] = rec_file + end end - local cmd = rec_options[rec_cmd] M._H.process(nil, cmd.cmd, cmd.opts, function(code, signal, stdout, stderr) close() 
if code and code ~= cmd.exit_code then M.error( - rec_cmd + cmd.cmd .. " exited with code and signal:\ncode: " .. code .. ", signal: " From 17b775abbf8bdf0fcea8149fd339a69f729ebb6f Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sat, 20 Jan 2024 19:29:57 +0000 Subject: [PATCH 2/2] chore: auto-generate vimdoc --- doc/gp.nvim.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/gp.nvim.txt b/doc/gp.nvim.txt index 3eda6a84..921f598c 100644 --- a/doc/gp.nvim.txt +++ b/doc/gp.nvim.txt @@ -1,4 +1,4 @@ -*gp.nvim.txt* For NVIM v0.8.0 Last change: 2024 January 14 +*gp.nvim.txt* For NVIM v0.8.0 Last change: 2024 January 20 ============================================================================== Table of Contents *gp.nvim-table-of-contents*