aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- noetic-llama/src/ollamamessages/CMakeLists.txt | 1
-rw-r--r-- noetic-llama/src/ollamamessages/msg/WhisperListening.msg | 1
-rw-r--r-- noetic-llama/src/ollamawrapper/src/ollamawrapper.py | 2
-rw-r--r-- noetic-llama/src/whisperwrapper/src/whisperwrapper.py | 28
4 files changed, 30 insertions, 2 deletions
diff --git a/noetic-llama/src/ollamamessages/CMakeLists.txt b/noetic-llama/src/ollamamessages/CMakeLists.txt
index 7f1e55c..b67d181 100644
--- a/noetic-llama/src/ollamamessages/CMakeLists.txt
+++ b/noetic-llama/src/ollamamessages/CMakeLists.txt
@@ -51,6 +51,7 @@ find_package(catkin REQUIRED COMPONENTS
add_message_files(
FILES
WhisperTranscription.msg
+ WhisperListening.msg
)
## Generate services in the 'srv' folder
diff --git a/noetic-llama/src/ollamamessages/msg/WhisperListening.msg b/noetic-llama/src/ollamamessages/msg/WhisperListening.msg
new file mode 100644
index 0000000..e2a1506
--- /dev/null
+++ b/noetic-llama/src/ollamamessages/msg/WhisperListening.msg
@@ -0,0 +1 @@
+bool listening
\ No newline at end of file
diff --git a/noetic-llama/src/ollamawrapper/src/ollamawrapper.py b/noetic-llama/src/ollamawrapper/src/ollamawrapper.py
index 627fa58..66843f8 100644
--- a/noetic-llama/src/ollamawrapper/src/ollamawrapper.py
+++ b/noetic-llama/src/ollamawrapper/src/ollamawrapper.py
@@ -15,7 +15,7 @@ import capabilities
from capabilities import *
ollama_api_url = rospy.get_param("/stt/ollama_api_url", "192.168.122.1:11434")
-base_ollama_model = rospy.get_param("/stt/ollama_base_model", "nexusraven:13b-q3_K_S")
+base_ollama_model = rospy.get_param("/stt/ollama_base_model", "nexusraven:13b-v2-q2_K")
@dataclass
class FunctionCapability:
diff --git a/noetic-llama/src/whisperwrapper/src/whisperwrapper.py b/noetic-llama/src/whisperwrapper/src/whisperwrapper.py
index 4ebaebf..439c8de 100644
--- a/noetic-llama/src/whisperwrapper/src/whisperwrapper.py
+++ b/noetic-llama/src/whisperwrapper/src/whisperwrapper.py
@@ -1,8 +1,10 @@
#!/usr/bin/env python3
-from ollamamessages.msg import WhisperTranscription
+from ollamamessages.msg import WhisperTranscription, WhisperListening
+from ollamamessages.srv import OllamaCall, OllamaCallResponse
import speech_recognition as sr
+import threading
import tempfile
import requests
import rospy
@@ -15,13 +17,22 @@ pause = rospy.get_param("/stt/speech_recogn_pause_time", 0.8)
energy = rospy.get_param("/stt/speech_recogn_energy", 400)
dynamic_energy = rospy.get_param("/stt/speech_recogn_dyn_energy_flag", False)
microphone_device = rospy.get_param("/stt/microphone_device", 1)
+no_speech_thresh = rospy.get_param("/stt/speech_confidence_thresh", 0.1)
class WhisperWrapper:
+
+ listening = False
+
def __init__(self) -> None:
self.transcription_pub = rospy.Publisher("/stt/transcription", WhisperTranscription, queue_size = 1)
+ self.listening_sub = rospy.Subscriber("/stt/listening", WhisperListening, self.listening_sub_cb)
self.record_audio(pause, energy, dynamic_energy, microphone_device)
+ def listening_sub_cb(self, set_listening):
+ rospy.loginfo("Set listening = %s" % str(set_listening.listening))
+ self.listening = set_listening.listening
+
def record_audio(self, pause, energy, dynamic_energy, microphone_device):
recogniser = sr.Recognizer()
recogniser.energy_threshold = energy
@@ -33,6 +44,10 @@ class WhisperWrapper:
while True and not rospy.is_shutdown():
audio = recogniser.listen(microphone)
+ if not self.listening:
+ rospy.loginfo("I heard something but I'm stopping here because we've been set to not listen")
+ continue
+
with tempfile.NamedTemporaryFile(mode = "wb", suffix = ".wav", delete = False) as f:
audio_path = f.name
f.write(audio.get_wav_data())
@@ -55,6 +70,17 @@ class WhisperWrapper:
no_speech_prob = o["segments"][0]["no_speech_prob"]
)
+ if o["segments"][0]["no_speech_prob"] < no_speech_thresh:
+ self.run_ollama(o["text"])
+ else:
+ rospy.loginfo("Skipped due to low confidence it's actually speech.")
+
+ def run_ollama(self, text):
+ service_call = rospy.ServiceProxy("/stt/ollamacall", OllamaCall)
+ response = service_call(input = text)
+ print(response)
+
+
if __name__ == "__main__":