From 18d594e73c67a54b2d46fe8d520bbbca3927f8b2 Mon Sep 17 00:00:00 2001 From: jwansek Date: Fri, 1 Mar 2024 14:25:52 +0000 Subject: Added not listening by default, calling the ollama service after a transcription --- .../src/whisperwrapper/src/whisperwrapper.py | 28 +++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) (limited to 'noetic-llama/src/whisperwrapper') diff --git a/noetic-llama/src/whisperwrapper/src/whisperwrapper.py b/noetic-llama/src/whisperwrapper/src/whisperwrapper.py index 4ebaebf..439c8de 100644 --- a/noetic-llama/src/whisperwrapper/src/whisperwrapper.py +++ b/noetic-llama/src/whisperwrapper/src/whisperwrapper.py @@ -1,8 +1,10 @@ #!/usr/bin/env python3 -from ollamamessages.msg import WhisperTranscription +from ollamamessages.msg import WhisperTranscription, WhisperListening +from ollamamessages.srv import OllamaCall, OllamaCallResponse import speech_recognition as sr +import threading import tempfile import requests import rospy @@ -15,13 +17,22 @@ pause = rospy.get_param("/stt/speech_recogn_pause_time", 0.8) energy = rospy.get_param("/stt/speech_recogn_energy", 400) dynamic_energy = rospy.get_param("/stt/speech_recogn_dyn_energy_flag", False) microphone_device = rospy.get_param("/stt/microphone_device", 1) +no_speech_thresh = rospy.get_param("/stt/speech_confidence_thresh", 0.1) class WhisperWrapper: + + listening = False + def __init__(self) -> None: self.transcription_pub = rospy.Publisher("/stt/transcription", WhisperTranscription, queue_size = 1) + self.listening_sub = rospy.Subscriber("/stt/listening", WhisperListening, self.listening_sub_cb) self.record_audio(pause, energy, dynamic_energy, microphone_device) + def listening_sub_cb(self, set_listening): + rospy.loginfo("Set listening = %s" % str(set_listening.listening)) + self.listening = set_listening.listening + def record_audio(self, pause, energy, dynamic_energy, microphone_device): recogniser = sr.Recognizer() recogniser.energy_threshold = energy @@ -33,6 +44,10 @@ class WhisperWrapper: while True and not rospy.is_shutdown(): audio = recogniser.listen(microphone) + if not self.listening: + rospy.loginfo("I heard something but I'm stopping here because we've been set to not listen") + continue + with tempfile.NamedTemporaryFile(mode = "wb", suffix = ".wav", delete = False) as f: audio_path = f.name f.write(audio.get_wav_data()) @@ -55,6 +70,17 @@ class WhisperWrapper: no_speech_prob = o["segments"][0]["no_speech_prob"] ) + if o["segments"][0]["no_speech_prob"] < no_speech_thresh: + self.run_ollama(o["text"]) + else: + rospy.loginfo("Skipped due to low confidence it's actually speech.") + + def run_ollama(self, text): + service_call = rospy.ServiceProxy("/stt/ollamacall", OllamaCall) + response = service_call(input = text) + print(response) + + if __name__ == "__main__": -- cgit v1.2.3