diff options
4 files changed, 30 insertions, 2 deletions
diff --git a/noetic-llama/src/ollamamessages/CMakeLists.txt b/noetic-llama/src/ollamamessages/CMakeLists.txt index 7f1e55c..b67d181 100644 --- a/noetic-llama/src/ollamamessages/CMakeLists.txt +++ b/noetic-llama/src/ollamamessages/CMakeLists.txt @@ -51,6 +51,7 @@ find_package(catkin REQUIRED COMPONENTS add_message_files( FILES WhisperTranscription.msg + WhisperListening.msg ) ## Generate services in the 'srv' folder diff --git a/noetic-llama/src/ollamamessages/msg/WhisperListening.msg b/noetic-llama/src/ollamamessages/msg/WhisperListening.msg new file mode 100644 index 0000000..e2a1506 --- /dev/null +++ b/noetic-llama/src/ollamamessages/msg/WhisperListening.msg @@ -0,0 +1 @@ +bool listening
\ No newline at end of file diff --git a/noetic-llama/src/ollamawrapper/src/ollamawrapper.py b/noetic-llama/src/ollamawrapper/src/ollamawrapper.py index 627fa58..66843f8 100644 --- a/noetic-llama/src/ollamawrapper/src/ollamawrapper.py +++ b/noetic-llama/src/ollamawrapper/src/ollamawrapper.py @@ -15,7 +15,7 @@ import capabilities from capabilities import * ollama_api_url = rospy.get_param("/stt/ollama_api_url", "192.168.122.1:11434") -base_ollama_model = rospy.get_param("/stt/ollama_base_model", "nexusraven:13b-q3_K_S") +base_ollama_model = rospy.get_param("/stt/ollama_base_model", "nexusraven:13b-v2-q2_K") @dataclass class FunctionCapability: diff --git a/noetic-llama/src/whisperwrapper/src/whisperwrapper.py b/noetic-llama/src/whisperwrapper/src/whisperwrapper.py index 4ebaebf..439c8de 100644 --- a/noetic-llama/src/whisperwrapper/src/whisperwrapper.py +++ b/noetic-llama/src/whisperwrapper/src/whisperwrapper.py @@ -1,8 +1,10 @@ #!/usr/bin/env python3 -from ollamamessages.msg import WhisperTranscription +from ollamamessages.msg import WhisperTranscription, WhisperListening +from ollamamessages.srv import OllamaCall, OllamaCallResponse import speech_recognition as sr +import threading import tempfile import requests import rospy @@ -15,13 +17,22 @@ pause = rospy.get_param("/stt/speech_recogn_pause_time", 0.8) energy = rospy.get_param("/stt/speech_recogn_energy", 400) dynamic_energy = rospy.get_param("/stt/speech_recogn_dyn_energy_flag", False) microphone_device = rospy.get_param("/stt/microphone_device", 1) +no_speech_thresh = rospy.get_param("/stt/speech_confidence_thresh", 0.1) class WhisperWrapper: + + listening = False + def __init__(self) -> None: self.transcription_pub = rospy.Publisher("/stt/transcription", WhisperTranscription, queue_size = 1) + self.listening_sub = rospy.Subscriber("/stt/listening", WhisperListening, self.listening_sub_cb) self.record_audio(pause, energy, dynamic_energy, microphone_device) + def listening_sub_cb(self, set_listening): + rospy.loginfo("Set listening = %s" % str(set_listening.listening)) + self.listening = set_listening.listening + def record_audio(self, pause, energy, dynamic_energy, microphone_device): recogniser = sr.Recognizer() recogniser.energy_threshold = energy @@ -33,6 +44,10 @@ class WhisperWrapper: while True and not rospy.is_shutdown(): audio = recogniser.listen(microphone) + if not self.listening: + rospy.loginfo("I heard something but I'm stopping here because we've been set to not listen") + continue + with tempfile.NamedTemporaryFile(mode = "wb", suffix = ".wav", delete = False) as f: audio_path = f.name f.write(audio.get_wav_data()) @@ -55,6 +70,17 @@ class WhisperWrapper: no_speech_prob = o["segments"][0]["no_speech_prob"] ) + if o["segments"][0]["no_speech_prob"] < no_speech_thresh: + self.run_ollama(o["text"]) + else: + rospy.loginfo("Skipped due to low confidence it's actually speech.") + + def run_ollama(self, text): + service_call = rospy.ServiceProxy("/stt/ollamacall", OllamaCall) + response = service_call(input = text) + print(response) + + if __name__ == "__main__": |