diff options
author | jwansek <eddie.atten.ea29@gmail.com> | 2024-02-29 20:49:37 +0000 |
---|---|---|
committer | jwansek <eddie.atten.ea29@gmail.com> | 2024-02-29 20:49:37 +0000 |
commit | 52565291cbc2c507a22236ab628db1f9f52034f1 (patch) | |
tree | 81121547b1e69bee7710624642dd7f01a79dfb82 | |
parent | 526a3e84b181e4108412686a0d185714e1ccf51e (diff) | |
download | noetic-llama-52565291cbc2c507a22236ab628db1f9f52034f1.tar.gz noetic-llama-52565291cbc2c507a22236ab628db1f9f52034f1.zip |
Added whisper node
-rw-r--r-- | .devcontainer/Dockerfile | 2 | ||||
-rw-r--r-- | noetic-llama/src/ollamamessages/CMakeLists.txt | 9 | ||||
-rw-r--r-- | noetic-llama/src/ollamamessages/msg/WhisperTranscription.msg | 6 | ||||
-rw-r--r-- | noetic-llama/src/ollamawrapper/src/ollamawrapper.py | 2 | ||||
-rw-r--r-- | noetic-llama/src/whisperwrapper/CMakeLists.txt | 206 | ||||
-rw-r--r-- | noetic-llama/src/whisperwrapper/package.xml | 68 | ||||
-rw-r--r-- | noetic-llama/src/whisperwrapper/src/list_microphones.py | 4 | ||||
-rw-r--r-- | noetic-llama/src/whisperwrapper/src/whisperwrapper.py | 65 |
8 files changed, 355 insertions, 7 deletions
diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 6992623..9bcb4d7 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -26,7 +26,7 @@ RUN sudo apt install -y git python3-pip # Rosdep update RUN rosdep update -RUN pip3 install jinja2 ollama geocoder requests python-dotenv parsimonious +RUN pip3 install jinja2 ollama geocoder requests python-dotenv parsimonious SpeechRecognition # Source the ROS setup file RUN echo "source /opt/ros/${ROS_DISTRO}/setup.bash" >> ~/.bashrc diff --git a/noetic-llama/src/ollamamessages/CMakeLists.txt b/noetic-llama/src/ollamamessages/CMakeLists.txt index b397131..7f1e55c 100644 --- a/noetic-llama/src/ollamamessages/CMakeLists.txt +++ b/noetic-llama/src/ollamamessages/CMakeLists.txt @@ -48,11 +48,10 @@ find_package(catkin REQUIRED COMPONENTS ## * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...) ## Generate messages in the 'msg' folder -# add_message_files( -# FILES -# Message1.msg -# Message2.msg -# ) +add_message_files( + FILES + WhisperTranscription.msg +) ## Generate services in the 'srv' folder add_service_files( diff --git a/noetic-llama/src/ollamamessages/msg/WhisperTranscription.msg b/noetic-llama/src/ollamamessages/msg/WhisperTranscription.msg new file mode 100644 index 0000000..985e998 --- /dev/null +++ b/noetic-llama/src/ollamamessages/msg/WhisperTranscription.msg @@ -0,0 +1,6 @@ +string text +string language +float64 temperature +float64 avg_logprob +float64 compression_ratio +float64 no_speech_prob
\ No newline at end of file diff --git a/noetic-llama/src/ollamawrapper/src/ollamawrapper.py b/noetic-llama/src/ollamawrapper/src/ollamawrapper.py index f344299..0bf0d99 100644 --- a/noetic-llama/src/ollamawrapper/src/ollamawrapper.py +++ b/noetic-llama/src/ollamawrapper/src/ollamawrapper.py @@ -87,7 +87,7 @@ def handle_ollama_call(req): def handle_ollama_server(): rospy.init_node("ollama_wrapper_server") - s = rospy.Service("ollama_wrapper", OllamaCall, handle_ollama_call) + s = rospy.Service("/stt/ollamacall", OllamaCall, handle_ollama_call) print("Spin") rospy.spin() diff --git a/noetic-llama/src/whisperwrapper/CMakeLists.txt b/noetic-llama/src/whisperwrapper/CMakeLists.txt new file mode 100644 index 0000000..6d3f9a4 --- /dev/null +++ b/noetic-llama/src/whisperwrapper/CMakeLists.txt @@ -0,0 +1,206 @@ +cmake_minimum_required(VERSION 3.0.2) +project(whisperwrapper) + +## Compile as C++11, supported in ROS Kinetic and newer +# add_compile_options(-std=c++11) + +## Find catkin macros and libraries +## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz) +## is used, also find other catkin packages +find_package(catkin REQUIRED COMPONENTS + roscpp + rospy + std_msgs +) + +## System dependencies are found with CMake's conventions +# find_package(Boost REQUIRED COMPONENTS system) + + +## Uncomment this if the package has a setup.py. This macro ensures +## modules and global scripts declared therein get installed +## See http://ros.org/doc/api/catkin/html/user_guide/setup_dot_py.html +# catkin_python_setup() + +################################################ +## Declare ROS messages, services and actions ## +################################################ + +## To declare and build messages, services or actions from within this +## package, follow these steps: +## * Let MSG_DEP_SET be the set of packages whose message types you use in +## your messages/services/actions (e.g. std_msgs, actionlib_msgs, ...). +## * In the file package.xml: +## * add a build_depend tag for "message_generation" +## * add a build_depend and a exec_depend tag for each package in MSG_DEP_SET +## * If MSG_DEP_SET isn't empty the following dependency has been pulled in +## but can be declared for certainty nonetheless: +## * add a exec_depend tag for "message_runtime" +## * In this file (CMakeLists.txt): +## * add "message_generation" and every package in MSG_DEP_SET to +## find_package(catkin REQUIRED COMPONENTS ...) +## * add "message_runtime" and every package in MSG_DEP_SET to +## catkin_package(CATKIN_DEPENDS ...) +## * uncomment the add_*_files sections below as needed +## and list every .msg/.srv/.action file to be processed +## * uncomment the generate_messages entry below +## * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...) + +## Generate messages in the 'msg' folder +# add_message_files( +# FILES +# Message1.msg +# Message2.msg +# ) + +## Generate services in the 'srv' folder +# add_service_files( +# FILES +# Service1.srv +# Service2.srv +# ) + +## Generate actions in the 'action' folder +# add_action_files( +# FILES +# Action1.action +# Action2.action +# ) + +## Generate added messages and services with any dependencies listed here +# generate_messages( +# DEPENDENCIES +# std_msgs +# ) + +################################################ +## Declare ROS dynamic reconfigure parameters ## +################################################ + +## To declare and build dynamic reconfigure parameters within this +## package, follow these steps: +## * In the file package.xml: +## * add a build_depend and a exec_depend tag for "dynamic_reconfigure" +## * In this file (CMakeLists.txt): +## * add "dynamic_reconfigure" to +## find_package(catkin REQUIRED COMPONENTS ...) +## * uncomment the "generate_dynamic_reconfigure_options" section below +## and list every .cfg file to be processed + +## Generate dynamic reconfigure parameters in the 'cfg' folder +# generate_dynamic_reconfigure_options( +# cfg/DynReconf1.cfg +# cfg/DynReconf2.cfg +# ) + +################################### +## catkin specific configuration ## +################################### +## The catkin_package macro generates cmake config files for your package +## Declare things to be passed to dependent projects +## INCLUDE_DIRS: uncomment this if your package contains header files +## LIBRARIES: libraries you create in this project that dependent projects also need +## CATKIN_DEPENDS: catkin_packages dependent projects also need +## DEPENDS: system dependencies of this project that dependent projects also need +catkin_package( +# INCLUDE_DIRS include +# LIBRARIES whisperwrapper +# CATKIN_DEPENDS roscpp rospy std_msgs +# DEPENDS system_lib +) + +########### +## Build ## +########### + +## Specify additional locations of header files +## Your package locations should be listed before other locations +include_directories( +# include + ${catkin_INCLUDE_DIRS} +) + +## Declare a C++ library +# add_library(${PROJECT_NAME} +# src/${PROJECT_NAME}/whisperwrapper.cpp +# ) + +## Add cmake target dependencies of the library +## as an example, code may need to be generated before libraries +## either from message generation or dynamic reconfigure +# add_dependencies(${PROJECT_NAME} ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) + +## Declare a C++ executable +## With catkin_make all packages are built within a single CMake context +## The recommended prefix ensures that target names across packages don't collide +# add_executable(${PROJECT_NAME}_node src/whisperwrapper_node.cpp) + +## Rename C++ executable without prefix +## The above recommended prefix causes long target names, the following renames the +## target back to the shorter version for ease of user use +## e.g. "rosrun someones_pkg node" instead of "rosrun someones_pkg someones_pkg_node" +# set_target_properties(${PROJECT_NAME}_node PROPERTIES OUTPUT_NAME node PREFIX "") + +## Add cmake target dependencies of the executable +## same as for the library above +# add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) + +## Specify libraries to link a library or executable target against +# target_link_libraries(${PROJECT_NAME}_node +# ${catkin_LIBRARIES} +# ) + +############# +## Install ## +############# + +# all install targets should use catkin DESTINATION variables +# See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html + +## Mark executable scripts (Python etc.) for installation +## in contrast to setup.py, you can choose the destination +catkin_install_python(PROGRAMS + src/whisperwrapper.py + DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} +) + +## Mark executables for installation +## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_executables.html +# install(TARGETS ${PROJECT_NAME}_node +# RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} +# ) + +## Mark libraries for installation +## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_libraries.html +# install(TARGETS ${PROJECT_NAME} +# ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} +# LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} +# RUNTIME DESTINATION ${CATKIN_GLOBAL_BIN_DESTINATION} +# ) + +## Mark cpp header files for installation +# install(DIRECTORY include/${PROJECT_NAME}/ +# DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION} +# FILES_MATCHING PATTERN "*.h" +# PATTERN ".svn" EXCLUDE +# ) + +## Mark other files for installation (e.g. launch and bag files, etc.) +# install(FILES +# # myfile1 +# # myfile2 +# DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} +# ) + +############# +## Testing ## +############# + +## Add gtest based cpp test target and link libraries +# catkin_add_gtest(${PROJECT_NAME}-test test/test_whisperwrapper.cpp) +# if(TARGET ${PROJECT_NAME}-test) +# target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME}) +# endif() + +## Add folders to be run by python nosetests +# catkin_add_nosetests(test) diff --git a/noetic-llama/src/whisperwrapper/package.xml b/noetic-llama/src/whisperwrapper/package.xml new file mode 100644 index 0000000..52f3fd7 --- /dev/null +++ b/noetic-llama/src/whisperwrapper/package.xml @@ -0,0 +1,68 @@ +<?xml version="1.0"?> +<package format="2"> + <name>whisperwrapper</name> + <version>0.0.0</version> + <description>The whisperwrapper package</description> + + <!-- One maintainer tag required, multiple allowed, one person per tag --> + <!-- Example: --> + <!-- <maintainer email="jane.doe@example.com">Jane Doe</maintainer> --> + <maintainer email="eden@todo.todo">eden</maintainer> + + + <!-- One license tag required, multiple allowed, one license per tag --> + <!-- Commonly used license strings: --> + <!-- BSD, MIT, Boost Software License, GPLv2, GPLv3, LGPLv2.1, LGPLv3 --> + <license>TODO</license> + + + <!-- Url tags are optional, but multiple are allowed, one per tag --> + <!-- Optional attribute type can be: website, bugtracker, or repository --> + <!-- Example: --> + <!-- <url type="website">http://wiki.ros.org/whisperwrapper</url> --> + + + <!-- Author tags are optional, multiple are allowed, one per tag --> + <!-- Authors do not have to be maintainers, but could be --> + <!-- Example: --> + <!-- <author email="jane.doe@example.com">Jane Doe</author> --> + + + <!-- The *depend tags are used to specify dependencies --> + <!-- Dependencies can be catkin packages or system dependencies --> + <!-- Examples: --> + <!-- Use depend as a shortcut for packages that are both build and exec dependencies --> + <!-- <depend>roscpp</depend> --> + <!-- Note that this is equivalent to the following: --> + <!-- <build_depend>roscpp</build_depend> --> + <!-- <exec_depend>roscpp</exec_depend> --> + <!-- Use build_depend for packages you need at compile time: --> + <!-- <build_depend>message_generation</build_depend> --> + <!-- Use build_export_depend for packages you need in order to build against this package: --> + <!-- <build_export_depend>message_generation</build_export_depend> --> + <!-- Use buildtool_depend for build tool packages: --> + <!-- <buildtool_depend>catkin</buildtool_depend> --> + <!-- Use exec_depend for packages you need at runtime: --> + <!-- <exec_depend>message_runtime</exec_depend> --> + <!-- Use test_depend for packages you need only for testing: --> + <!-- <test_depend>gtest</test_depend> --> + <!-- Use doc_depend for packages you need only for building documentation: --> + <!-- <doc_depend>doxygen</doc_depend> --> + <buildtool_depend>catkin</buildtool_depend> + <build_depend>roscpp</build_depend> + <build_depend>rospy</build_depend> + <build_depend>std_msgs</build_depend> + <build_export_depend>roscpp</build_export_depend> + <build_export_depend>rospy</build_export_depend> + <build_export_depend>std_msgs</build_export_depend> + <exec_depend>roscpp</exec_depend> + <exec_depend>rospy</exec_depend> + <exec_depend>std_msgs</exec_depend> + + + <!-- The export tag contains other, unspecified, tags --> + <export> + <!-- Other tools can request additional information be placed here --> + + </export> +</package> diff --git a/noetic-llama/src/whisperwrapper/src/list_microphones.py b/noetic-llama/src/whisperwrapper/src/list_microphones.py new file mode 100644 index 0000000..d5b49ff --- /dev/null +++ b/noetic-llama/src/whisperwrapper/src/list_microphones.py @@ -0,0 +1,4 @@ +import speech_recognition as sr + +for index, name in enumerate(sr.Microphone.list_microphone_names()): + print("Microphone with name \"{1}\" found for `Microphone(device_index={0})`".format(index, name))
\ No newline at end of file diff --git a/noetic-llama/src/whisperwrapper/src/whisperwrapper.py b/noetic-llama/src/whisperwrapper/src/whisperwrapper.py new file mode 100644 index 0000000..4ebaebf --- /dev/null +++ b/noetic-llama/src/whisperwrapper/src/whisperwrapper.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 + +from ollamamessages.msg import WhisperTranscription + +import speech_recognition as sr +import tempfile +import requests +import rospy +import time +import json +import os + +whisper_api_url = rospy.get_param("/stt/whisper_api_url", "192.168.122.1:9000") +pause = rospy.get_param("/stt/speech_recogn_pause_time", 0.8) +energy = rospy.get_param("/stt/speech_recogn_energy", 400) +dynamic_energy = rospy.get_param("/stt/speech_recogn_dyn_energy_flag", False) +microphone_device = rospy.get_param("/stt/microphone_device", 1) + +class WhisperWrapper: + def __init__(self) -> None: + self.transcription_pub = rospy.Publisher("/stt/transcription", WhisperTranscription, queue_size = 1) + + self.record_audio(pause, energy, dynamic_energy, microphone_device) + + def record_audio(self, pause, energy, dynamic_energy, microphone_device): + recogniser = sr.Recognizer() + recogniser.energy_threshold = energy + recogniser.pause_threshold = pause + recogniser.dynamic_energy_threshold = dynamic_energy + + with sr.Microphone(sample_rate = 10000) as microphone: + rospy.loginfo("Listening...") + while True and not rospy.is_shutdown(): + audio = recogniser.listen(microphone) + + with tempfile.NamedTemporaryFile(mode = "wb", suffix = ".wav", delete = False) as f: + audio_path = f.name + f.write(audio.get_wav_data()) + + rospy.loginfo("I heard something... Written to %s" % audio_path) + req = requests.post( + "http://%s/asr?output=json" % whisper_api_url, + files = {"audio_file": open(audio_path, "rb")} + ) + os.remove(audio_path) + o = req.json() + rospy.loginfo("Transcribed '%s'" % o["text"]) + if o["text"] != "": + self.transcription_pub.publish( + text = o["text"], + language = o["language"], + temperature = o["segments"][0]["temperature"], + avg_logprob = o["segments"][0]["avg_logprob"], + compression_ratio = o["segments"][0]["compression_ratio"], + no_speech_prob = o["segments"][0]["no_speech_prob"] + ) + + + +if __name__ == "__main__": + rospy.init_node("whisper_wrapper") + whisperwrapper = WhisperWrapper() + + + |