summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdrian Cochrane <adrian@openwork.nz>2021-06-07 15:43:04 +1200
committerAdrian Cochrane <adrian@openwork.nz>2021-06-07 15:43:04 +1200
commit6cee31ba85e77a7edfcf7e3d8f32bb4b004c481d (patch)
treed98e9438bb318a21d6ceb65422a1d24049ed59aa
parent17a4d92a5c800d7ff1d83201ca28ed1fffbe7d92 (diff)
downloadrhapsode-6cee31ba85e77a7edfcf7e3d8f32bb4b004c481d.tar.gz
rhapsode-6cee31ba85e77a7edfcf7e3d8f32bb4b004c481d.tar.bz2
rhapsode-6cee31ba85e77a7edfcf7e3d8f32bb4b004c481d.zip
Add wakeword detection. Currently uses the default 'Hey Mycroft' wakeword from Voice2JSON/Mycroft Precise.
-rw-r--r--src/Links.hs12
-rw-r--r--src/main.c96
2 files changed, 97 insertions, 11 deletions
diff --git a/src/Links.hs b/src/Links.hs
index b5803e3..2a4b0fd 100644
--- a/src/Links.hs
+++ b/src/Links.hs
@@ -286,6 +286,18 @@ c_formatLink c_label c_title c_url = do
prosody attrs txt = el "prosody" attrs [NodeContent txt]
style field mode inner = el "tts:style" [("field", field), ("mode", mode)] [NodeElement inner]
+-- | Build the SSML document that plays the "link.wav" sound effect and hand it
+-- to C as a freshly allocated string (allocated with 'newCString').
+foreign export ccall c_dading :: IO CString
+
+c_dading = do
+    -- The sound effect is looked up in the XDG cache directory for "rhapsode".
+    cacheDir <- getXdgDirectory XdgCache "rhapsode"
+    let chime = mkEl "audio" [("src", pack (cacheDir </> "link.wav"))] []
+        speakEl = mkEl "speak" [] [NodeElement chime]
+        doc = Document (Prologue [] Nothing []) speakEl []
+    newCString $ LTxt.unpack $ renderText def doc
+  where
+    -- Element constructor taking its attributes as an association list.
+    mkEl name attrs childs = Element name (M.fromList attrs) childs
+
--- For Voice2JSON
foreign export ccall c_dataDir :: CString -> IO CString
diff --git a/src/main.c b/src/main.c
index 383687e..fca7328 100644
--- a/src/main.c
+++ b/src/main.c
@@ -6,6 +6,7 @@
#include <termios.h>
#include <limits.h>
#include <ctype.h>
+#include <sys/wait.h>
#include <dirent.h>
#include <errno.h>
@@ -40,6 +41,7 @@ char **c_docLinksAndRendering(struct session*, struct page*, _Bool); // FIXME se
int c_ssmlHasMark(char*, char*);
char *c_formatLink(char *label, char *title, char *url);
+char *c_dading();
char *c_dataDir(char *subdir);
char *c_recognizeIntent(char *profile); // For better JSON & subprocess APIs.
@@ -220,9 +222,33 @@ char *select_link(char **links, const char *command) {
return NULL;
}
+/**
+ * Spawn `voice2json wait-wake` in a child process.
+ *
+ * The child runs
+ * `voice2json --profile <v2j_profile> wait-wake --exit-count 1`
+ * with its standard streams redirected onto private pipes so it stays quiet.
+ *
+ * @param v2j_profile  Voice2JSON profile name passed via `--profile`.
+ * @return The child's pid in the parent, or fork()'s negative result if the
+ *         process could not be created.
+ */
+pid_t v2j_waitwake(char *v2j_profile) {
+    pid_t pid = fork();
+    if (pid == 0) { /* child */
+        int pipefds[2];
+        // Silence the standard streams; not a big deal if it fails.
+        // NOTE(review): both ends of every pipe stay open in the child and
+        // nothing drains them; redirecting to /dev/null may be more robust.
+        if (pipe(pipefds) == 0) dup2(pipefds[0], 0);
+        if (pipe(pipefds) == 0) dup2(pipefds[1], 1);
+        if (pipe(pipefds) == 0) dup2(pipefds[1], 2);
+        execlp("voice2json", "voice2json", "--profile", v2j_profile,
+               "wait-wake", "--exit-count", "1", (char *)NULL);
+        // execlp() only returns on failure. Leave immediately so the child
+        // never falls through and keeps running as a second copy of the caller.
+        _exit(127);
+    }
+    return pid;
+}
+
+// PID of the running `voice2json wait-wake` child. check_sigchld() resets it
+// to 0 once that child has been reaped; a negative value is a failed fork.
+volatile pid_t pid_waitwake = 0;
+
+/*
+ * SIGCHLD handler: reaps every child that has already exited, without
+ * blocking, and clears pid_waitwake when the wake-word child is among them.
+ *
+ * NOTE(review): waitpid(-1, ...) also collects children started elsewhere in
+ * the process; confirm nothing else needs their exit status.
+ */
+static void check_sigchld(int sig) {
+    (void)sig;
+    // waitpid() sets errno (e.g. ECHILD) once nothing is left to reap; keep
+    // the interrupted code's errno intact.
+    int saved_errno = errno;
+    pid_t pid;
+    // Only real pids are compared, so a 0 or -1 result from waitpid() can
+    // never match (and reset) a fork-failure sentinel held in pid_waitwake.
+    while ((pid = waitpid(-1, NULL, WNOHANG)) > 0) {
+        if (pid == pid_waitwake) pid_waitwake = 0;
+    }
+    errno = saved_errno;
+}
+
struct termios stored_settings, no_echo;
int read_keyboard = 1;
+int use_wakeword = 1;
int speak_finalize(char *ssml, char *v2j_profile, char **links, char **out_link) {
+ // Keyboard input mainloop.
while (read_keyboard) {
if (out_link != NULL && *out_link != NULL) return 0;
@@ -289,9 +315,15 @@ int speak_finalize(char *ssml, char *v2j_profile, char **links, char **out_link)
speak(ssml, paragraph_no > 0 ? mark : NULL, NULL);
break;
}
- } else if (ch == ' ' && v2j_profile != NULL && *v2j_profile != '\0') {
+ } else if ((ch == ' ' && v2j_profile != NULL && *v2j_profile != '\0') ||
+ (use_wakeword && pid_waitwake == 0)) {
+ espeak_Cancel();
+ speak(c_dading(), NULL, NULL);
char *line = c_recognizeIntent(v2j_profile);
- if (out_link != NULL) *out_link = select_link(links, line);
+ if (out_link != NULL) {
+ *out_link = select_link(links, line);
+ if (*out_link == NULL) pid_waitwake = v2j_waitwake(v2j_profile);
+ }
free(line);
} else {
// Read in a line
@@ -310,6 +342,20 @@ int speak_finalize(char *ssml, char *v2j_profile, char **links, char **out_link)
tcsetattr(0, TCSANOW, &no_echo);
}
}
+
+ // Wakeword mainloop
+ while (use_wakeword && out_link != NULL) {
+ int status;
+ pid_t child = v2j_waitwake(v2j_profile);
+ if (child < 0) continue; // error
+ if (waitpid(child, &status, 0) == 0) continue;
+ espeak_Cancel();
+ char *line = c_recognizeIntent(v2j_profile);
+ *out_link = select_link(links, line);
+ if (*out_link != NULL) return 0;
+ }
+
+ // Otherwise, wait until eSpeak has had its say.
espeak_ng_STATUS result = espeak_ng_Synchronize();
if (result != ENS_OK) {
espeak_ng_PrintStatusCodeMessage(result, stderr, context);
@@ -369,9 +415,9 @@ int main(int argc, char **argv) {
int c;
opterr = 0;
#ifdef WITH_SPEECHD
- while ((c = getopt(argc, argv, "xs::l::L:kKvVw::dh")) != -1) {
+ while ((c = getopt(argc, argv, "xs::l::L:kKv::VWw::dh")) != -1) {
#else
- while ((c = getopt(argc, argv, "xs::l::kKv::Vw::h")) != -1) {
+ while ((c = getopt(argc, argv, "xs::l::kKv::VWw::h")) != -1) {
#endif
switch (c) {
case 'x':
@@ -400,6 +446,9 @@ int main(int argc, char **argv) {
if (optarg != NULL) v2j_profile = optarg;
validate_v2j_profile = 1;
break;
+ case 'W':
+ use_wakeword = 0;
+ break;
case 'w':
use_espeak = 1;
path_wav = optarg;
@@ -424,6 +473,7 @@ int main(int argc, char **argv) {
fprintf(stderr, "\t-K\t\tDon't read input from stdin.");
fprintf(stderr, "\t-v\tvoice in\tEnsure voice input is enabled & optionally sets the Voice2JSON profile.\n");
fprintf(stderr, "\t-V\t\tDon't listen for voice input.\n");
+ fprintf(stderr, "\t-W\t\tNo wakeword\tDon't listen for the configured/trained wakeword regardless if voice recognition is enabled.\n");
fprintf(stderr, "\t-w\t.wav\tWrite an audio recording of the webpage, or (DEFAULT) immediately output through speakers.\n");
#ifdef WITH_SPEECHD
fprintf(stderr, "\t-d\tSpeechD\tSchedule page read via the SpeechD daemon. (BROKEN)\n");
@@ -436,13 +486,13 @@ int main(int argc, char **argv) {
}
}
if (read_keyboard) {
- // Read input character by character, not line by line.
- no_echo = stored_settings;
- no_echo.c_lflag &= (~ICANON);
- no_echo.c_lflag &= (~ECHO);
- no_echo.c_cc[VTIME] = 0;
- no_echo.c_cc[VMIN] = 1;
- tcsetattr(0, TCSANOW, &no_echo);
+ // Read input character by character, not line by line.
+ no_echo = stored_settings;
+ no_echo.c_lflag &= (~ICANON);
+ no_echo.c_lflag &= (~ECHO);
+ no_echo.c_cc[VTIME] = 0;
+ no_echo.c_cc[VMIN] = 1;
+ tcsetattr(0, TCSANOW, &no_echo);
}
if (fd_ssml == stdout && fd_links == stdout) fd_links = stderr;
#ifdef WITH_SPEECHD
@@ -521,6 +571,25 @@ read_uri:
speak_err = espeak_ng_Synchronize();
if (speak_err == 0) select_link(links, "");
}
+
+ if (read_keyboard && use_wakeword && v2j_profile != NULL && *v2j_profile != 0) {
+ // Interrupt read when `voice2json wait-wake` exits.
+ pid_waitwake = v2j_waitwake(v2j_profile);
+ if (pid_waitwake < 0) {
+ fprintf(stderr, "Failed to run wakeword detection.\n");
+ use_wakeword = 0;
+ }
+ else {
+ struct sigaction act;
+ memset(&act, 0, sizeof(act));
+ act.sa_handler = check_sigchld;
+ if (sigaction(SIGCHLD, &act, 0)) {
+ fprintf(stderr, "Failed to wait upon wakeword detection.\n");
+ use_wakeword = 0;
+ }
+ }
+ }
+
if (use_espeak & speak_err == 0) speak_err = speak_finalize(ssml, v2j_profile, links, &uri);
if (uri != NULL) goto read_uri;
@@ -528,6 +597,11 @@ read_uri:
if (spd_conn != NULL) spd_close(spd_conn);
#endif
+ if (pid_waitwake > 0) {
+ kill(pid_waitwake, SIGTERM);
+ kill(pid_waitwake+1, SIGTERM); // Yuck, likely fragile! Also kill voice2json wait-wake's subprocesses.
+ }
+
c_freePage(referer);
c_freeSession(session);
hs_exit();