summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Adrian Cochrane <adrian@openwork.nz> 2021-06-03 20:36:53 +1200
committer: Adrian Cochrane <adrian@openwork.nz> 2021-06-03 20:37:14 +1200
commit: 5199f051ab1452575e2cfc59ad16084f632ace87 (patch)
tree: e1fc1f199780aef2e6b5755b588dafc75c8f96cc
parent: 4ab0d7f7da55b567f018b2b4e2f2f494a82b467f (diff)
download: rhapsode-5199f051ab1452575e2cfc59ad16084f632ace87.tar.gz
rhapsode-5199f051ab1452575e2cfc59ad16084f632ace87.tar.bz2
rhapsode-5199f051ab1452575e2cfc59ad16084f632ace87.zip
Integrate Voice2JSON for voice command input.
I ended up writing the code that invokes the Voice2JSON command line in Haskell because Haskell, somewhat surprisingly, had better APIs for the purpose (JSON parsing and subprocess handling) than C. It turned out to be well worth navigating the language barrier.
-rw-r--r-- rhapsode.cabal 10
-rw-r--r-- src/Links.hs 33
-rw-r--r-- src/main.c 24
3 files changed, 54 insertions, 13 deletions
diff --git a/rhapsode.cabal b/rhapsode.cabal
index 5a0cac0..4bce942 100644
--- a/rhapsode.cabal
+++ b/rhapsode.cabal
@@ -61,11 +61,15 @@ library
-- Other library packages from which modules are imported.
build-depends: base >=4.9 && <5, directory >= 1.3.2, bytestring,
+ file-embed >= 0.0.9 && < 0.1, time, parallel >= 1,
+ -- HTML parsing, SSML output
html-conduit, xml-conduit, text, containers, data-default-class,
- network-uri,
+ -- Networking
+ network-uri, async, hurl >= 2, filepath, temporary,
+ -- CSS
stylist >= 2.4 && <3, css-syntax, xml-conduit-stylist >= 2.3 && <3, scientific,
- async, hurl >= 2, filepath, temporary,
- file-embed >= 0.0.9 && < 0.1, time, parallel >= 1, process
+ -- Voice2Json input
+ process, aeson >= 1.5 && <1.6, unordered-containers
-- Directories containing source files.
hs-source-dirs: src
diff --git a/src/Links.hs b/src/Links.hs
index 914df0c..0a6ac98 100644
--- a/src/Links.hs
+++ b/src/Links.hs
@@ -22,16 +22,19 @@ import Control.Exception (catch)
import System.Directory -- For locating links.xml, suggestions.gmni
import System.FilePath
-import System.IO (hPrint, stderr) -- For error reporting
+import System.IO (hPrint, stderr, hGetContents) -- For error reporting, Voice2Json
-- For suggestions.gmni
import qualified Data.Set as Set
import Data.List (nub, intercalate)
import Control.Concurrent (forkIO)
--- For Voice2Json's sentences.ini
+-- For Voice2Json
import Data.Char
-import System.Process (callProcess)
+import System.Process
+import Data.Aeson
+import qualified Data.HashMap.Strict as HM
+import qualified Data.ByteString.Lazy as LBS
data Link = Link {
label :: Text,
@@ -249,6 +252,10 @@ c_extractLinks c_page c_v2jProfile = do
text2cstring txt = FTxt.withCStringLen txt $ \s -> (peekCStringLen s >>= newCString)
+------
+--- C helper functions
+------
+
foreign export ccall c_formatLink :: CString -> CString -> CString -> IO CString
c_formatLink c_label c_title c_url = do
@@ -273,10 +280,28 @@ c_formatLink c_label c_title c_url = do
prosody attrs txt = el "prosody" attrs [NodeContent txt]
style field mode inner = el "tts:style" [("field", field), ("mode", mode)] [NodeElement inner]
+--- For Voice2JSON
+
foreign export ccall c_dataDir :: CString -> IO CString
--- | Used to find Voice2Json profile
c_dataDir c_subdir = do
subdir <- peekCString c_subdir
cache <- getXdgDirectory XdgData "rhapsode"
newCString (cache </> subdir)
+
+foreign export ccall c_recognizeIntent :: CString -> IO CString
+
+c_recognizeIntent c_profile = do
+ profile <- peekCString c_profile
+ (_, Just pipe, _, _) <- createProcess (proc "voice2json" [
+ "--profile", profile,
+ "transcribe-stream",
+ "-c", "1"]){std_out = CreatePipe}
+ (_, Just out, _, _) <- createProcess (proc "voice2json" [
+ "--profile", profile,
+ "recognize-intent"]){std_in = UseHandle pipe, std_out = CreatePipe}
+ intent <- LBS.hGetContents out
+ let transcript = case decode intent of
+ Just (Object obj) | Just (String txt) <- "text" `HM.lookup` obj -> unpack txt
+ _ -> ""
+ newCString transcript
diff --git a/src/main.c b/src/main.c
index ec17e0b..ee129a8 100644
--- a/src/main.c
+++ b/src/main.c
@@ -36,9 +36,11 @@ void c_writeLog(char*, struct session*);
char *c_renderDoc(struct session*, struct page*, _Bool);
char **c_extractLinks(struct page*, char *v2jProfile);
char **c_docLinksAndRendering(struct session*, struct page*, _Bool); // FIXME segfaults.
+
int c_ssmlHasMark(char*, char*);
char *c_formatLink(char *label, char *title, char *url);
char *c_dataDir(char *subdir);
+char *c_recognizeIntent(char *profile); // For better JSON & subprocess APIs.
char *c_lastVisited(char*);
@@ -219,11 +221,12 @@ char *select_link(char **links, const char *command) {
struct termios stored_settings, no_echo;
int read_keyboard = 1;
-int speak_finalize(char *ssml, char **links, char **out_link) {
+int speak_finalize(char *ssml, char *v2j_profile, char **links, char **out_link) {
while (read_keyboard) {
if (out_link != NULL && *out_link != NULL) return 0;
- if (getc(stdin) == '\033') {
+ char ch = getc(stdin);
+ if (ch == '\033') {
char mark[200];
char fallback[200];
espeak_Cancel();
@@ -285,15 +288,24 @@ int speak_finalize(char *ssml, char **links, char **out_link) {
speak(ssml, paragraph_no > 0 ? mark : NULL, NULL);
break;
}
+ } else if (ch == ' ' && v2j_profile != NULL && *v2j_profile != '\0') {
+ char *line = c_recognizeIntent(v2j_profile);
+ if (out_link != NULL) *out_link = select_link(links, line);
+ free(line);
} else {
// Read in a line
tcsetattr(0, TCSANOW, &stored_settings);
- char *line = NULL;
- size_t len = 0;
+ char buffer[512];
+
+ buffer[0] = ch;
+ putchar(ch);
+
+ char *line = buffer + 1;
+ size_t len = 512;
if (getline(&line, &len, stdin) < 0)
fprintf(stderr, "Failed to read stdin line!\n");
else if (out_link != NULL)
- *out_link = select_link(links, line);
+ *out_link = select_link(links, buffer);
tcsetattr(0, TCSANOW, &no_echo);
}
}
@@ -510,7 +522,7 @@ read_uri:
speak_err = espeak_ng_Synchronize();
if (speak_err == 0) select_link(links, "");
}
- if (use_espeak & speak_err == 0) speak_err = speak_finalize(ssml, links, &uri);
+ if (use_espeak & speak_err == 0) speak_err = speak_finalize(ssml, v2j_profile, links, &uri);
if (uri != NULL) goto read_uri;
#ifdef WITH_SPEECHD