voskcli/voskcli.go

178 lines
3.4 KiB
Go
Raw Normal View History

2023-06-27 09:05:25 +02:00
package main
import (
"fmt"
"io"
"os"
"os/exec"
"os/signal"
"syscall"
njson "github.com/m7shapan/njson"
vosk "github.com/alphacep/vosk-api/go"
)
// usage writes the command synopsis and the list of supported options to
// standard output, followed by a newline.
func usage() {
	const synopsis = "Usage: voskcli [FILE...]\n" +
		"--mic=NAME Specify the audio device (use 'arecord -L' to find mic name).\n" +
		"--model=PATH Path to the model to use"
	fmt.Printf("%s\n", synopsis)
}
// fatal reports an unrecoverable error and terminates the program.
//
// The arguments are joined with fmt.Sprint, prefixed with "voskcli:" and
// written to standard error as a single line; the process then exits with
// status 1. Because os.Exit is used, deferred functions of the caller do not
// run.
func fatal(a ...any) {
	line := "voskcli: " + fmt.Sprint(a...)
	fmt.Fprintln(os.Stderr, line)
	os.Exit(1)
}
// warn writes a non-fatal diagnostic to standard error.
//
// The arguments are joined with fmt.Sprint and emitted on one line after the
// "voskcli: WARNING:" prefix. Execution continues afterwards.
func warn(a ...any) {
	detail := fmt.Sprint(a...)
	fmt.Fprintf(os.Stderr, "voskcli: WARNING: %s\n", detail)
}
// print writes its arguments, joined with fmt.Sprint, to standard output as
// one newline-terminated line.
//
// Note that this package-level function shadows Go's builtin print.
func print(a ...any) {
	line := fmt.Sprint(a...) + "\n"
	fmt.Fprint(os.Stdout, line)
}
// parseResults decodes one recognizer result document, echoes the raw JSON to
// standard output and returns the value of its top-level "text" key.
//
// The document is decoded with njson into a structure that also captures the
// per-word details, the "alternatives" array and the partial-result keys,
// although only the top-level text is returned. When the document has no
// top-level "text" key the returned string is empty.
//
// The function panics if njson reports a decoding error.
func parseResults(json string) string {
	// alternative describes one entry of the "alternatives" array.
	type alternative struct {
		Text       string    `njson:"text"`
		Words      []string  `njson:"result.#.word"`
		Confs      []float64 `njson:"result.#.conf"`
		Starts     []float64 `njson:"result.#.start"`
		Ends       []float64 `njson:"result.#.end"`
		Confidence float64   `njson:"confidence"` // only with alternatives
	}

	var doc struct {
		Alternatives []alternative `njson:"alternatives"`

		// Same fields as alternative, read from the top level of the document.
		Text       string    `njson:"text"`
		Words      []string  `njson:"result.#.word"`
		Confs      []float64 `njson:"result.#.conf"`
		Starts     []float64 `njson:"result.#.start"`
		Ends       []float64 `njson:"result.#.end"`
		Confidence float64   `njson:"confidence"` // only with alternatives

		// Keys used by partial results.
		ParText   string    `njson:"partial"`
		ParWords  []string  `njson:"partial_result.#.word"`
		ParConfs  []float64 `njson:"partial_result.#.conf"`
		ParStarts []float64 `njson:"partial_result.#.start"`
		ParEnds   []float64 `njson:"partial_result.#.end"`
	}

	if err := njson.Unmarshal([]byte(json), &doc); err != nil {
		panic(err)
	}

	// The raw document is what the program emits on stdout.
	print(json)
	return doc.Text
}
// getMic returns the capture device name to use: the given name when it is
// non-empty, otherwise "default".
func getMic(mic string) string {
	if mic != "" {
		return mic
	}
	return "default"
}
// arecordStream is the reader handed out by record. It forwards reads to the
// child's stdout pipe and, as soon as the pipe reports an error (normally
// io.EOF because the child went away), waits for the child so that it does
// not linger as a zombie process.
type arecordStream struct {
	cmd *exec.Cmd
	out io.Reader
	err error // first error seen on out; returned by every later Read
}

// Read implements io.Reader. Data is passed through unchanged. The first
// error from the pipe triggers a single cmd.Wait and is then returned for
// this and all subsequent calls.
func (s *arecordStream) Read(p []byte) (int, error) {
	if s.err != nil {
		return 0, s.err
	}
	n, err := s.out.Read(p)
	if err != nil {
		s.err = err
		// Wait is only legal once all reads from the pipe are done, which is
		// the case here. Its result (the child's exit status) is deliberately
		// ignored: arecord's own diagnostics already went to our stderr and
		// callers only act on the read error.
		_ = s.cmd.Wait()
	}
	return n, err
}

// record starts an arecord process capturing from the device mic and returns
// a reader over its standard output.
//
// arecord is invoked with "-q -fS16_LE -c1 -r16000 -D mic" and its standard
// error is connected to this process's standard error. The returned reader
// yields whatever arecord writes to stdout; once that stream ends the child
// process is reaped automatically.
//
// A non-nil error is returned (together with a nil reader) when the stdout
// pipe cannot be created or the command cannot be started.
func record(mic string) (io.Reader, error) {
	cmd := exec.Command("arecord", "-q", "-fS16_LE", "-c1", "-r16000", "-D", mic)
	cmd.Stderr = os.Stderr

	stdout, err := cmd.StdoutPipe()
	if err != nil {
		return nil, err
	}
	if err := cmd.Start(); err != nil {
		return nil, err
	}
	return &arecordStream{cmd: cmd, out: stdout}, nil
}
// Action groups a list of tags with a piece of text.
// NOTE(review): nothing in the visible file references this type — confirm
// whether it is still needed.
type Action struct {
// Tags is the list of tag strings attached to the action.
Tags []string
// Text is the text associated with the action.
Text string
}
func main() {
var opts struct {
Mic string
Model string
Verbose bool
}
var model *vosk.VoskModel
{
m := os.Getenv("VOSK_MODEL")
if m == "" {
fatal("you need to install the a vosk model and set $VOSK_MODEL")
}
if opts.Verbose {
fmt.Fprintln(os.Stderr, "Model: " + m)
}
var err error
model, err = vosk.NewModel(m)
if err != nil {
fatal(err)
}
}
defer model.Free()
var r *vosk.VoskRecognizer
{
var err error
r, err = vosk.NewRecognizer(model, float64(16000))
if err != nil {
panic(err)
}
r.SetWords(1)
r.SetMaxAlternatives(3)//3 ? 10 ?
}
defer r.Free()
var mic string
var audio io.Reader
mic = getMic(opts.Mic)
if opts.Verbose {
fmt.Fprintln(os.Stderr, "Microphone: " + mic)
}
var err error
audio, err = record(mic)
if err != nil {
fatal(err)
}
terminate := make(chan os.Signal, 1)
signal.Notify(terminate, os.Interrupt, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM)
for {
select {
case <-terminate: return
default:
}
chunk := make([]byte, 4096)
_, err := io.ReadFull(audio, chunk)
if err != nil {
if err != io.EOF && err != io.ErrUnexpectedEOF {
panic(err)
}
r, err := record(mic)
if err != nil {
warn(err)
}
audio = r
continue
}
var res string
if (r.AcceptWaveform(chunk) == 1) {
res = r.FinalResult()
parseResults(res);
} else {
res = r.PartialResult() // r.Result()
parseResults(res);
}
}
}