package main import ( "fmt" "io" "os" "os/exec" "os/signal" "syscall" njson "github.com/m7shapan/njson" vosk "github.com/alphacep/vosk-api/go" ) func usage() { fmt.Println(`Usage: voskcli [FILE...] --mic=NAME Specify the audio device (use 'arecord -L' to find mic name). --model=PATH Path to the model to use`) } func fatal(a ...any) { fmt.Fprintln(os.Stderr, "voskcli:", fmt.Sprint(a...)) os.Exit(1) } func warn(a ...any) { fmt.Fprintln(os.Stderr, "voskcli: WARNING:", fmt.Sprint(a...)) } func print(a ...any) { fmt.Fprintln(os.Stdout, fmt.Sprint(a...)) } func parseResults(json string) string { type ResultJson struct { Text string `njson:"text"` Words []string `njson:"result.#.word"` Confs []float64 `njson:"result.#.conf"` Starts []float64 `njson:"result.#.start"` Ends []float64 `njson:"result.#.end"` Confidence float64 `njson:"confidence"` // only with alternatives } var s struct { Alternatives []ResultJson `njson:"alternatives"` // copy paste of ResultJson Text string `njson:"text"` Words []string `njson:"result.#.word"` Confs []float64 `njson:"result.#.conf"` Starts []float64 `njson:"result.#.start"` Ends []float64 `njson:"result.#.end"` Confidence float64 `njson:"confidence"` // only with alternatives ParText string `njson:"partial"` ParWords []string `njson:"partial_result.#.word"` ParConfs []float64 `njson:"partial_result.#.conf"` ParStarts []float64 `njson:"partial_result.#.start"` ParEnds []float64 `njson:"partial_result.#.end"` } err := njson.Unmarshal([]byte(json), &s) if err != nil { panic(err) } print(json); return s.Text } func getMic(mic string) string { if mic == "" { mic = "default" } return mic } func record(mic string) (io.Reader, error) { cmd := exec.Command("arecord", "-q", "-fS16_LE", "-c1", "-r16000", "-D", mic) cmd.Stderr = os.Stderr stdout, err := cmd.StdoutPipe() if err != nil { return nil, err } err = cmd.Start() if err != nil { return nil, err } return stdout, nil } type Action struct { Tags []string Text string } func main() { var opts struct { Mic string Model string Verbose bool } var model *vosk.VoskModel { m := os.Getenv("VOSK_MODEL") if m == "" { fatal("you need to install the a vosk model and set $VOSK_MODEL") } if opts.Verbose { fmt.Fprintln(os.Stderr, "Model: " + m) } var err error model, err = vosk.NewModel(m) if err != nil { fatal(err) } } defer model.Free() var r *vosk.VoskRecognizer { var err error r, err = vosk.NewRecognizer(model, float64(16000)) if err != nil { panic(err) } r.SetWords(1) r.SetMaxAlternatives(3)//3 ? 10 ? } defer r.Free() var mic string var audio io.Reader mic = getMic(opts.Mic) if opts.Verbose { fmt.Fprintln(os.Stderr, "Microphone: " + mic) } var err error audio, err = record(mic) if err != nil { fatal(err) } terminate := make(chan os.Signal, 1) signal.Notify(terminate, os.Interrupt, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM) for { select { case <-terminate: return default: } chunk := make([]byte, 4096) _, err := io.ReadFull(audio, chunk) if err != nil { if err != io.EOF && err != io.ErrUnexpectedEOF { panic(err) } r, err := record(mic) if err != nil { warn(err) } audio = r continue } var res string if (r.AcceptWaveform(chunk) == 1) { res = r.FinalResult() parseResults(res); } else { res = r.PartialResult() // r.Result() parseResults(res); } } }