178 lines
3.4 KiB
Go
178 lines
3.4 KiB
Go
|
package main
|
||
|
|
||
|
import (
|
||
|
"fmt"
|
||
|
"io"
|
||
|
"os"
|
||
|
"os/exec"
|
||
|
"os/signal"
|
||
|
"syscall"
|
||
|
njson "github.com/m7shapan/njson"
|
||
|
vosk "github.com/alphacep/vosk-api/go"
|
||
|
)
|
||
|
|
||
|
// usage prints the command-line synopsis and the supported options to
// standard output.
func usage() {
	const text = `Usage: voskcli [FILE...]

--mic=NAME Specify the audio device (use 'arecord -L' to find mic name).
--model=PATH Path to the model to use`
	fmt.Println(text)
}
|
||
|
|
||
|
// fatal writes its arguments, formatted with fmt.Sprint and prefixed with
// "voskcli:", to standard error and then terminates the process with exit
// status 1. Deferred functions do not run because os.Exit is used.
func fatal(a ...any) {
	msg := fmt.Sprint(a...)
	fmt.Fprintln(os.Stderr, "voskcli:", msg)
	os.Exit(1)
}
|
||
|
// warn writes its arguments, formatted with fmt.Sprint and prefixed with
// "voskcli: WARNING:", to standard error. Execution continues afterwards.
func warn(a ...any) {
	msg := fmt.Sprint(a...)
	fmt.Fprintln(os.Stderr, "voskcli: WARNING:", msg)
}
|
||
|
|
||
|
// print writes its arguments, formatted with fmt.Sprint, to standard output
// followed by a newline.
//
// Within this package the name shadows Go's builtin print.
func print(a ...any) {
	line := fmt.Sprint(a...)
	fmt.Fprintln(os.Stdout, line)
}
|
||
|
|
||
|
|
||
|
func parseResults(json string) string {
|
||
|
type ResultJson struct {
|
||
|
Text string `njson:"text"`
|
||
|
Words []string `njson:"result.#.word"`
|
||
|
Confs []float64 `njson:"result.#.conf"`
|
||
|
Starts []float64 `njson:"result.#.start"`
|
||
|
Ends []float64 `njson:"result.#.end"`
|
||
|
Confidence float64 `njson:"confidence"` // only with alternatives
|
||
|
}
|
||
|
var s struct {
|
||
|
Alternatives []ResultJson `njson:"alternatives"`
|
||
|
|
||
|
// copy paste of ResultJson
|
||
|
Text string `njson:"text"`
|
||
|
Words []string `njson:"result.#.word"`
|
||
|
Confs []float64 `njson:"result.#.conf"`
|
||
|
Starts []float64 `njson:"result.#.start"`
|
||
|
Ends []float64 `njson:"result.#.end"`
|
||
|
Confidence float64 `njson:"confidence"` // only with alternatives
|
||
|
|
||
|
ParText string `njson:"partial"`
|
||
|
ParWords []string `njson:"partial_result.#.word"`
|
||
|
ParConfs []float64 `njson:"partial_result.#.conf"`
|
||
|
ParStarts []float64 `njson:"partial_result.#.start"`
|
||
|
ParEnds []float64 `njson:"partial_result.#.end"`
|
||
|
}
|
||
|
err := njson.Unmarshal([]byte(json), &s)
|
||
|
if err != nil {
|
||
|
panic(err)
|
||
|
}
|
||
|
print(json);
|
||
|
return s.Text
|
||
|
}
|
||
|
|
||
|
|
||
|
// getMic returns the audio device name to record from: mic itself when it is
// non-empty, otherwise "default".
func getMic(mic string) string {
	if mic != "" {
		return mic
	}
	return "default"
}
|
||
|
|
||
|
// record starts an arecord process capturing from the device mic and returns
// a reader connected to the process's standard output.
//
// arecord is invoked with -q, -fS16_LE, -c1 and -r16000 (quiet, signed
// 16-bit little-endian samples, one channel, 16000 Hz) and its standard
// error is forwarded to this process's standard error. An error is returned
// if the stdout pipe cannot be created or the process cannot be started.
//
// NOTE(review): the started command is never waited on in this function.
func record(mic string) (io.Reader, error) {
	args := []string{"-q", "-fS16_LE", "-c1", "-r16000", "-D", mic}
	proc := exec.Command("arecord", args...)
	proc.Stderr = os.Stderr

	pipe, err := proc.StdoutPipe()
	if err != nil {
		return nil, err
	}
	if err := proc.Start(); err != nil {
		return nil, err
	}
	return pipe, nil
}
|
||
|
|
||
|
// Action groups a list of tags with a text string.
//
// NOTE(review): nothing in the visible part of this file references Action;
// its intended use should be confirmed.
type Action struct {
	Tags []string
	Text string
}
|
||
|
|
||
|
func main() {
|
||
|
var opts struct {
|
||
|
Mic string
|
||
|
Model string
|
||
|
Verbose bool
|
||
|
}
|
||
|
var model *vosk.VoskModel
|
||
|
{
|
||
|
m := os.Getenv("VOSK_MODEL")
|
||
|
if m == "" {
|
||
|
fatal("you need to install the a vosk model and set $VOSK_MODEL")
|
||
|
}
|
||
|
if opts.Verbose {
|
||
|
fmt.Fprintln(os.Stderr, "Model: " + m)
|
||
|
}
|
||
|
|
||
|
var err error
|
||
|
model, err = vosk.NewModel(m)
|
||
|
if err != nil {
|
||
|
fatal(err)
|
||
|
}
|
||
|
}
|
||
|
defer model.Free()
|
||
|
|
||
|
var r *vosk.VoskRecognizer
|
||
|
{
|
||
|
var err error
|
||
|
r, err = vosk.NewRecognizer(model, float64(16000))
|
||
|
if err != nil {
|
||
|
panic(err)
|
||
|
}
|
||
|
r.SetWords(1)
|
||
|
r.SetMaxAlternatives(3)//3 ? 10 ?
|
||
|
}
|
||
|
defer r.Free()
|
||
|
|
||
|
var mic string
|
||
|
var audio io.Reader
|
||
|
|
||
|
mic = getMic(opts.Mic)
|
||
|
if opts.Verbose {
|
||
|
fmt.Fprintln(os.Stderr, "Microphone: " + mic)
|
||
|
}
|
||
|
var err error
|
||
|
audio, err = record(mic)
|
||
|
if err != nil {
|
||
|
fatal(err)
|
||
|
}
|
||
|
|
||
|
terminate := make(chan os.Signal, 1)
|
||
|
signal.Notify(terminate, os.Interrupt, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM)
|
||
|
|
||
|
for {
|
||
|
select {
|
||
|
case <-terminate: return
|
||
|
default:
|
||
|
}
|
||
|
chunk := make([]byte, 4096)
|
||
|
_, err := io.ReadFull(audio, chunk)
|
||
|
if err != nil {
|
||
|
if err != io.EOF && err != io.ErrUnexpectedEOF {
|
||
|
panic(err)
|
||
|
}
|
||
|
r, err := record(mic)
|
||
|
if err != nil {
|
||
|
warn(err)
|
||
|
}
|
||
|
audio = r
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
var res string
|
||
|
if (r.AcceptWaveform(chunk) == 1) {
|
||
|
res = r.FinalResult()
|
||
|
parseResults(res);
|
||
|
} else {
|
||
|
res = r.PartialResult() // r.Result()
|
||
|
parseResults(res);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
}
|