Bibliotecas de cliente

O CPqD ASR oferece algumas bibliotecas para simplificar o desenvolvimento de aplicações usando reconhecimento de fala. Essas bibliotecas utilizam internamente a API WebSocket para realizar a integração com o servidor CPqD ASR.

Atualmente são disponibilizadas as seguintes bibliotecas:

A seguir, alguns exemplos de uso das bibliotecas.

Exemplo em C++

// Recognition parameters: at most 3 sentences, confidence threshold of 80,
// and a no-input timeout of 5000 ms with the input timers started.
std::unique_ptr<RecognitionConfig> config = RecognitionConfig::Builder()
 .maxSentences(3)
 .confidenceThreshold(80)
 .noInputTimeoutEnabled(true)
 .noInputTimeoutMilliseconds(5000)
 .startInputTimers(true)
 .build();

// The recognizer takes ownership of the configuration (it is moved in).
std::unique_ptr<SpeechRecognizer> asr = SpeechRecognizer::Builder()
 .serverUrl("wss://speech.cpqd.com.br/asr/ws/v2/recognize/16k")
 .credentials("MY_USER", "MY_PASSWORD")
 .recogConfig(std::move(config))
 .build();

// Audio is read from a WAV file on disk.
std::shared_ptr<AudioSource> audio = std::make_shared<AudioSourceFile>("/opt/audio/sim.wav");

// Language model list containing a single built-in grammar.
std::unique_ptr<LanguageModelList> lm = LanguageModelList::Builder()
 .addFromURI("builtin:grammar/boolean")
 .build();

try {
 asr->recognize(audio, std::move(lm));
 // Only the first entry of the returned results is used in this example.
 RecognitionResult result = (asr->waitRecognitionResult())[0];
 int i = 0;
 for (RecognitionResult::Alternative& alt : result.getAlternatives()) {
   std::cout << "Alternativa ["
             << ++i
             << "] (score = "
             << alt.getConfidence()
             << "): "
             << alt.getText()
             << std::endl;
   int j = 0;
   for (Interpretation& interpretation : alt.getInterpretations()) {
     std::cout << "\t Interpretacao ["
               << ++j
               << "]: "
               << interpretation.text_
               << std::endl;
   }
 }
} catch (const RecognitionException& e) {
 // Caught by const reference so the exception object is neither copied nor
 // sliced. Error handling is elided in this example.
 ...
}
asr->close();

Exemplo em Java

// Recognition parameters: at most 3 sentences, confidence threshold of 80,
// no-input timeout enabled with a value of 5000, input timers started.
RecognitionConfig config = RecognitionConfig.builder()
    .maxSentences(3)
    .confidenceThreshold(80)
    .noInputTimeoutEnabled(true)
    .noInputTimeoutMilis(5000)
    .startInputTimers(true)
    .build();

// Recognizer bound to the server URL, the credentials and the config above.
SpeechRecognizer asr = SpeechRecognizer.builder()
    .serverUrl("wss://speech.cpqd.com.br/asr/ws/v2/recognize/16k")
    .credentials("MY_USER", "MY_PASSWORD")
    .recogConfig(config)
    .build();

// Audio comes from a WAV file on disk.
AudioSource audio = new FileAudioSource("/opt/audio/sim.wav");

// Language model list holding a single built-in grammar.
LanguageModelList lm = LanguageModelList.builder()
    .addFromURI("builtin:grammar/boolean")
    .build();

try {
    asr.recognize(audio, lm);
    // Only the first entry of the returned results is inspected here.
    RecognitionResult result = asr.waitRecognitionResult().get(0);

    int altIndex = 0;
    for (RecognitionAlternative alternative : result.getAlternatives()) {
        String altLine = String.format("Alternativa [%s] (score = %s): %s",
                                       altIndex,
                                       alternative.getConfidence(),
                                       alternative.getText());
        System.out.println(altLine);
        altIndex++;

        int interpIndex = 0;
        for (Interpretation interp : alternative.getInterpretations()) {
            String interpLine = String.format("\t Interpretacao [%s]: %s",
                                              interpIndex,
                                              interp);
            System.out.println(interpLine);
            interpIndex++;
        }
    }
} catch (RecognitionException e) {
    // Error handling is elided in this example.
    ...
} finally {
    // Always release the recognizer, whether or not recognition succeeded.
    asr.close();
}

Exemplo em Python

from cpqdasr import SpeechRecognizer, RecognitionException
from cpqdasr import FileAudioSource, LanguageModelList

# Recognition parameters, passed to the recognizer as a plain dict.
config = {
  "maxSentences": 3,
  "confidenceThreshold": 80,
  "noInputTimeoutEnabled": True,
  # 5000, matching the value configured by the C++ and Java examples.
  "noInputTimeoutMilis": 5000,
  "startInputTimers": True
  }

# Recognizer bound to the server URL, the credentials and the config above.
asr = SpeechRecognizer(
  server_url="wss://speech.cpqd.com.br/asr/ws/v2/recognize/16k",
  credentials=("guest", "1234"),
  recog_config=config
  )

# Audio comes from a WAV file on disk.
audio = FileAudioSource("/opt/audio/yes.wav")

# Language model list holding a single built-in grammar.
lm = LanguageModelList(
       LanguageModelList.from_uri("builtin:grammar/yes_no")
     )

try:
    asr.recognize(audio, lm)
    # Only the first entry of the returned results is inspected here.
    # NOTE(review): the rest of this snippet uses snake_case names
    # (server_url, recog_config, from_uri); confirm this method's spelling
    # against the installed SDK.
    result = asr.waitRecognitionResult()[0]
    i = 0
    for alt in result.alternatives:
        # Index shown in brackets, the same label format as the
        # interpretation lines below.
        print("Alternativa [{}] (score = {}): {}".format(
              i,
              alt['score'],
              alt['text']))
        i += 1
        j = 0
        for interpretation in alt['interpretations']:
            print("\t Interpretacao [{}]: {}".format(
                  j,
                  interpretation))
            j += 1
except RecognitionException:
    # Replace with real error handling; re-raised unchanged in this example.
    raise
finally:
    # Always release the recognizer, whether or not recognition succeeded.
    asr.close()