SDKs

O CPqD oferece SDKs para simplificar o desenvolvimento de aplicações usando reconhecimento de fala através da API WebSocket. Atualmente são disponibilizados SDKs na forma de bibliotecas em C++, Java e Python. Três exemplos de uso desses SDKs são mostrados a seguir.

Para mais informações, entre em contato com o CPqD.

Exemplo em C++

// Recognition settings handed to the recognizer built below.
std::unique_ptr<RecognitionConfig> config = RecognitionConfig::Builder()
  .maxSentences(3)
  .confidenceThreshold(50)
  .noInputTimeoutEnabled(true)
  .noInputTimeoutMilliseconds(5000)
  .startInputTimers(true)
  .build();

// Recognizer for the given server URL and credentials. The configuration is
// moved into the builder, so `config` must not be used after this statement.
std::unique_ptr<SpeechRecognizer> asr = SpeechRecognizer::Builder()
  .serverUrl("wss://speech.cpqd.com.br/asr/ws/v2/recognize/16k")
  .credentials("guest", "1234")
  .recogConfig(std::move(config))
  .build();

// Audio to be recognized, read from a file.
std::shared_ptr<AudioSource> audio = std::make_shared<AudioSourceFile>("/opt/audio/yes.wav");

// Language models used for this recognition (a single built-in grammar).
std::unique_ptr<LanguageModelList> lm = LanguageModelList::Builder()
  .addFromURI("builtin:grammar/yes_no")
  .build();

try {
  asr->recognize(audio, std::move(lm));

  // Keep the whole result list so an empty list can be detected before
  // indexing it; indexing an empty container is undefined behavior.
  // NOTE(review): assumes the returned list provides empty(), as std::vector
  // does -- confirm against the SDK header.
  auto results = asr->waitRecognitionResult();
  if (results.empty()) {
    std::cout << "Nenhum resultado de reconhecimento" << '\n';
  } else {
    // Bind by reference: the first result does not need to be copied.
    RecognitionResult& result = results[0];
    int i = 0;
    for (RecognitionResult::Alternative& alt : result.getAlternatives()) {
      // Alternatives and interpretations are labelled starting from 1.
      std::cout << "Alternativa ["
                << ++i
                << "] (score = "
                << alt.getConfidence()
                << "): "
                << alt.getText()
                << '\n';
      int j = 0;
      for (Interpretation& interpretation : alt.getInterpretations()) {
        std::cout << "\t Interpretacao ["
                  << ++j
                  << "]: "
                  << interpretation.text_
                  << '\n';
      }
    }
  }
} catch (const RecognitionException& e) {
  // Caught by const reference to avoid copying/slicing the exception.
  // Application-specific error handling goes here.
  ...
}
// Release the recognizer once the recognition has been handled.
asr->close();

Exemplo em Java

// Recognition settings handed to the recognizer built below.
// NOTE(review): a no-input timeout of 50 looks very short if the unit is
// milliseconds -- confirm the intended value.
RecognitionConfig config = RecognitionConfig.builder()
  .maxSentences(3)
  .confidenceThreshold(80)
  .noInputTimeoutEnabled(true)
  .noInputTimeoutMilis(50)
  .startInputTimers(true)
  .build();

// Recognizer for the given server URL and credentials, using the settings above.
SpeechRecognizer asr = SpeechRecognizer.builder()
  .serverUrl("wss://speech.cpqd.com.br/asr/ws/v2/recognize/16k")
  .credentials("guest", "1234")
  .recogConfig(config)
  .build();

// Audio to be recognized, read from a file.
AudioSource audio = new FileAudioSource("/opt/audio/yes.wav");

// Language models used for this recognition (a single built-in grammar).
LanguageModelList lm = LanguageModelList.builder()
  .addFromURI("builtin:grammar/yes_no")
  .build();

try {
  asr.recognize(audio, lm);

  // Keep the whole result list so an empty list can be detected; get(0) on an
  // empty list would throw IndexOutOfBoundsException, which the catch clause
  // below does not handle.
  // NOTE(review): assumes waitRecognitionResult() returns a
  // java.util.List<RecognitionResult> -- confirm against the SDK.
  java.util.List<RecognitionResult> results = asr.waitRecognitionResult();
  if (results.isEmpty()) {
    System.out.println("Nenhum resultado de reconhecimento");
  } else {
    RecognitionResult result = results.get(0);
    int i = 0;
    for (RecognitionAlternative alt : result.getAlternatives()) {
      // Alternatives and interpretations are labelled starting from 1.
      System.out.println(String.format("Alternativa [%s] (score = %s): %s",
                                        ++i,
                                        alt.getConfidence(),
                                        alt.getText()));
      int j = 0;
      for (Interpretation interpretation : alt.getInterpretations()) {
        System.out.println(String.format("\t Interpretacao [%s]: %s",
                                          ++j,
                                          interpretation));
      }
    }
  }
} catch (RecognitionException e) {
  // Application-specific error handling goes here.
  ...
} finally {
  // Always release the recognizer, whether or not the recognition succeeded.
  asr.close();
}

Exemplo em Python

from cpqdasr import SpeechRecognizer, RecognitionException
from cpqdasr import FileAudioSource, LanguageModelList

# Recognition settings handed to the recognizer built below.
# NOTE(review): a no-input timeout of 50 looks very short if the unit is
# milliseconds -- confirm the intended value.
config = {
    "maxSentences": 3,
    "confidenceThreshold": 80,
    "noInputTimeoutEnabled": True,
    "noInputTimeoutMilis": 50,
    "startInputTimers": True,
}

# Recognizer for the given server URL and credentials, using the settings above.
asr = SpeechRecognizer(
    server_url="wss://speech.cpqd.com.br/asr/ws/v2/recognize/16k",
    credentials=("guest", "1234"),
    recog_config=config,
)

# Audio to be recognized, read from a file.
audio = FileAudioSource("/opt/audio/yes.wav")

# Language models used for this recognition (a single built-in grammar).
lm = LanguageModelList(
    LanguageModelList.from_uri("builtin:grammar/yes_no")
)

try:
    asr.recognize(audio, lm)

    # Keep the whole result list so an empty list can be detected; indexing
    # an empty list with [0] would raise IndexError.
    results = asr.waitRecognitionResult()
    if not results:
        print("Nenhum resultado de reconhecimento")
    else:
        result = results[0]
        # Alternatives and interpretations are labelled starting from 1.
        for i, alt in enumerate(result.alternatives, start=1):
            print("Alternativa [{}] (score = {}): {}".format(
                  i,
                  alt['score'],
                  alt['text']))
            for j, interpretation in enumerate(alt['interpretations'], start=1):
                print("\t Interpretacao [{}]: {}".format(
                      j,
                      interpretation))
except RecognitionException:
    # Application-specific error handling goes here; the exception is
    # re-raised so the failure is not silently swallowed.
    raise
finally:
    # Always release the recognizer, whether or not the recognition succeeded.
    asr.close()