PocketSphinx 5prealpha
acmod.h
Go to the documentation of this file.
1/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/* ====================================================================
3 * Copyright (c) 2008 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
21 *
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 * ====================================================================
35 *
36 */
37
43#ifndef __ACMOD_H__
44#define __ACMOD_H__
45
46/* System headers. */
47#include <stdio.h>
48
49/* SphinxBase headers. */
50#include <sphinxbase/cmd_ln.h>
51#include <sphinxbase/logmath.h>
52#include <sphinxbase/fe.h>
53#include <sphinxbase/feat.h>
54#include <sphinxbase/bitvec.h>
55#include <sphinxbase/err.h>
56#include <sphinxbase/prim_type.h>
57
58/* Local headers. */
59#include "ps_mllr.h"
60#include "bin_mdef.h"
61#include "tmat.h"
62#include "hmm.h"
63
67typedef enum acmod_state_e {
73
77#define SENSCR_DUMMY 0x7fff
78
/* Linear transform parameters used for speaker adaptation (MLLR). */
82struct ps_mllr_s {
83 int refcnt; /* Reference count. */
84 int n_class; /* Number of MLLR classes. */
85 int n_feat; /* Number of feature streams. */
86 int *veclen; /* Length of input vectors for each stream. */
87 float32 ****A; /* Rotation part of mean transformations. */
88 float32 ***b; /* Bias part of mean transformations. */
89 float32 ***h; /* Diagonal transformation of variances. */
90 int32 *cb2mllr; /* Mapping from codebooks to transformations. */
91};
92
96typedef struct ps_mgau_s ps_mgau_t;
97
98typedef struct ps_mgaufuncs_s {
99 char const *name;
100
101 int (*frame_eval)(ps_mgau_t *mgau,
102 int16 *senscr,
103 uint8 *senone_active,
104 int32 n_senone_active,
105 mfcc_t ** feat,
106 int32 frame,
107 int32 compallsen);
108 int (*transform)(ps_mgau_t *mgau,
109 ps_mllr_t *mllr);
110 void (*free)(ps_mgau_t *mgau);
112
113struct ps_mgau_s {
116};
117
/*
 * Dispatch helpers for Gaussian-mixture implementations.
 *
 * ps_mgau_base() casts any implementation pointer to ps_mgau_t *.  The
 * remaining macros look up the corresponding function pointer in that
 * object's `vt` function table (frame_eval, transform, free) and call it,
 * passing the original `mg` pointer through as the first argument.
 *
 * Note: `mg` is expanded twice by each dispatch macro, so it should be an
 * expression without side effects.
 */
118#define ps_mgau_base(mg) ((ps_mgau_t *)(mg))
119#define ps_mgau_frame_eval(mg,senscr,senone_active,n_senone_active,feat,frame,compallsen) \
120 (*ps_mgau_base(mg)->vt->frame_eval) \
121 (mg, senscr, senone_active, n_senone_active, feat, frame, compallsen)
122#define ps_mgau_transform(mg, mllr) \
123 (*ps_mgau_base(mg)->vt->transform)(mg, mllr)
124#define ps_mgau_free(mg) \
125 (*ps_mgau_base(mg)->vt->free)(mg)
126
148struct acmod_s {
149 /* Global objects, not retained. */
150 cmd_ln_t *config;
151 logmath_t *lmath;
152 glist_t strings;
154 /* Feature computation: */
155 fe_t *fe;
156 feat_t *fcb;
158 /* Model parameters: */
164 /* Senone scoring: */
172 /* Utterance processing: */
173 mfcc_t **mfc_buf;
174 mfcc_t ***feat_buf;
175 FILE *rawfh;
176 FILE *mfcfh;
177 FILE *senfh;
178 FILE *insenfh;
179 long *framepos;
181 /* Rawdata collected during decoding */
182 int16 *rawdata;
183 int32 rawdata_size;
184 int32 rawdata_pos;
185
186 /* A whole bunch of flags and counters: */
187 uint8 state;
189 uint8 grow_feat;
201};
202typedef struct acmod_s acmod_t;
203
220acmod_t *acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb);
221
234
242int acmod_set_senfh(acmod_t *acmod, FILE *senfh);
243
251int acmod_set_mfcfh(acmod_t *acmod, FILE *logfh);
252
260int acmod_set_rawfh(acmod_t *acmod, FILE *logfh);
261
265void acmod_free(acmod_t *acmod);
266
270int acmod_start_utt(acmod_t *acmod);
271
275int acmod_end_utt(acmod_t *acmod);
276
289int acmod_rewind(acmod_t *acmod);
290
300int acmod_advance(acmod_t *acmod);
301
310int acmod_set_grow(acmod_t *acmod, int grow_feat);
311
330int acmod_process_raw(acmod_t *acmod,
331 int16 const **inout_raw,
332 size_t *inout_n_samps,
333 int full_utt);
334
346int acmod_process_cep(acmod_t *acmod,
347 mfcc_t ***inout_cep,
348 int *inout_n_frames,
349 int full_utt);
350
364int acmod_process_feat(acmod_t *acmod,
365 mfcc_t **feat);
366
373int acmod_set_insenfh(acmod_t *acmod, FILE *insenfh);
374
380int acmod_read_scores(acmod_t *acmod);
381
391mfcc_t **acmod_get_frame(acmod_t *acmod, int *inout_frame_idx);
392
406int16 const *acmod_score(acmod_t *acmod,
407 int *inout_frame_idx);
408
412int acmod_write_senfh_header(acmod_t *acmod, FILE *logfh);
413
417int acmod_write_scores(acmod_t *acmod, int n_active, uint8 const *active,
418 int16 const *senscr, FILE *senfh);
419
420
424int acmod_best_score(acmod_t *acmod, int *out_best_senid);
425
429void acmod_clear_active(acmod_t *acmod);
430
434void acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm);
435
439#define acmod_activate_sen(acmod, sen) bitvec_set((acmod)->senone_active_vec, sen)
440
444int32 acmod_flags2list(acmod_t *acmod);
445
449int32 acmod_stream_offset(acmod_t *acmod);
450
454void acmod_start_stream(acmod_t *acmod);
455
459void acmod_set_rawdata_size(acmod_t *acmod, int32 size);
460
464void acmod_get_rawdata(acmod_t *acmod, int16 **buffer, int32 *size);
465
466#endif /* __ACMOD_H__ */
void acmod_get_rawdata(acmod_t *acmod, int16 **buffer, int32 *size)
Retrieves the raw data collected during utterance decoding.
Definition acmod.c:1332
int acmod_set_grow(acmod_t *acmod, int grow_feat)
Set memory allocation policy for utterance processing.
Definition acmod.c:410
void acmod_free(acmod_t *acmod)
Finalize an acoustic model.
Definition acmod.c:299
int acmod_process_raw(acmod_t *acmod, int16 const **inout_raw, size_t *inout_n_samps, int full_utt)
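Feed raw audio data into the acoustic model for scoring. (The extracted brief, "TODO: Set queue length for utterance processing.", appears to be a stray note for a separate, not-yet-implemented function rather than a description of this one.)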
Definition acmod.c:607
int acmod_end_utt(acmod_t *acmod)
Mark the end of an utterance.
Definition acmod.c:441
void acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm)
Activate senones associated with an HMM.
Definition acmod.c:1213
int acmod_best_score(acmod_t *acmod, int *out_best_senid)
Get best score and senone index for current frame.
Definition acmod.c:1168
int acmod_advance(acmod_t *acmod)
Advance the frame index.
Definition acmod.c:899
mfcc_t ** acmod_get_frame(acmod_t *acmod, int *inout_frame_idx)
Get a frame of dynamic feature data.
Definition acmod.c:1088
int acmod_set_mfcfh(acmod_t *acmod, FILE *logfh)
Start logging MFCCs to a filehandle.
Definition acmod.c:375
enum acmod_state_e acmod_state_t
States in utterance processing.
int acmod_read_scores(acmod_t *acmod)
Read one frame of scores from senone score dump file.
Definition acmod.c:1012
int16 const * acmod_score(acmod_t *acmod, int *inout_frame_idx)
Score one frame of data.
Definition acmod.c:1106
int acmod_write_scores(acmod_t *acmod, int n_active, uint8 const *active, int16 const *senscr, FILE *senfh)
Write a frame of senone scores to a dump file.
Definition acmod.c:911
int acmod_set_rawfh(acmod_t *acmod, FILE *logfh)
Start logging raw audio to a filehandle.
Definition acmod.c:387
acmod_state_e
States in utterance processing.
Definition acmod.h:67
@ ACMOD_IDLE
Not in an utterance.
Definition acmod.h:68
@ ACMOD_PROCESSING
Utterance in progress.
Definition acmod.h:70
@ ACMOD_ENDED
Utterance ended, still buffering.
Definition acmod.h:71
@ ACMOD_STARTED
Utterance started, no data yet.
Definition acmod.h:69
int acmod_rewind(acmod_t *acmod)
Rewind the current utterance, allowing it to be rescored.
Definition acmod.c:877
int32 acmod_stream_offset(acmod_t *acmod)
Get the offset of the utterance start of the current stream, helpful for stream-wide timing.
Definition acmod.c:1308
void acmod_set_rawdata_size(acmod_t *acmod, int32 size)
Sets the limit of the raw audio data to store.
Definition acmod.c:1321
int acmod_set_senfh(acmod_t *acmod, FILE *senfh)
Start logging senone scores to a filehandle.
Definition acmod.c:364
int acmod_process_cep(acmod_t *acmod, mfcc_t ***inout_cep, int *inout_n_frames, int full_utt)
Feed acoustic feature data into the acoustic model for scoring.
Definition acmod.c:699
int acmod_start_utt(acmod_t *acmod)
Mark the start of an utterance.
Definition acmod.c:423
int acmod_process_feat(acmod_t *acmod, mfcc_t **feat)
Feed dynamic feature data into the acoustic model for scoring.
Definition acmod.c:797
int acmod_write_senfh_header(acmod_t *acmod, FILE *logfh)
Write senone dump file header.
Definition acmod.c:350
ps_mllr_t * acmod_update_mllr(acmod_t *acmod, ps_mllr_t *mllr)
Adapt acoustic model using a linear transform.
Definition acmod.c:339
int32 acmod_flags2list(acmod_t *acmod)
Build the active senone list from the active-senone flags (presumably the bits set in senone_active_vec).
Definition acmod.c:1254
int acmod_set_insenfh(acmod_t *acmod, FILE *insenfh)
Set up a senone score dump file for input.
Definition acmod.c:864
void acmod_start_stream(acmod_t *acmod)
Reset the current stream.
Definition acmod.c:1314
acmod_t * acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb)
Initialize an acoustic model.
Definition acmod.c:228
void acmod_clear_active(acmod_t *acmod)
Clear set of active senones.
Definition acmod.c:1197
Binary format model definition files, with support for heterogeneous topologies and variable-size N-p...
Implementation of HMM base structure.
int32 frame_idx_t
Type for frame index values.
Definition hmm.h:64
Model-space linear transforms for speaker adaptation.
Acoustic model structure.
Definition acmod.h:148
ps_mgau_t * mgau
Model parameters.
Definition acmod.h:161
uint8 state
State of utterance processing.
Definition acmod.h:187
frame_idx_t n_mfc_frame
Number of frames active in mfc_buf.
Definition acmod.h:196
bin_mdef_t * mdef
Model definition.
Definition acmod.h:159
cmd_ln_t * config
Configuration.
Definition acmod.h:150
feat_t * fcb
Dynamic feature computation.
Definition acmod.h:156
bitvec_t * senone_active_vec
Active GMMs in current frame.
Definition acmod.h:166
ps_mllr_t * mllr
Speaker transformation.
Definition acmod.h:162
int n_senone_active
Number of active GMMs.
Definition acmod.h:169
fe_t * fe
Acoustic feature computation.
Definition acmod.h:155
uint8 * senone_active
Array of deltas to active GMMs.
Definition acmod.h:167
mfcc_t ** mfc_buf
Temporary buffer of acoustic features.
Definition acmod.h:173
FILE * mfcfh
File for writing acoustic feature data.
Definition acmod.h:176
frame_idx_t n_feat_alloc
Number of frames allocated in feat_buf.
Definition acmod.h:198
frame_idx_t n_feat_frame
Number of frames active in feat_buf.
Definition acmod.h:199
int log_zero
Zero log-probability value.
Definition acmod.h:170
logmath_t * lmath
Log-math computation.
Definition acmod.h:151
glist_t strings
Temporary acoustic model filenames.
Definition acmod.h:152
int16 * senone_scores
GMM scores for current frame.
Definition acmod.h:165
tmat_t * tmat
Transition matrices.
Definition acmod.h:160
int senscr_frame
Frame index for senone_scores.
Definition acmod.h:168
FILE * senfh
File for writing senone score data.
Definition acmod.h:177
long * framepos
File positions of recent frames in senone file.
Definition acmod.h:179
uint8 compallsen
Compute all senones?
Definition acmod.h:188
uint8 grow_feat
Whether to grow feat_buf.
Definition acmod.h:189
FILE * insenfh
Input senone score file.
Definition acmod.h:178
frame_idx_t feat_outidx
Start of active frames in feat_buf.
Definition acmod.h:200
uint8 insen_swap
Whether to swap input senone score.
Definition acmod.h:190
FILE * rawfh
File for writing raw audio data.
Definition acmod.h:175
frame_idx_t output_frame
Index of next frame of dynamic features.
Definition acmod.h:194
frame_idx_t utt_start_frame
Index of the utterance start in the stream, all timings are relative to that.
Definition acmod.h:192
frame_idx_t n_mfc_alloc
Number of frames allocated in mfc_buf.
Definition acmod.h:195
mfcc_t *** feat_buf
Temporary buffer of dynamic features.
Definition acmod.h:174
frame_idx_t mfc_outidx
Start of active frames in mfc_buf.
Definition acmod.h:197
An individual HMM among the HMM search space.
int frame_idx
frame counter.
Definition acmod.h:115
ps_mgaufuncs_t * vt
vtable of mgau functions.
Definition acmod.h:114
Model-space linear transform (MLLR) structure.
Definition acmod.h:82
int * veclen
Length of input vectors for each stream.
Definition acmod.h:86
int n_class
Number of MLLR classes.
Definition acmod.h:84
float32 **** A
Rotation part of mean transformations.
Definition acmod.h:87
float32 *** b
Bias part of mean transformations.
Definition acmod.h:88
int32 * cb2mllr
Mapping from codebooks to transformations.
Definition acmod.h:90
int refcnt
Reference count.
Definition acmod.h:83
int n_feat
Number of feature streams.
Definition acmod.h:85
float32 *** h
Diagonal transformation of variances.
Definition acmod.h:89
Transition matrix data structure.
Definition tmat.h:55
Transition matrix data structure.