语音识别之HTK入门(四)——HCompV源码解析
语音识别之HTK入门(四)——HCompV源码解析
上一篇博客中用HCompV工具通过全局数据来初始化HMM模型参数,包括proto和vFloor两个文件。同时手工制作了hmmdefs模型文件,在这个文件中列举了每一个单音素模型的参数,它们是完全相同的。
现在一步一步看看HCompV是如何达成目标的。
如前面博客指出的,可以通过visual studio跟踪调试HCompV。
首先列出大体的流程涉及的函数:
分别是:
1)InitShell // 对命令行参数进行处理,这个函数在多个HTKTools之间都用到,它主要把参数保存到savedCommandLine这里,后续对它进行读写处理;比如系统的参数设置,一般是“-C|S|V|D”,并把余下的参数保存到arglist二维字符数组中,计数变量为argcount;
2)然后是一堆初始化工作
/* Library initialisation calls made at the start of HCompV's main(). */
InitMem();
InitLabel();
InitMath();
InitSigP();
InitWave();
InitAudio();
InitVQ();
InitModel();
/* InitParm is the only initialiser here whose status is checked. */
if (InitParm() < SUCCESS) {
   HError(2000,"HCompV: InitParm failed");
}

/* No arguments left: print usage (unless info was already printed) and exit. */
if (!InfoPrinted() && NumArgs() == 0) {
   ReportUsage();
}
if (NumArgs() == 0) {
   Exit(0);
}
3)核心的处理函数是从CreateHMMSet开始的。
下面来详细分析下这个函数做了哪些任务,首先在代码中函数声明为
/* EXPORT->CreateHMMSet: create the basic HMMSet structure */
void CreateHMMSet(HMMSet *hset, MemHeap *heap, Boolean allowTMods);它接受三个参数分别是HMMSet结构指针、MemHeap结构指针和allowTMods的布尔值。因此这两个结构大体长什么样子肯定要有所了解,否则后面的操作、赋值、计算都不明了。而allowTMods是个开关量。
typedef struct _HMMSet{MemHeap *hmem; /* memory heap for this HMM Set */ Boolean *firstElem; /* first element added to hmem during MakeHMMSet*/char *hmmSetId; /* identifier for the hmm set */MILink mmfNames; /* List of external file names */int numLogHMM; /* Num of logical HMM's */int numPhyHMM; /* Num of distinct physical HMM's */int numFiles; /* total number of ext files */int numMacros; /* num macros used in this set */MLink * mtab; /* Array[0..MACHASHSIZE-1]OF MLink */PtrMap ** pmap; /* Array[0..PTRHASHSIZE-1]OF PtrMap* */Boolean allowTMods; /* true if HMMs can have Tee Models */Boolean optSet; /* true if global options have been set */short vecSize; /* dimension of observation vectors */short swidth[SMAX]; /* [0]=num streams,[i]=width of stream i */ParmKind pkind; /* kind of obs vector components */DurKind dkind; /* kind of duration model (model or state) */CovKind ckind; /* cov kind - only global in V1.X */HSetKind hsKind; /* kind of HMM set */TMixRec tmRecs[SMAX]; /* array[1..S]of tied mixture record */int numStates; /* Number of states in HMMSet */int numSharedStates; /* Number of shared states in HMMSet */int numMix; /* Number of mixture components in HMMSet */int numSharedMix; /* Number of shared mixtures in HMMSet */int numTransP; /* Number of distinct transition matrices */int ckUsage[NUMCKIND]; /* Number of components using given ckind */InputXForm *xf; /* Input transform of HMMSet */AdaptXForm *semiTied; /* SemiTied transform associated with model set */short projSize; /* dimension of vector to update *//* Adaptation information accumulates */Boolean attRegAccs; /* have the set of accumulates been attached */Boolean attXFormInfo; /* have the set of adapt info been attached */Boolean attMInfo; /* have the set of adapt info been attached */AdaptXForm *curXForm;AdaptXForm *parentXForm;/* Added to support LogWgts */Boolean logWt; /* Component weights are stored as Logs *//* Added to support delayed loading of the semi-tied transform */char *semiTiedMacro; 
/* macroname of semi-tied transform */} HMMSet;
看每个数据项的注释,大体意思应该比较明白了。其中第一项就是HMMSet需要用到的内存空间,用来保存模型参数,这也就是CreateHMMSet函数第二个形参的用途。
/*
 * MemHeap: bookkeeping record for one memory heap.
 *
 * NOTE(review): several fields carried two descriptions in the original
 * comments (an element-counted reading and a byte-counted reading). They
 * are shown below as "elems | bytes"; which heap types use which reading
 * is not visible in this excerpt - confirm against HMem.h.
 */
typedef struct {
   char *name;           /* name of this memory heap */
   HeapType type;        /* type of this heap */
   float growf;          /* successive blocks grow as 1+growf */
   size_t elemSize;      /* size of each elem | always 1 */
   size_t minElem;       /* initial #elems per block | initial #bytes per block */
   size_t maxElem;       /* max #elems per block | max #bytes per block */
   size_t curElem;       /* current #elems per block | current #bytes per block */
   size_t totUsed;       /* total #elems used | total #bytes used */
   size_t totAlloc;      /* total #elems allocated | total #bytes allocated */
   BlockP heap;          /* linked list of blocks */
   Boolean protectStk;   /* MSTAK only: prevents disposal below stack top */
} MemHeap;
该HMMSet定义了hmm模型的个数、状态数、观察向量的维度、转移概率矩阵的个数等等。在CreateHMMSet函数里只是初始化这样的结构体,而具体值需要后面根据模型文件来构建模型时填充。
CreateHMMSet(&hset,&gstack,FALSE);pathPattern[0]='\0';while (NextArg() == SWITCHARG) {s = GetSwtArg();if (strlen(s)!=1) HError(2019,"HCompV: Bad switch %s; must be single letter",s);switch(s[0]){case 'f':if (NextArg() != FLOATARG)HError(2019,"HCompV: Variance floor scale expected");vFloorScale = GetChkedFlt(0.0,100.0,s);break;case 'l':if (NextArg() != STRINGARG)HError(2019,"HCompV: Segment label expected");segLab = GetStrArg();break;case 'm':meanUpdate = TRUE;break;case 'o':outfn = GetStrArg();break; case 'v':if (NextArg() != FLOATARG)HError(2019,"HCompV: Minimum variance level expected");minVar = GetChkedFlt(0.0,100.0,s);break;case 'k':if (NextArg() != STRINGARG)HError(2019,"HCompV: speaker pattern expected");strcpy(spPattern,GetStrArg());if (strchr(spPattern,'%')==NULL)HError(2019,"HCompV: Speaker mask invalid");break;case 'c':if (NextArg() != STRINGARG)HError(2019,"HCompV: CMV output dir expected");strcpy(cmDir,GetStrArg());DoCMV = TRUE;break;case 'p':if (NextArg() != STRINGARG)HError(2019,"HCompV: path pattern expected");strcpy(pathPattern,GetStrArg());if (strchr(pathPattern,'%')==NULL)HError(2019,"HCompV: Path mask invalid");break;case 'q':if (NextArg() != STRINGARG)HError(2019,"HCompV: output flags (nmv)");strcpy(oflags,GetStrArg());break;case 'B':saveBinary = TRUE;break;case 'F':if (NextArg() != STRINGARG)HError(2019,"HCompV: Data File format expected");if((dff = Str2Format(GetStrArg())) == ALIEN)HError(-2089,"HCompV: Warning ALIEN Data file format set");break;case 'G':if (NextArg() != STRINGARG)HError(2019,"HCompV: Label File format expected");if((lff = Str2Format(GetStrArg())) == ALIEN)HError(-2089,"HCompV: Warning ALIEN Label file format set");break;case 'H':if (NextArg() != STRINGARG)HError(2019,"HCompV: HMM macro file name expected");AddMMF(&hset,GetStrArg());break;case 'I':if (NextArg() != STRINGARG)HError(2019,"HCompV: MLF file name expected");LoadMasterFile(GetStrArg());break;case 'L':if (NextArg()!=STRINGARG)HError(2019,"HCompV: Label file directory 
expected");labDir = GetStrArg();break;case 'M':if (NextArg()!=STRINGARG)HError(2019,"HCompV: Output macro file directory expected");outDir = GetStrArg();break;case 'T':if (NextArg() != INTARG)HError(2019,"HCompV: Trace value expected");trace = GetChkedInt(0,077,s); break;case 'X':if (NextArg()!=STRINGARG)HError(2019,"HCompV: Label file extension expected");labExt = GetStrArg();break;default:HError(2019,"HCompV: Unknown switch %s",s);}}/* if not doing CMV, do standard HCompV */if (DoCMV == FALSE){if (NextArg()!=STRINGARG)HError(2019,"HCompV: Source HMM file name expected");hmmfn = GetStrArg();Initialise();do {if (NextArg()!=STRINGARG)HError(2019,"HCompV: Training data file name expected");datafn = GetStrArg();LoadFile(datafn);} while (NumArgs()>0);SetCovs();FixGConsts(hmmLink);SaveModel(outfn); if (trace&T_TOP)printf("Output written to directory %s\n",(outDir==NULL)?"./":outDir);if (vFloorScale>0.0)PutVFloor();}
while循环语句就是在处理命令行的参数,不断地读取由InitShell处理后剩下的参数。在HCompV命令中,有-f 0.01 -m -M dir proto等7个参数,处理完毕后,设置了全局变量vFloorScale(方差下限的缩放系数)、meanUpdate(布尔值,表示同时更新均值)和outDir(模型的输出目录);hmmfn(描述模型的文件名)则在循环结束后由下面的if分支设置。
接着,默认情况下DoCMV为false,因此进入if第一个分支,设置hmmfn值为proto,它就是我们之前为HMM模型描述的结构。然后执行Initialise()函数。
4)Initialise()
函数前面的注释 /* Initialise: load HMMs and create accumulators */
下面把这个函数的主要调用关系罗列出来,然后分析它们各自都干了什么。
void Initialise(void)
{int s,V;Boolean eSep;char base[MAXSTRLEN];char path[MAXSTRLEN];char ext[MAXSTRLEN];/* Load HMM defs */ if(MakeOneHMM(&hset,BaseOf(hmmfn,base))<SUCCESS)HError(2028,"Initialise: MakeOneHMM failed");if(LoadHMMSet(&hset,PathOf(hmmfn,path),ExtnOf(hmmfn,ext))<SUCCESS)HError(2028,"Initialise: LoadHMMSet failed");SetParmHMMSet(&hset);/* Create a heap to store the input data */CreateHeap(&iStack,"InBuf", MSTAK, 1, 0.5, 100000, LONG_MAX);/* Get a pointer to the physical HMM */hmmId = GetLabId(base,FALSE);macroLink = FindMacroName(&hset,'h',hmmId);hmmLink = (HLink)macroLink->structure;/* Find out for which streams full covariance is needed */CheckVarianceKind( );/* Create accumulators for the mean and variance */for (s=1;s<=hset.swidth[0]; s++){V = hset.swidth[s];accs[s].meanSum=CreateVector(&gstack,V);ZeroVector(accs[s].meanSum);if (fullcNeeded[s]) {accs[s].squareSum.inv=CreateSTriMat(&gstack,V);accs[s].fixed.inv=CreateSTriMat(&gstack,V);ZeroTriMat(accs[s].squareSum.inv);}else {accs[s].squareSum.var=CreateSVector(&gstack,V);accs[s].fixed.var=CreateSVector(&gstack,V);ZeroVector(accs[s].squareSum.var);}}/* Create an object to hold the input parameters */SetStreamWidths(hset.pkind,hset.vecSize,hset.swidth,&eSep);obs=MakeObservation(&gstack,hset.swidth,hset.pkind,FALSE,eSep);if(segLab != NULL) {segId = GetLabId(segLab,TRUE);}}
可以看出首先调用MakeOneHMM,看看它完成了哪些操作。
/*
 * EXPORT->MakeOneHMM: Create a singleton for the HMM hname
 *
 * Initialises hset with the single name hname via InitHMMSet. If that
 * fails, hset is reset and FAIL is returned; otherwise SUCCESS.
 */
ReturnStatus MakeOneHMM(HMMSet *hset, char *hname)
{
   if (InitHMMSet(hset, hname, TRUE) >= SUCCESS) {
      return SUCCESS;
   }
   /* Initialisation failed: undo whatever was set up before reporting it. */
   ResetHMMSet(hset);
   return FAIL;
}
它实际上调用了初始化InitHMMSet函数,通过读取proto这个hmm模型文件。proto文件的内容如下:
~o <VecSize> 39 <MFCC_0_D_A>
~h "proto"
<BeginHMM>
<NumStates> 5
<State> 2
<Mean> 39
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
<Variance> 39
1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
<State> 3
<Mean> 39
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
<Variance> 39
1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
<State> 4
<Mean> 39
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
<Variance> 39
1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
<TransP> 5
0.0 1.0 0.0 0.0 0.0
0.0 0.6 0.4 0.0 0.0
0.0 0.0 0.6 0.4 0.0
0.0 0.0 0.0 0.7 0.3
0.0 0.0 0.0 0.0 0.0
<EndHMM>
其实这个hmm原型文件描述了HMM主要的参数,包括5个状态,观察向量的维度是39,它是语音的MFCC以及一阶、二阶参数(MFCC_0_D_A)。这个原型文件只描述了一个hmm模型,名字就是"proto"。
在InitHMMSet函数中调用了CreateHMM函数来完成单个模型的构建。模型名称为“proto”,并保存到HMMSet中;但实际上这时模型的参数还没有设定,它只是一个有了名字的空模型。
在接下来的do-while循环语句中,LoadFile函数依次读取每个MFCC文件,并通过累加的方式为计算特征向量的均值和方差做统计。
SetCovs()函数完成方差的计算,并复制给每个状态,来初始化状态的概率密度函数。
/* SetCovs: set covariance values in hmm */
void SetCovs(void)
{int i,s,m;StateElem *se;StreamElem *ste;MixtureElem *me;MixPDF *mp;CalcCovs(); // 计算均值和方差 保存在全局的CovAcc对象中。if (trace&T_TOP) {printf("Updating HMM ");if (meanUpdate) printf("Means and ");printf("Covariances\n");}for (i=2,se=hmmLink->svec+2; i < hmmLink->numStates; i++,se++)for (s=1,ste=se->info->pdf+1; s <= hset.swidth[0]; s++,ste++)for (m=1,me = ste->spdf.cpdf+1; m<=ste->nMix; m++, me++) {mp = me->mpdf;if (meanUpdate && !IsSeenV(mp->mean)){ /* meanSum now holds mean */CopyVector(accs[s].meanSum,mp->mean); // 把计算的均方差复制给模型的状态TouchV(mp->mean);}if (!IsSeenV(mp->cov.var)){if (mp->ckind==FULLC)CopyMatrix(accs[s].fixed.inv,mp->cov.inv);else if (fullcNeeded[s]) /* dont need full cov, but its all we have */ TriDiag2Vector(accs[s].fixed.inv,mp->cov.var);elseCopyVector(accs[s].fixed.var,mp->cov.var);TouchV(mp->cov.var);}}ClearSeenFlags(&hset,CLR_ALL);
}
- php的api接口
- 【Delphi学习】Form的borderstyle属性
- componentDidMount,react
- 完美解决Ubuntu16.04虚拟机窗口全屏问题
- linux启动停止重启MySQL的命令
- 软件质量有什么特性?
- 软件构造复习小结(2)——设计规约(Specification)
- VC学习资料,
- itoa函数
- 了解Intel IPP
- 基于离散小波变换 (DWT)的图像信息隐藏算法
- 9个免费的 CSS 生成器网站
- 在Python中,可以使用try
- curl命令详解 (实例使用方法)
- idea各种中文乱码问题
- SQL Server DBCC
- QSqlQuery内存问题分析及解决