1.3 基于GMM-HMM算法的语音识别
准备数据集
数据预处理
导入第三方库
from python_speech_features import mfcc
from scipy.io import wavfile
from hmmlearn import hmm
import joblib
import numpy as np
import os

定义 生成训练标签 函数
def gen_wavlist(wavpath):
    """Index every ``.wav`` file under *wavpath* and derive its label.

    The directory tree is walked recursively. For each file whose name ends
    in ``.wav`` the file id is the file name without its extension, and the
    label is the second ``_``-separated field of that id
    (``"speaker_3_take1.wav"`` -> id ``"speaker_3_take1"``, label ``"3"``).

    Args:
        wavpath: Root directory to search.

    Returns:
        A ``(wavdict, labeldict)`` tuple, where ``wavdict`` maps file id to
        the file's path and ``labeldict`` maps file id to its label. When two
        files in different directories share an id, the one visited last wins.

    Raises:
        IndexError: If a ``.wav`` file name contains no ``_`` separator.
    """
    wavdict = {}
    labeldict = {}
    for dirpath, _dirnames, filenames in os.walk(wavpath):
        for filename in filenames:
            if not filename.endswith('.wav'):
                continue
            # str.strip('.wav') removes any of the characters '.', 'w', 'a',
            # 'v' from BOTH ends (e.g. "wave_1.wav" -> "e_1"), so drop the
            # extension with splitext instead.
            fileid = os.path.splitext(filename)[0]
            wavdict[fileid] = os.path.join(dirpath, filename)
            labeldict[fileid] = fileid.split('_')[1]
    return wavdict, labeldict


# 定义 提取 MFCC 特征 函数 (Define the MFCC feature-extraction function)
def compute_mfcc(file):
    """Read a WAV file and return its MFCC feature matrix.

    Typical use: ``feat = compute_mfcc(wavdict[wavid])``.

    Args:
        file: Path (or open file object) accepted by ``scipy.io.wavfile.read``.

    Returns:
        The array returned by ``python_speech_features.mfcc`` for the audio.
    """
    fs, audio = wavfile.read(file)
    # Pass the file's real sample rate; without it mfcc() assumes its
    # 16 kHz default and mis-sizes the analysis windows for other rates.
    return mfcc(audio, samplerate=fs)


# 构建模型类 (Build the model class)
class Model():
    """A bank of GMM-HMMs, one per category, used as a classifier.

    Each category in ``CATEGORY`` gets its own ``hmm.GMMHMM``. ``train`` fits
    every model on the recordings carrying that category's label, and ``test``
    scores one recording against every model and picks the best-scoring one.
    """

    def __init__(self, CATEGORY=None, n_comp=3, n_mix=3, cov_type='diag', n_iter=1000):
        """Create one untrained GMMHMM per category.

        Args:
            CATEGORY: Sequence of category labels. ``None`` is treated as an
                empty list, which yields a model bank with no models.
            n_comp: Passed to ``GMMHMM`` as ``n_components``.
            n_mix: Passed to ``GMMHMM`` as ``n_mix``.
            cov_type: Passed to ``GMMHMM`` as ``covariance_type``.
            n_iter: Passed to ``GMMHMM`` as ``n_iter``.
        """
        super().__init__()
        # The default used to crash on len(None); fall back to "no categories".
        self.CATEGORY = CATEGORY if CATEGORY is not None else []
        self.category = len(self.CATEGORY)
        self.n_comp = n_comp
        self.n_mix = n_mix
        self.cov_type = cov_type
        self.n_iter = n_iter
        # Key step: build the list of identically configured models,
        # index-aligned with CATEGORY.
        self.models = [
            hmm.GMMHMM(n_components=self.n_comp, n_mix=self.n_mix,
                       covariance_type=self.cov_type, n_iter=self.n_iter)
            for _ in range(self.category)
        ]

    def train(self, wavdict=None, labeldict=None):
        """Fit each category's model on all recordings with that label.

        Args:
            wavdict: Mapping of file id to WAV path (as built by
                ``gen_wavlist``). ``None`` is treated as empty.
            labeldict: Mapping of file id to label. Must contain every id
                present in ``wavdict``.

        Raises:
            KeyError: If an id in ``wavdict`` is missing from ``labeldict``.
        """
        print("开始训练...")
        wavdict = wavdict or {}
        labeldict = labeldict or {}

        # Group the paths by label in one pass over the data.
        paths_by_label = {}
        for wavid, path in wavdict.items():
            paths_by_label.setdefault(labeldict[wavid], []).append(path)

        # Iterate the actual categories (previously hard-coded to 10).
        for label, model in zip(self.CATEGORY, self.models):
            paths = paths_by_label.get(label)
            if not paths:
                # No recordings for this category: leave its model unfitted.
                continue
            feats = [compute_mfcc(path) for path in paths]
            # fit() starts over on every call, so fitting file by file kept
            # only the last recording. Fit once on all sequences, using
            # `lengths` to mark where each recording starts and ends.
            model.fit(np.vstack(feats), lengths=[len(feat) for feat in feats])

    def test(self, filepath):
        """Classify one recording and print the result.

        Args:
            filepath: Path of the WAV file to classify.

        Returns:
            A one-element list holding the label of the best-scoring model.

        Raises:
            ValueError: If the model bank has no categories.
        """
        if self.category == 0:
            raise ValueError("Model has no categories to score against")
        # The features do not depend on the model, so compute them once.
        mfcc_feat = compute_mfcc(filepath)
        # Score of the recording under every category's model.
        scores = [self.models[k].score(mfcc_feat) for k in range(self.category)]
        # Pick the category with the highest score (first one wins on ties).
        result = [self.CATEGORY[int(np.argmax(scores))]]
        print('识别得到标签:\n',result)
        # NOTE(review): class_dict is not defined in this part of the file;
        # it is presumably a module-level label -> description mapping
        # defined elsewhere. Confirm it exists before calling test().
        print('识别的结果: \n', class_dict[result[0]])
        return result

    def save(self, path):
        """Write the list of models to *path* with ``joblib.dump``."""
        joblib.dump(self.models, path)

    def load(self, path="models.pkl"):
        """Replace the model list with the one stored at *path*.

        The default path is the file written by the training step; a
        pre-trained model file can be supplied instead.
        """
        # SECURITY: joblib.load unpickles the file and can execute arbitrary
        # code; only load model files from a trusted source.
        self.models = joblib.load(path)


# 开始训练 (Start training)
import warnings

# Silence every Python warning for the rest of the run.
warnings.filterwarnings("ignore")

# Voice-command classes for the training data: the strings '1' through '10',
# each standing for one kind of spoken command.
CATEGORY = [str(number) for number in range(1, 11)]

# Data preparation: index the training recordings and their labels.
wavdict, labeldict = gen_wavlist('training_data')
# A test set can be indexed the same way:
# testdict, testlabel = gen_wavlist('test_data')

# Build the (untrained) model bank, one model per category.
models = Model(CATEGORY=CATEGORY)

# Train, then save the fitted models to disk.
models.train(wavdict=wavdict, labeldict=labeldict)
print("完成训练,保存模型...")
models.save("models.pkl")


# 测试模型 (Test the model)
版权声明:本文为weixin_44850744原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。