《自然语言处理技术:Python实战》第一章 初识自然语言处理技术(1.3)

1.3 基于GMM-HMM算法的语音识别

准备数据集

数据预处理

导入第三方库

from python_speech_features import mfcc
from scipy.io import wavfile
from hmmlearn import hmm
import joblib
import numpy as np
import os

定义生成训练标签的函数

# Build wavdict (file id -> wav file path) and labeldict (file id -> label).
def gen_wavlist(wavpath):
    """Collect the ``.wav`` files under *wavpath* and derive a label for each.

    The directory tree is walked recursively. A file's id is its name without
    the ``.wav`` extension, and its label is the second ``_``-separated field
    of that id (for example ``"3_7_x.wav"`` has id ``"3_7_x"`` and label
    ``"7"``).

    Args:
        wavpath: Root directory to search.

    Returns:
        A ``(wavdict, labeldict)`` tuple. ``wavdict`` maps file id to file
        path; ``labeldict`` maps file id to label.

    Raises:
        IndexError: If a ``.wav`` file name contains no ``_`` separator.
    """
    wavdict = {}
    labeldict = {}
    for dirpath, _dirnames, filenames in os.walk(wavpath):
        for filename in filenames:
            if not filename.endswith('.wav'):
                continue
            # str.strip('.wav') would remove any of the characters
            # '.', 'w', 'a', 'v' from BOTH ends of the name (turning
            # "wav_1.wav" into "_1"), so drop the extension explicitly.
            fileid = os.path.splitext(filename)[0]
            wavdict[fileid] = os.path.join(dirpath, filename)
            labeldict[fileid] = fileid.split('_')[1]
    return wavdict, labeldict

定义提取 MFCC 特征的函数

# Feature extraction, e.g. feat = compute_mfcc(wavdict[wavid])
def compute_mfcc(file):
    """Read a WAV file and return its MFCC features.

    Args:
        file: Path (or open file object) accepted by ``scipy.io.wavfile.read``.

    Returns:
        The array produced by ``python_speech_features.mfcc`` for the audio
        in *file* (one row of cepstral coefficients per analysis frame,
        according to that library's documentation).
    """
    fs, audio = wavfile.read(file)
    # Pass the file's real sample rate: mfcc() otherwise assumes 16 kHz,
    # which mis-sizes the analysis frames and filterbank for any recording
    # made at a different rate.
    return mfcc(audio, samplerate=fs)

构建模型类

class Model:
    """A bank of GMM-HMMs, one per category, used as a classifier.

    Each category in ``CATEGORY`` gets its own ``hmm.GMMHMM``. Training fits
    every model on the recordings of its category; recognition scores a
    recording under every model and picks the best-scoring category.
    """

    def __init__(self, CATEGORY=None, n_comp=3, n_mix=3, cov_type='diag', n_iter=1000):
        """Create one untrained GMM-HMM per category.

        Args:
            CATEGORY: Sequence of category labels. ``None`` is treated as an
                empty sequence (no models are created).
            n_comp: Passed to ``GMMHMM`` as ``n_components``.
            n_mix: Passed to ``GMMHMM`` as ``n_mix``.
            cov_type: Passed to ``GMMHMM`` as ``covariance_type``.
            n_iter: Passed to ``GMMHMM`` as ``n_iter``.
        """
        super().__init__()
        # The default used to crash on len(None); fall back to "no categories".
        self.CATEGORY = [] if CATEGORY is None else CATEGORY
        self.category = len(self.CATEGORY)
        self.n_comp = n_comp
        self.n_mix = n_mix
        self.cov_type = cov_type
        self.n_iter = n_iter
        # models[k] is the model for CATEGORY[k].
        self.models = [
            hmm.GMMHMM(n_components=self.n_comp, n_mix=self.n_mix,
                       covariance_type=self.cov_type, n_iter=self.n_iter)
            for _ in range(self.category)
        ]

    def train(self, wavdict=None, labeldict=None):
        """Fit each category's model on all recordings of that category.

        Args:
            wavdict: Mapping of file id to WAV path. ``None`` means empty.
            labeldict: Mapping of file id to label; must contain every key of
                *wavdict*. ``None`` means empty.

        Raises:
            KeyError: If a file id in *wavdict* is missing from *labeldict*.
        """
        print("开始训练...")
        wavdict = {} if wavdict is None else wavdict
        labeldict = {} if labeldict is None else labeldict
        # Pair labels with models instead of assuming exactly 10 categories.
        for label, model in zip(self.CATEGORY, self.models):
            feats = [
                compute_mfcc(path)
                for fileid, path in wavdict.items()
                if labeldict[fileid] == label
            ]
            if not feats:
                # No recordings for this category: leave its model unfitted,
                # as before.
                continue
            # fit() re-estimates the model from scratch on each call, so
            # calling it once per file would keep only the last file. Stack
            # all sequences and tell hmmlearn where each one ends.
            model.fit(np.concatenate(feats), lengths=[len(f) for f in feats])

    def test(self, filepath):
        """Recognize one recording and print the result.

        Args:
            filepath: Path of the WAV file to classify.

        Returns:
            A one-element list holding the best-scoring category label.

        Raises:
            ValueError: If there are no models to score against.
        """
        # The features do not depend on the model, so extract them once.
        mfcc_feat = compute_mfcc(filepath)
        # Score of the recording under each category's model.
        scores = [model.score(mfcc_feat) for model in self.models[:self.category]]
        # Pick the category whose model scores highest.
        best = int(np.argmax(scores))
        result = [self.CATEGORY[best]]
        print('识别得到标签:\n',result)
        # NOTE(review): class_dict is a module-level name that is not defined
        # in the visible part of this file; it must exist (and contain the
        # label) before test() is called — confirm where it is defined.
        print('识别的结果: \n', class_dict[result[0]])
        return result

    def save(self, path):
        """Serialize the list of trained models to *path* with joblib."""
        joblib.dump(self.models, path)

    def load(self, path="models.pkl"):
        """Replace ``self.models`` with the list stored at *path*.

        Args:
            path: File previously written by :meth:`save` (or any other
                prepared model file). Defaults to ``"models.pkl"``.
        """
        # SECURITY: joblib.load unpickles the file, which can execute
        # arbitrary code. Only load model files from a trusted source.
        self.models = joblib.load(path)

开始训练

import warnings

# Silence every warning raised for the rest of the run.
warnings.filterwarnings("ignore")

# Voice-command classes: the strings '1' through '10', one per command type.
CATEGORY = [str(number) for number in range(1, 11)]
# Index the training recordings (file id -> path, file id -> label).
# A test set could be indexed the same way with gen_wavlist('test_data').
wavdict, labeldict = gen_wavlist('training_data')
# Construct the classifier for these categories.
models = Model(CATEGORY=CATEGORY)
# Train on the indexed recordings, then persist the result.
models.train(wavdict, labeldict)
print("完成训练,保存模型...")
models.save("models.pkl")

测试模型


版权声明:本文为weixin_44850744原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。