Speech Emotion Recognition in Python (Part 2)


import os
import pickle

import librosa
import matplotlib.pyplot as plt
import numpy as np

import keras
from keras import layers
from keras import models
from keras import optimizers
# layer classes used later when building the CNN
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Activation, Dropout, Flatten, Dense
from keras.utils import to_categorical

path = r'G:\test.wav'
y, sr = librosa.load(path, sr=None)
def normalizeVoiceLen(y, normalizedLen):
    nframes = len(y)
    y = np.reshape(y, [nframes, 1]).T
    # Normalize the audio length to 2 s (32000 samples): zero-pad short clips, truncate long ones
    if nframes < normalizedLen:
        res = normalizedLen - nframes
        res_data = np.zeros([1, res], dtype=np.float32)
        y = np.reshape(y, [nframes, 1]).T
        y = np.c_[y, res_data]
    else:
        y = y[:, 0:normalizedLen]
    return y[0]
def getNearestLen(framelength, sr):
    framesize = framelength * sr
    # Find the power of two closest to the current framesize
    nfftdict = {}
    lists = [32, 64, 128, 256, 512, 1024]
    for i in lists:
        nfftdict[i] = abs(framesize - i)
    sortlist = sorted(nfftdict.items(), key=lambda x: x[1])  # sort candidates by distance to framesize, ascending
    framesize = int(sortlist[0][0])  # take the closest power of two as the new framesize
    return framesize
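As a quick sanity check of getNearestLen, consider a 16 kHz recording (the sample rate is an assumption here; CASIA clips are commonly 16 kHz, but verify against your own files). The function picks the candidate window size closest to framelength * sr:

# Hypothetical check, assuming sr = 16000 (adjust to your file's actual sample rate)
# framesize = 0.25 * 16000 = 4000; the closest candidate in [32, 64, ..., 1024] is 1024
print(getNearestLen(0.25, 16000))  # -> 1024
# the MFCC call below then uses n_fft = 1024 and hop_length = 1024 // 4 = 256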
VOICE_LEN = 32000
# Get the FFT window length N_FFT
N_FFT = getNearestLen(0.25, sr)
# Normalize the clip to a fixed length (the first two seconds)
y = normalizeVoiceLen(y, VOICE_LEN)
print(y.shape)
# Extract MFCC features
mfcc_data = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13, n_fft=N_FFT, hop_length=int(N_FFT / 4))
# Visualize the MFCCs; time runs along the horizontal axis
plt.matshow(mfcc_data)
plt.title('MFCC')
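With the settings assumed above (N_FFT = 1024, hop_length = 256, 32000 samples), each clip yields an MFCC matrix of 13 coefficients over about 126 frames. The per-file feature built below is the mean over the coefficient axis, which is why the network's input_shape is (126, 1). A minimal shape check under those assumptions:

# A minimal sketch, assuming N_FFT = 1024 and hop_length = 256 as in the example above
print(mfcc_data.shape)                # expected: (13, 126) -> 13 MFCCs x 126 frames
feature = np.mean(mfcc_data, axis=0)  # average over the 13 coefficients, frame by frame
print(feature.shape)                  # expected: (126,) -> matches input_shape=(126, 1) later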
# Extract MFCC features for every file in the CASIA database
counter = 0
fileDirCASIA = r'G:\Google download\CASIA database'
mfccs = {}
mfccs['angry'] = []
mfccs['fear'] = []
mfccs['happy'] = []
mfccs['neutral'] = []
mfccs['sad'] = []
mfccs['surprise'] = []
mfccs['disgust'] = []
listdir = os.listdir(fileDirCASIA)
for persondir in listdir:
    if not r'.' in persondir:
        emotionDirName = os.path.join(fileDirCASIA, persondir)
        emotiondir = os.listdir(emotionDirName)
        for ed in emotiondir:
            if not r'.' in ed:
                filesDirName = os.path.join(emotionDirName, ed)
                files = os.listdir(filesDirName)
                for fileName in files:
                    if fileName[-3:] == 'wav':
                        counter += 1
                        fn = os.path.join(filesDirName, fileName)
                        print(str(counter) + fn)
                        y, sr = librosa.load(fn, sr=None)
                        y = normalizeVoiceLen(y, VOICE_LEN)  # normalize the length
                        mfcc_data = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13, n_fft=N_FFT, hop_length=int(N_FFT / 4))
                        feature = np.mean(mfcc_data, axis=0)
                        mfccs[ed].append(feature.tolist())
# Save the features to disk
with open('mfcc_feature_dict.pkl', 'wb') as f:
    pickle.dump(mfccs, f)
# Load the saved features
mfccs = {}
with open('mfcc_feature_dict.pkl', 'rb') as f:
    mfccs = pickle.load(f)
# Set up the label mapping
emotionDict = {}
emotionDict['angry'] = 0
emotionDict['fear'] = 1
emotionDict['happy'] = 2
emotionDict['neutral'] = 3
emotionDict['sad'] = 4
emotionDict['surprise'] = 5
data = []
labels = []
data = data + mfccs['angry']
print(len(mfccs['angry']))
for i in range(len(mfccs['angry'])):
    labels.append(0)
data = data + mfccs['fear']
print(len(mfccs['fear']))
for i in range(len(mfccs['fear'])):
    labels.append(1)
print(len(mfccs['happy']))
data = data + mfccs['happy']
for i in range(len(mfccs['happy'])):
    labels.append(2)
print(len(mfccs['neutral']))
data = data + mfccs['neutral']
for i in range(len(mfccs['neutral'])):
    labels.append(3)
print(len(mfccs['sad']))
data = data + mfccs['sad']
for i in range(len(mfccs['sad'])):
    labels.append(4)
print(len(mfccs['surprise']))
data = data + mfccs['surprise']
for i in range(len(mfccs['surprise'])):
    labels.append(5)
print(len(data))
print(len(labels))
# Reshape the data for the network
data = np.array(data)
data = data.reshape((data.shape[0], data.shape[1], 1))
labels = np.array(labels)
labels = to_categorical(labels)
# Standardize the data
DATA_MEAN = np.mean(data, axis=0)
DATA_STD = np.std(data, axis=0)
data -= DATA_MEAN
data /= DATA_STD
# Save the normalization parameters and the label mapping for later inference
paraDict = {}
paraDict['mean'] = DATA_MEAN
paraDict['std'] = DATA_STD
paraDict['emotion'] = emotionDict
with open('mfcc_model_para_dict.pkl', 'wb') as f:
    pickle.dump(paraDict, f)
ratioTrain=0.8
numTrain=int(data.shape[0]*ratioTrain)
permutation = np.random.permutation(data.shape[0])
data = data[permutation,:]
labels = labels[permutation,:]
x_train=data[:numTrain]
x_val=data[numTrain:]
y_train=labels[:numTrain]
y_val=labels[numTrain:]
model = Sequential()
model.add(Conv1D(256, 5, padding='same', input_shape=(126, 1)))
model.add(Activation('relu'))
model.add(Conv1D(128, 5, padding='same'))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(MaxPooling1D(pool_size=(8)))
model.add(Conv1D(128, 5, padding='same'))
model.add(Activation('relu'))
#model.add(Conv1D(128, 5, padding='same'))
#model.add(Activation('relu'))
#model.add(Conv1D(128, 5, padding='same'))
#model.add(Activation('relu'))
#model.add(Dropout(0.2))
model.add(Conv1D(128, 5, padding='same'))
model.add(Activation('relu'))
model.add(Flatten())
model.add(Dense(6))
model.add(Activation('softmax'))
opt = keras.optimizers.RMSprop(lr=0.00001, decay=1e-6)
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
cnnhistory = model.fit(x_train, y_train, batch_size=16, epochs=300, validation_data=(x_val, y_val))
# Plot the training and validation loss
plt.plot(cnnhistory.history['loss'])
plt.plot(cnnhistory.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()
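The same history object also records accuracy, so a companion curve can be drawn. A minimal sketch; note the dictionary key is 'acc' in older Keras releases and 'accuracy' in newer ones, so the snippet checks which one is present:

# A minimal sketch; the history key name depends on the installed Keras version
acc_key = 'acc' if 'acc' in cnnhistory.history else 'accuracy'
plt.plot(cnnhistory.history[acc_key])
plt.plot(cnnhistory.history['val_' + acc_key])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()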
# Save the trained model and its weights
model_name = 'Emotion_Voice_Detection_Model.h5'
save_dir = os.path.join(os.getcwd(), 'saved_models')
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
model_path = os.path.join(save_dir, model_name)
model.save(model_path)
print('Saved trained model at %s ' % model_path)
# Save the model architecture as JSON
import json
model_json = model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)
# Load the JSON and recreate the model
from keras.models import model_from_json
json_file = open('model.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json)
# Load the weights into the new model
loaded_model.load_weights("saved_models/Emotion_Voice_Detection_Model.h5")
print("Loaded model from disk")
# Evaluate the loaded model on the validation data
loaded_model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
score = loaded_model.evaluate(x_val, y_val, verbose=0)
print("%s: %.2f%%" % (loaded_model.metrics_names[1], score[1] * 100))
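To classify a new recording with the trained model, the same preprocessing pipeline has to be applied: fixed-length normalization, MFCC extraction, per-frame averaging, and standardization with the saved mean/std. The sketch below is an assumption-laden illustration, not part of the original post: it reuses the helpers and constants defined above (normalizeVoiceLen, VOICE_LEN, N_FFT) and an arbitrary wav path.

# A minimal inference sketch, assuming the helpers and constants defined above are still in scope
with open('mfcc_model_para_dict.pkl', 'rb') as f:
    paras = pickle.load(f)
y_new, sr_new = librosa.load(r'G:\test.wav', sr=None)  # any wav file to classify (hypothetical path)
y_new = normalizeVoiceLen(y_new, VOICE_LEN)            # pad/truncate to 2 s
mfcc_new = librosa.feature.mfcc(y=y_new, sr=sr_new, n_mfcc=13, n_fft=N_FFT, hop_length=int(N_FFT / 4))
feat = np.mean(mfcc_new, axis=0).reshape(1, -1, 1)     # shape (1, 126, 1)
feat = (feat - paras['mean']) / paras['std']           # standardize with the saved statistics
pred = loaded_model.predict(feat)
idx2emotion = {v: k for k, v in paras['emotion'].items()}
print(idx2emotion[int(np.argmax(pred))])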
