-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathpredict_emotion.py
More file actions
114 lines (79 loc) · 3.53 KB
/
Copy pathpredict_emotion.py
File metadata and controls
114 lines (79 loc) · 3.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import librosa
import matplotlib.pyplot as plt
from matplotlib.backends.backend_agg import FigureCanvasAgg
from PIL import Image
import numpy as np
import requests
from io import BytesIO
import tensorflow as tf
from tf_keras.models import load_model
from tf_keras.models import model_from_json
import tempfile
# from tf_keras.models import mo
import caleb_files.data_generators as dg
def audio_to_img(file_name):
    """Render an audio file as a cropped mel-spectrogram image.

    The audio is loaded at its native sample rate, turned into a mel-scaled
    magnitude spectrogram expressed in decibels, drawn with
    ``librosa.display.specshow`` on an axis-free Matplotlib figure, and the
    rendered canvas is cropped to the bounding box of its non-white pixels.

    Args:
        file_name: Path of the audio file; passed straight to ``librosa.load``.

    Returns:
        A ``PIL.Image.Image`` built from the cropped RGB pixel array.

    Raises:
        ValueError: If the rendered canvas contains no non-white pixel (the
            bounding-box computation then runs on empty index arrays).
    """
    # Older librosa releases only expose ``librosa.display`` after the
    # submodule has been imported explicitly.
    import librosa.display

    # sr=None keeps the file's own sample rate instead of resampling.
    samples, sample_rate = librosa.load(file_name, sr=None)
    sgram_mag, _ = librosa.magphase(librosa.stft(samples))
    mel_scale_sgram = librosa.feature.melspectrogram(S=sgram_mag, sr=sample_rate)
    mel_sgram = librosa.amplitude_to_db(mel_scale_sgram, ref=np.min)

    fig, ax = plt.subplots(nrows=1, ncols=1, sharex=True)
    try:
        librosa.display.specshow(mel_sgram, y_axis='linear', x_axis='time', ax=ax)
        ax.axis('off')

        # Render the plot off-screen and grab the pixels as a numpy array.
        canvas = FigureCanvasAgg(fig)
        canvas.draw()
        # ``tostring_rgb`` is gone from recent Matplotlib; take the RGBA
        # buffer and drop alpha. Copy so the array outlives the figure.
        image = np.asarray(canvas.buffer_rgba())[..., :3].copy()
    finally:
        # Always release the figure, otherwise every call leaks one.
        plt.close(fig)

    # Bounding box of every pixel that has at least one non-255 channel
    # (white is assumed to be 255 in all channels).
    rows, cols, _ = np.where(image != 255)
    min_x, min_y = cols.min(), rows.min()
    max_x, max_y = cols.max(), rows.max()

    # NOTE(review): the slice end is exclusive, so the last non-white row and
    # column are cropped away. Kept as-is because changing it would alter the
    # pixels fed to the model — confirm against how training images were made.
    return Image.fromarray(image[min_y:max_y, min_x:max_x])
def make_prediction(img, model):
    """Run ``model`` on a spectrogram image and print the resulting scores.

    The image is resized to the target size declared in the ``dg`` module,
    scaled to the ``[0, 1]`` range, wrapped in a batch of one, and passed to
    ``model.predict``. The flattened scores and the class indices ordered
    from highest to lowest score are printed.

    Args:
        img: ``PIL.Image.Image`` to classify.
        model: Object exposing a ``predict`` method that accepts a
            ``(1, height, width, 3)`` array.

    Returns:
        The flattened prediction vector as a 1-D numpy array.
    """
    # PIL's ``resize`` expects (width, height); the resulting array is then
    # (height, width, 3). Converting to RGB guarantees exactly three channels.
    # NOTE(review): assumes dg.targ_height/targ_width match the model's input
    # size (the original hard-coded 256x256) — confirm.
    resized = img.convert("RGB").resize((dg.targ_width, dg.targ_height))
    scaled = np.asarray(resized) / 255.0

    # Add the leading batch axis rather than hard-coding the image size.
    batch = scaled[np.newaxis, ...]
    prediction = np.ravel(model.predict(batch))

    sorted_indices = np.argsort(prediction)
    print(prediction)
    print(sorted_indices[::-1])  # class indices, best score first
    return prediction
# Seed the TensorFlow and NumPy random number generators.
tf.random.set_seed(42)
np.random.seed(24)

class_indices = dg.get_data().class_indices
# Reverse the dictionary to map indices to class names
class_names = {v: k for k, v in class_indices.items()}
print(class_names)

img = audio_to_img('wav/03a01Wa.wav')

model_url = "https://huggingface.co/umop-ap1sdn/CNN_Spectrogram_Emotion/resolve/main/CNN_model.keras"
# NOTE: the architecture/weights URLs belong to an alternative JSON + H5
# loading path that is currently disabled; only ``model_url`` is fetched.
architecture_url = "https://huggingface.co/umop-ap1sdn/CNN_Spectrogram_Emotion/resolve/main/CNN_model.json"
weights_url = "https://huggingface.co/umop-ap1sdn/CNN_Spectrogram_Emotion/resolve/main/CNN_model_weights.h5"

# Fail fast on a stalled connection or an HTTP error instead of writing an
# error page to disk and letting ``load_model`` choke on it.
model_raw = requests.get(model_url, timeout=60)
model_raw.raise_for_status()

# ``load_model`` needs a filesystem path, so spill the downloaded bytes to a
# temporary ``.keras`` file. The file is removed when the ``with`` block
# exits, so the model must be loaded inside it.
with tempfile.NamedTemporaryFile(delete=True, suffix=".keras") as temp_file:
    temp_file.write(model_raw.content)
    temp_file.flush()
    model1 = load_model(temp_file.name)

# Second copy of the model, loaded from a local file in the working directory.
model2 = load_model("CNN_model.keras")

# NOTE: an earlier, now-disabled experiment set ``rate = 0.0`` on every
# Dropout layer of both models before predicting.

# model1 is evaluated twice on the same image, then model2 once.
# NOTE(review): presumably a check that the downloaded and local models give
# the same, repeatable output — confirm.
make_prediction(img, model1)
make_prediction(img, model1)
make_prediction(img, model2)