-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathGreedyCaption.py
More file actions
67 lines (58 loc) · 1.99 KB
/
GreedyCaption.py
File metadata and controls
67 lines (58 loc) · 1.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import numpy as np
import matplotlib.pyplot as plt
import pickle
from keras.applications.inception_v3 import InceptionV3
from keras.preprocessing import image
from keras.models import Model
from keras import Input, layers
from keras.applications.inception_v3 import preprocess_input
from keras.preprocessing.sequence import pad_sequences
from keras.models import load_model
# Load the vocabulary that was pickled during preprocessing.
# NOTE(security): unpickling can execute arbitrary code, so only load a
# vocabulary file that this project produced itself.
with open("Preprocessed Data/vocabulary.pkl", "rb") as vocabulary_file:
    vocabulary = pickle.load(vocabulary_file)

# Trained captioning model used by greedySearch().
model = load_model('model_weights/FinalModel.h5')

# Length every input word sequence is padded to, and the upper bound on the
# number of decoding steps.
max_len = 71

# word <---> index mapping
# Indices start at 1, so index 0 never maps to a vocabulary word.
word2index = {}
index2word = {}
for index, word in enumerate(vocabulary, start=1):
    word2index[word] = index
    index2word[index] = word
def greedySearch(image_feature):
    """Generate a caption for one image by greedy decoding.

    Starting from the 'startseq' token, the partial caption is encoded with
    ``word2index``, padded to ``max_len`` and passed to ``model`` together
    with ``image_feature``. At each step the index with the highest score is
    looked up in ``index2word`` and appended to the caption.

    Decoding stops when 'endseq' is produced, after ``max_len`` steps, or
    when the predicted index has no entry in ``index2word``.

    Args:
        image_feature: Image feature array passed as the first model input
            (``generate_caption`` supplies it with shape (1, 2048)).

    Returns:
        The caption as a space-joined string, without the 'startseq' and
        'endseq' marker tokens.
    """
    in_text = 'startseq'
    for _ in range(max_len):
        # Encode the caption so far, skipping words outside the vocabulary.
        sequence = [word2index[w] for w in in_text.split() if w in word2index]
        sequence = pad_sequences([sequence], maxlen=max_len)
        scores = model.predict([image_feature, sequence], verbose=0)
        best_index = int(np.argmax(scores))
        # The mapping starts at 1, so an index such as 0 has no word; stop
        # with the caption built so far instead of raising KeyError.
        word = index2word.get(best_index)
        if word is None:
            break
        in_text += ' ' + word
        if word == 'endseq':
            break

    words = in_text.split()[1:]  # drop the leading 'startseq'
    # Strip 'endseq' only when it was actually generated; slicing off the
    # last token unconditionally would lose a real word whenever decoding
    # ran for max_len steps without reaching 'endseq'.
    if words and words[-1] == 'endseq':
        words = words[:-1]
    return ' '.join(words)
# Pretrained CNN: InceptionV3 with ImageNet weights, used as the image encoder.
CNNmodel = InceptionV3(weights='imagenet')
# Rebuild the model so that it outputs the activations of the second-to-last
# layer instead of the final layer's output.
CNNmodel = Model(inputs=CNNmodel.input, outputs=CNNmodel.layers[-2].output)
#print("Extracting Features of Images:Wait.....")
#preprocessing image
def preprocess_image(image_path):
    """Load an image file and prepare it as a one-image batch for the CNN.

    The image is loaded at 299x299, converted to an array, given a leading
    batch axis, and passed through InceptionV3's ``preprocess_input``.

    Args:
        image_path: Path of the image file to load.

    Returns:
        The preprocessed array with a leading batch axis of size 1.
    """
    loaded = image.load_img(image_path, target_size=(299, 299))
    batch = np.expand_dims(image.img_to_array(loaded), axis=0)
    return preprocess_input(batch)
#image feature extraction
def encode_image(image):
    """Return the CNN feature vector for the image at path ``image``.

    Note: the parameter name shadows the keras ``image`` module, but only
    inside this function; ``preprocess_image`` still sees the module.

    Args:
        image: Path of the image file to encode.

    Returns:
        A 1-D array whose length is the second dimension of the CNN output.
    """
    features = CNNmodel.predict(preprocess_image(image))
    # Drop the batch axis: (1, n) -> (n,)
    width = features.shape[1]
    return np.reshape(features, (width,))
def generate_caption(path):
    """Produce a caption string for the image stored at ``path``.

    The image is encoded with ``encode_image``, reshaped to a (1, 2048)
    batch, and decoded with ``greedySearch``.

    Args:
        path: Path of the image file to caption.

    Returns:
        The generated caption as a ``str``.
    """
    feature_row = encode_image(path).reshape((1, 2048))
    caption = greedySearch(feature_row)
    return str(caption)