-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmodel_train.py
More file actions
119 lines (81 loc) · 3.41 KB
/
model_train.py
File metadata and controls
119 lines (81 loc) · 3.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import os
import sys

import numpy as np
import tensorflow as tf

from aux import replaceUsingIndex, getIndexFromFile, LoadNewTrainFile, LoadTestFile
# ---------------------------------------------------------------------------
# Vocabulary indices
# ---------------------------------------------------------------------------
# One index is built per vocab file (words, dependency labels, POS tags,
# parser actions). Only len() of each index is used in this script; the
# indices themselves are handed to replaceUsingIndex below.
index_of_words = getIndexFromFile(filename='data/vocabs.word')
n_words = len(index_of_words)

index_of_labels = getIndexFromFile(filename='data/vocabs.labels')
n_labels = len(index_of_labels)

index_of_pos = getIndexFromFile(filename='data/vocabs.pos')
n_tags = len(index_of_pos)

index_of_actions = getIndexFromFile(filename='data/vocabs.actions')
n_actions = len(index_of_actions)

print(n_words, n_tags, n_labels)

# ---------------------------------------------------------------------------
# Training data
# ---------------------------------------------------------------------------
# Create the temporary training file from the raw one using the indices.
new_train_file = 'data/train_with_indices.data'
replaceUsingIndex(
    oldfilename='data/train.data',
    newfilename=new_train_file,
    indices=[index_of_words, index_of_pos, index_of_labels, index_of_actions],
)

# Dev/test preprocessing is currently disabled:
# new_test_file = 'data/dev_with_indices.data'
# replaceUsingIndex(oldfilename = 'data/dev.data', newfilename = new_test_file,
#     indices = [index_of_words, index_of_pos, index_of_labels, index_of_actions])

# Load the feature matrix and target labels from the new training file.
train_data, train_labels = LoadNewTrainFile(filename=new_train_file)
# test_data, test_labels = LoadTestFile(filename = 'data/dev_with_indices.data')
print(train_data.shape, train_labels.shape)
from keras.models import Model
from keras.layers import Dense, Input, Embedding, Reshape, Concatenate, Lambda
import keras
# Embedding dimensions:
#   dw -- word embeddings
#   dt -- tag embeddings
#   dl -- dependency label embeddings
dw, dt, dl = 64, 32, 32
def output_shape_words(input_shape):
    """Return the output shape of the word-feature slice of the input.

    Passed as the ``output_shape`` callback of the ``Lambda`` layer that
    keeps the first 20 columns of the 52-column feature matrix.

    Args:
        input_shape: Shape of the layer input. Must have exactly two
            dimensions, the second of which is 52.

    Returns:
        The tuple ``(input_shape[0], 20)``.

    Raises:
        AssertionError: If ``input_shape`` is not 2-D or its second
            dimension is not 52.
    """
    assert len(list(input_shape)) == 2, (
        "expected a 2-D input shape, got %r" % (input_shape,))
    assert input_shape[1] == 52, (
        "expected 52 feature columns, got %r" % (input_shape[1],))
    return (input_shape[0], 20)
def output_shape_tags(input_shape):
    """Return the output shape of the tag-feature slice of the input.

    Passed as the ``output_shape`` callback of the ``Lambda`` layer that
    keeps columns 20 up to (not including) 40 of the 52-column feature matrix.

    Args:
        input_shape: Shape of the layer input. Must have exactly two
            dimensions, the second of which is 52.

    Returns:
        The tuple ``(input_shape[0], 20)``.

    Raises:
        AssertionError: If ``input_shape`` is not 2-D or its second
            dimension is not 52.
    """
    assert len(list(input_shape)) == 2, (
        "expected a 2-D input shape, got %r" % (input_shape,))
    assert input_shape[1] == 52, (
        "expected 52 feature columns, got %r" % (input_shape[1],))
    return (input_shape[0], 20)
def output_shape_labels(input_shape):
    """Return the output shape of the label-feature slice of the input.

    Passed as the ``output_shape`` callback of the ``Lambda`` layer that
    keeps the last 12 columns (from index 40 on) of the 52-column feature
    matrix.

    Args:
        input_shape: Shape of the layer input. Must have exactly two
            dimensions, the second of which is 52.

    Returns:
        The tuple ``(input_shape[0], 12)``.

    Raises:
        AssertionError: If ``input_shape`` is not 2-D or its second
            dimension is not 52.
    """
    assert len(list(input_shape)) == 2, (
        "expected a 2-D input shape, got %r" % (input_shape,))
    assert input_shape[1] == 52, (
        "expected 52 feature columns, got %r" % (input_shape[1],))
    return (input_shape[0], 12)
# ---------------------------------------------------------------------------
# Model definition
# ---------------------------------------------------------------------------
# Each example is one row of 52 ids that are looked up in embedding tables:
#   columns [0, 20)  -> word ids
#   columns [20, 40) -> tag ids
#   columns [40, 52) -> dependency-label ids
X = Input(shape=(52,))

# Split the row into its three feature groups. The slice bounds are kept as
# literals inside the lambdas so the functions do not reference module globals.
words = Lambda(function=lambda x: x[:, 0:20], output_shape=output_shape_words)(X)
tags = Lambda(function=lambda x: x[:, 20:40], output_shape=output_shape_tags)(X)
# The label group is the remaining 12 columns. The upper bound used to be
# 40 + 41, which overshot the 52-column input and only produced the right
# width because slicing clips to the available columns.
labels = Lambda(function=lambda x: x[:, 40:52], output_shape=output_shape_labels)(X)

# Embed each group with its own table, then flatten every
# (positions, embedding_dim) block into a single vector per example.
embedding_words = Embedding(
    input_dim=n_words,
    output_dim=dw,
    input_length=20,
)(words)
embedding_words = Reshape(target_shape=(20 * dw,))(embedding_words)

embedding_tags = Embedding(
    input_dim=n_tags,
    output_dim=dt,
    input_length=20,
)(tags)
embedding_tags = Reshape(target_shape=(20 * dt,))(embedding_tags)

embedding_labels = Embedding(
    input_dim=n_labels,
    output_dim=dl,
    input_length=12,
)(labels)
embedding_labels = Reshape(target_shape=(12 * dl,))(embedding_labels)

# Concatenate the flattened embeddings into one feature vector.
embeddings = Concatenate(axis=1)([embedding_words, embedding_tags, embedding_labels])

# Two hidden layers followed by a softmax over the output classes.
h1 = Dense(units=200, activation='relu')(embeddings)
h2 = Dense(units=200, activation='relu')(h1)
# NOTE(review): the number of output classes is hard-coded to 93, while
# n_actions (the size of the action vocabulary) is computed earlier and never
# used -- confirm that the two agree.
q = Dense(units=93, activation='softmax')(h2)

model = Model(inputs=[X], outputs=[q])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line to the output.
model.summary()

# ---------------------------------------------------------------------------
# Training and saving
# ---------------------------------------------------------------------------
# Make sure the output directory exists *before* training, so a missing
# directory cannot cost us the trained model at save time.
os.makedirs('saved_models', exist_ok=True)

model.fit(train_data, train_labels, epochs=7, batch_size=1000)
model.save(filepath='saved_models/model1.h5')