# DependencyParsing/model_train.py at master · AMaini503/DependencyParsing · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import tensorflow as tf
import numpy as np
from aux import replaceUsingIndex, getIndexFromFile, LoadNewTrainFile, LoadTestFile
import sys


# Build one index per vocabulary file (words, dependency labels, POS tags and
# parser actions). The exact structure is whatever aux.getIndexFromFile
# returns; only its length is used directly in this script.
index_of_words = getIndexFromFile(filename = 'data/vocabs.word')
index_of_labels = getIndexFromFile(filename = 'data/vocabs.labels')
index_of_pos = getIndexFromFile(filename = 'data/vocabs.pos')
index_of_actions = getIndexFromFile(filename = 'data/vocabs.actions')


# Vocabulary sizes; the first three are used as `input_dim` of the embedding
# layers built later in this script.
n_words = len(index_of_words)
n_tags = len(index_of_pos)
n_labels = len(index_of_labels)
n_actions = len(index_of_actions)
print(n_words, n_tags, n_labels)

# Path of the rewritten training file produced by replaceUsingIndex below.
new_train_file = 'data/train_with_indices.data'
#new_test_file = 'data/dev_with_indices.data'

# Create temporary training file from the raw one using the four indices
replaceUsingIndex(oldfilename = 'data/train.data', newfilename = new_train_file,
                  indices = [index_of_words, index_of_pos, index_of_labels, index_of_actions])

# Create temporary test file (currently disabled)
#replaceUsingIndex(oldfilename = 'data/dev.data', newfilename = new_test_file,
#                  indices = [index_of_words, index_of_pos, index_of_labels, index_of_actions])


# Load feature matrix and target labels from the new training file.
# The path is taken from new_train_file so the file written above and the
# file read here cannot drift apart.
train_data, train_labels = LoadNewTrainFile(filename = new_train_file)
# test_data, test_labels = LoadTestFile(filename = 'data/dev_with_indices.data')

print(train_data.shape, train_labels.shape)
from keras.models import Model
from keras.layers import Dense, Input, Embedding, Reshape, Concatenate, Lambda
import keras


# Embedding dimensions for the three feature groups.
dw = 64  # word embeddings
dt = 32  # tag embeddings
dl = 32  # dependency label embeddings

def output_shape_words(input_shape):
    """Return the output shape reported for the `words` Lambda slice.

    `input_shape` must have exactly two dimensions with the second equal
    to 52 (checked with an assert). The first dimension is passed through
    unchanged and the second becomes 20.
    """
    assert len(list(input_shape)) == 2 and input_shape[1] == 52
    batch_dim = input_shape[0]
    return (batch_dim, 20)

def output_shape_tags(input_shape):
    """Return the output shape reported for the `tags` Lambda slice.

    `input_shape` must have exactly two dimensions with the second equal
    to 52 (checked with an assert). The first dimension is passed through
    unchanged and the second becomes 20.
    """
    assert len(list(input_shape)) == 2 and input_shape[1] == 52
    batch_dim = input_shape[0]
    return (batch_dim, 20)

def output_shape_labels(input_shape):
    """Return the output shape reported for the `labels` Lambda slice.

    `input_shape` must have exactly two dimensions with the second equal
    to 52 (checked with an assert). The first dimension is passed through
    unchanged and the second becomes 12.
    """
    assert len(list(input_shape)) == 2 and input_shape[1] == 52
    batch_dim = input_shape[0]
    return (batch_dim, 12)


# Each example is a row of 52 feature columns, consumed below as:
#   columns  0-19 -> fed to the word embedding
#   columns 20-39 -> fed to the tag embedding
#   columns 40-51 -> fed to the dependency-label embedding
X = Input(shape = (52, ))


# Split the input row into its three feature groups.
words = Lambda(function = lambda x: x[:, 0:20], output_shape = output_shape_words)(X)
tags = Lambda(function = lambda x: x[:, 20:40], output_shape = output_shape_tags)(X)
# The label group is the remaining 12 columns. The slice now ends at the real
# input width (52) instead of relying on an out-of-range end (81) being clamped.
labels = Lambda(function = lambda x: x[:, 40:52], output_shape = output_shape_labels)(X)

# Embed the 20 word columns with dw-dimensional vectors, then flatten the
# (20, dw) result into a single vector per example.
embedding_words = Embedding(
    input_dim = n_words,
    output_dim = dw,
    input_length = 20,
)(words)

embedding_words = Reshape(target_shape = (20 * dw, ))(embedding_words)

# Embed the 20 tag columns with dt-dimensional vectors and flatten.
embedding_tags = Embedding(
    input_dim = n_tags,
    output_dim = dt,
    input_length = 20
)(tags)

embedding_tags = Reshape(target_shape = (20 * dt, ))(embedding_tags)

# Embed the 12 dependency-label columns with dl-dimensional vectors and flatten.
embedding_labels = Embedding(
    input_dim = n_labels,
    output_dim = dl,
    input_length = 12
)(labels)

embedding_labels = Reshape(target_shape = (12 * dl, ))(embedding_labels)


# concatenate the three flattened embeddings along the feature axis
embeddings = Concatenate(axis = 1)([embedding_words, embedding_tags, embedding_labels])

# Two fully connected ReLU layers of 200 units each on top of the
# concatenated embeddings.
h1 = Dense(units = 200, activation = 'relu')(embeddings)
h2 = Dense(units = 200, activation = 'relu')(h1)

# Softmax output over 93 classes.
# NOTE(review): 93 is presumably the number of parser actions (n_actions);
# confirm against data/vocabs.actions before replacing the literal.
q = Dense(units = 93, activation = 'softmax')(h2)

model = Model(inputs = [X], outputs = [q])
# The sparse loss takes integer class ids as targets rather than one-hot rows.
model.compile(optimizer = 'adam', loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])

# summary() prints the layer table itself and returns None, so wrapping it in
# print() only added a stray "None" line to the output.
model.summary()
model.fit(train_data, train_labels, epochs = 7, batch_size = 1000)


# In[16]:


# Write the trained model to disk. The target directory is created first so
# that a missing 'saved_models/' folder does not make the save fail after
# training has already completed.
import os

os.makedirs('saved_models', exist_ok = True)
model.save(filepath = 'saved_models/model1.h5')