-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path4_1.py
More file actions
executable file
·33 lines (24 loc) · 965 Bytes
/
4_1.py
File metadata and controls
executable file
·33 lines (24 loc) · 965 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#!/usr/bin/python3
from aux import getWordCount
def modifyRareWords(count_of_words,
                    new_train_file_name='ner_train_rare.dat',
                    old_train_file_name='ner_train.dat',
                    rare_threshold=5):
    """Copy a training file, replacing infrequent words with ``_RARE_``.

    Each input line is split on whitespace and its first token is treated
    as the word. When that word's count is below ``rare_threshold`` the
    token is replaced by ``_RARE_`` and the line is rewritten with its
    tokens joined by single spaces. All other lines, including blank
    ones, are copied through unchanged.

    Args:
        count_of_words: Mapping from word to its occurrence count. Words
            missing from the mapping are treated as having a count of 0.
        new_train_file_name: Path of the file to write.
        old_train_file_name: Path of the file to read.
        rare_threshold: Words with a count strictly below this value are
            replaced.

    Raises:
        OSError: If either file cannot be opened.
    """
    # Open the input first so that a missing source file does not leave
    # behind a freshly created (or truncated) output file.
    with open(old_train_file_name) as fold, \
            open(new_train_file_name, 'w') as fnew:
        for line in fold:
            tokens = line.split()
            # An empty token list means a blank line; it is copied as is.
            if tokens and count_of_words.get(tokens[0], 0) < rare_threshold:
                tokens[0] = '_RARE_'
                # split() dropped the line terminator, so add it back.
                fnew.write(' '.join(tokens) + '\n')
            else:
                fnew.write(line)
# Script entry: obtain the word counts from the aux helper, then use them
# to produce the rare-word version of the training file.
count_of_words = getWordCount()
modifyRareWords(count_of_words=count_of_words)