-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathproblem6.py
More file actions
134 lines (111 loc) · 4.96 KB
/
Copy pathproblem6.py
File metadata and controls
134 lines (111 loc) · 4.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# Submission metadata: problem identifier, student name and (presumably)
# student number. The names are part of the module's public surface and are
# kept exactly as spelled.
problem, student_name, student_numer = (
    "problem6",
    "Abudi Alshamam",
    "N1212353",
)
import os
import random
from collections import Counter, defaultdict, deque

import pandas as pd
# Characters removed from both ends of every whitespace-separated token.
_PUNCTUATION = ".,!?;:\"'()[]{}"

# Menu shown before every prompt.
_MENU = '''
Options:
1) Return a count of the number of distinct words in the text
2) Return the most frequent word in the text, along with its frequency
3) Return the word that has the largest number of unique neighbours
4) Return the word that has the smallest number of unique neighbours
5) Other descriptive statistics using Pandas DataFrames or Series
6) Find the shortest path between two words
7) Exit
8) Generate a random sentence using DFS from the word graph
'''


def _tokenize(text: str) -> list:
    """Return the lowercase words of *text* with edge punctuation removed.

    Tokens made up only of punctuation (a free-standing quote mark, an
    ellipsis, ...) strip down to the empty string; they are dropped so that
    "" is never counted as a word or linked into the word graph.
    """
    stripped = (token.strip(_PUNCTUATION) for token in text.lower().split())
    return [word for word in stripped if word]


def _build_graph(words: list) -> dict:
    """Map every word to the set of distinct words that directly follow it."""
    graph = defaultdict(set)
    for current, following in zip(words, words[1:]):
        graph[current].add(following)
    # Only the final word can be missing at this point (it never precedes
    # anything); give it an empty entry so membership tests see it as part
    # of the text.
    for word in words[-1:]:
        graph.setdefault(word, set())
    # A plain dict cannot grow silently when a missing key is looked up.
    return dict(graph)


def _shortest_path(graph: dict, start: str, goal: str):
    """Return the shortest directed path from *start* to *goal*, or None.

    Breadth-first search over the successor graph. A word is recorded as
    seen when it is queued, so each word enters the queue at most once, and
    the path is rebuilt from predecessor links instead of copying a path
    list for every queued entry.
    """
    if start == goal:
        return [start]
    previous = {start: None}
    queue = deque([start])
    while queue:
        current = queue.popleft()
        for neighbour in graph.get(current, ()):
            if neighbour in previous:
                continue
            previous[neighbour] = current
            if neighbour == goal:
                # Follow the predecessor links back to the start word.
                path = [goal]
                while previous[path[-1]] is not None:
                    path.append(previous[path[-1]])
                path.reverse()
                return path
            queue.append(neighbour)
    return None


def _random_sentence(graph: dict, start_word: str, max_words: int = 20) -> list:
    """Return a random walk through the graph beginning at *start_word*.

    Each step moves to a randomly chosen successor of the current word. The
    walk ends at a word that was already used, at a word without successors,
    or once *max_words* words have been collected.
    """
    sentence = []
    seen = set()
    word = start_word
    while word not in seen and len(sentence) < max_words:
        seen.add(word)
        sentence.append(word)
        successors = graph.get(word)
        if not successors:
            break
        # Sorting gives random.choice a stable candidate order, so a seeded
        # RNG reproduces the same sentence regardless of set iteration order.
        word = random.choice(sorted(successors))
    return sentence


def text_analyser(user_text):
    """Interactively analyse a text file from the ``gutenberg20`` directory.

    The file is read as UTF-8, split into lowercase words and turned into a
    successor graph (word -> set of words that directly follow it) plus a
    pandas DataFrame with the columns "Word", "Frequency" and
    "Unique Neighbours". A menu is then shown repeatedly until the user
    picks option 7.

    Args:
        user_text: File name, resolved relative to the ``gutenberg20``
            directory under the current working directory.

    Returns:
        None. All results are printed. The function returns early, after
        printing a message, when the file does not exist or contains no
        words.
    """
    file_path = os.path.join("gutenberg20", user_text)
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            text = file.read()
    except FileNotFoundError:
        print(f"File {user_text} not found in directory 'gutenberg20'.")
        return

    words = _tokenize(text)
    if not words:
        # With no words the DataFrame is empty and idxmax/idxmin as well as
        # random.choice would raise, so stop before showing the menu.
        print(f"File {user_text} contains no words to analyse.")
        return

    graph = _build_graph(words)
    word_counts = Counter(words)
    df = pd.DataFrame({
        "Word": list(word_counts),
        "Frequency": list(word_counts.values()),
        "Unique Neighbours": [len(graph[word]) for word in word_counts],
    })

    while True:
        print(_MENU)
        option = input("Enter the number corresponding to your choice: ").strip()

        if option == "1":
            # Number of distinct words in the text
            print(f"Number of distinct words: {len(word_counts)}")

        elif option == "2":
            # Most frequent word and its frequency
            most_frequent = df.loc[df["Frequency"].idxmax()]
            print(f"Most frequent word: '{most_frequent['Word']}' with frequency {most_frequent['Frequency']}")

        elif option == "3":
            # Word followed by the largest number of distinct words
            largest_neighbours = df.loc[df["Unique Neighbours"].idxmax()]
            print(f"Word with the largest number of unique neighbours: '{largest_neighbours['Word']}' ({largest_neighbours['Unique Neighbours']} neighbours)")

        elif option == "4":
            # Word followed by the smallest number of distinct words
            smallest_neighbours = df.loc[df["Unique Neighbours"].idxmin()]
            print(f"Word with the smallest number of unique neighbours: '{smallest_neighbours['Word']}' ({smallest_neighbours['Unique Neighbours']} neighbours)")

        elif option == "5":
            # Summary statistics of the numeric columns
            print("Descriptive statistics:")
            print(df.describe())

        elif option == "6":
            # Shortest chain of consecutive-word links between two words
            word1 = input("Enter the first word: ").strip().lower()
            word2 = input("Enter the second word: ").strip().lower()
            if word1 not in graph or word2 not in graph:
                print("One or both words are not in the text.")
                continue
            path = _shortest_path(graph, word1, word2)
            if path is None:
                print("No such path exists.")
            else:
                print(f"Shortest path: {' -> '.join(path)}")

        elif option == "7":
            print("Exiting the program.")
            break

        elif option == "8":
            # Random sentence generated by walking the word graph
            start_word = input("Enter a start word (or press Enter to choose randomly): ").strip().lower()
            if not start_word:
                start_word = random.choice(list(graph))
                print(f"Randomly chosen start word: {start_word}")
            elif start_word not in graph:
                print("The chosen start word is not in the text.")
                continue
            sentence = _random_sentence(graph, start_word)
            print(f"Generated sentence: {' '.join(sentence)}")

        else:
            print("Invalid option. Please try again.")
if __name__ == "__main__":
    # Start the interactive analyser only when the file is executed as a
    # script, so that importing the module does not block on the prompt loop.
    text_analyser("RobinHood.txt")