-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun.py
More file actions
54 lines (42 loc) · 1.3 KB
/
Copy pathrun.py
File metadata and controls
54 lines (42 loc) · 1.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# run.py
import gym
import numpy as np
from maze_env import SimpleMazeEnv
import json
import os
# Read the training output from disk. The JSON document is expected to hold
# a 'q_table_history' entry: a sequence of Q-table snapshots.
file_path = os.path.join('output', 'q_learning_data.json')
with open(file_path) as json_file:
    data = json.load(json_file)

q_table_history = data['q_table_history']
# Only the final snapshot is used for the rollout; convert it to an array so
# it can be indexed by state and passed to np.argmax.
q_table = np.array(q_table_history[-1])
# Maze layout as a 5x5 grid of integers, one inner list per row.
# NOTE(review): presumably 1 marks a wall and 0 a free cell -- confirm
# against SimpleMazeEnv.
maze = [
    [0, 1, 0, 0, 0],
    [0, 1, 0, 1, 0],
    [0, 0, 0, 1, 0],
    [1, 1, 0, 1, 0],
    [0, 0, 0, 1, 0],
]

# Build the environment from this grid. It has to be the same maze that was
# used during training, otherwise the loaded Q-table does not apply.
env = SimpleMazeEnv(maze)
def state_to_index(state):
    """Flatten an ``(x, y)`` state tuple into a single integer index.

    The first coordinate is multiplied by the length of the maze's first row
    and the second coordinate is added as the offset within that row.
    """
    first, second = state
    row_width = len(maze[0])
    return first * row_width + second
# Run the agent in the environment using the trained Q-table.
#
# The policy is purely greedy: at every step the action with the highest
# Q-value for the current state is taken (no exploration). If the Q-table
# sends the agent back and forth between the same states, `done` never
# becomes true, so the rollout is capped at MAX_STEPS to guarantee that the
# script terminates.
MAX_STEPS = 1000  # safety cap, far above the 25 cells of the 5x5 maze

total_steps = 0
state = env.reset()  # Reset the environment and take its initial state
state_index = state_to_index(state)
done = False

print("Running the trained model...")

while not done and total_steps < MAX_STEPS:
    # Choose the best action (exploitation only, no exploration)
    action = np.argmax(q_table[state_index])

    # Only the next state and the termination flag are used below; the
    # reward and the fourth return value are ignored.
    next_state, reward, done, _ = env.step(action)
    next_state_index = state_to_index(next_state)

    total_steps += 1
    state_index = next_state_index

if done:
    print(f"Goal reached in {total_steps} steps.")
else:
    # The step cap was hit before the environment signalled termination.
    print(f"Stopped after {total_steps} steps without reaching the goal.")