# Code


import numpy
import random
import math
from matplotlib import pyplot
from matplotlib import colors

reward=[[[]]]
states=[[]]
def output():
global reward
for a in reward:
print(a)
print("\n")

def idealPath(goal):
global states
eligible= [{}]
i = 10
while i > 0:
s = states[-i]
j = 0
while j < len(s):
try:
eligible[j][str(s[j])] += 1
except KeyError:
# print(s[j])
# print(eligible[j])
eligible[j][str(s[j])] = 1
except IndexError:
eligible.append({str(s[j]): 1})
finally:
j+=1
i-=1
ideal = []
confidence = []
for a in eligible:
ideal.append(sorted(a.items(),key=lambda item: item[1])[-1][0])
confidence.append(sorted(a.items(), key=lambda item: item[1])[-1][1])
if sorted(a.items(),key=lambda item: item[1])[-1][0] == goal:
return ideal
return ideal

def maxIndex(s):
global reward
rewar = reward[s[1]][s[0]]
max=0
b=0
while b < 4:
if rewar[b]>rewar[max]:
if b == 0:
if s[1]-1 > 0:
max = b
elif b == 1:
if s[1]+1 < len(reward):
max = b
elif b == 2:
if s[0]-1 > 0 :
max = b
elif b == 3:
if s[0]+1 < len(reward[1]) :
max = b
b+=1
return max

def epsilon(s,epsilon):
if random.randrange(0,100) < epsilon:
a = random.randrange(0, 4)
while True:
if a == 0:
if s[1]-1 > 0:
# print('Epsilon Action: r ' + str(a))
break
elif a == 1:
if s[1]+1 < len(reward):
# print('Epsilon Action: r ' + str(a))
break
elif a == 2:
if s[0]-1 > 0 :
# print('Epsilon Action: r ' + str(a))
break
elif a == 3:
if s[0]+1 < len(reward[1]) :
# print('Epsilon Action: r '+ str(a))
break
a = random.randrange(0,4)
else:
a = maxIndex(s)
# print('Epsilon Action: d ' + str(a))
return a

def getQ(s,a):
q=reward[s[1]][s[0]][a]
return q

def getR(s):
r=reward[s[1]][s[0]][4]
return r

def nextS(a,state,wind):
s2=[-1,-1]
s2[0]=state[0]
s2[1]=state[1]
if a == 0:
s2[1]=s2[1] - 1
elif a == 1:
s2[1]=s2[1] + 1
elif a == 2:
s2[0]=s2[0] - 1
elif a == 3:
s2[0]=s2[0] + 1
s2[1] -= wind[s2[0]][s2[1]]
return s2

def bellman(s,s2,a,a2,gamma,alpha):
q = getQ(s,a) + alpha *(getR(s2)+gamma * getQ(s2,a2)-getQ(s,a))
return q

def episode(s,ep,gamma,alpha,goal,wind):
global reward
epsi=ep * 100
a = epsilon(s, epsi)
while getR(s) != 100 and reward[s[1]][s[0]][4] != -100:
print_s=s
print_a=a
print_Q_old=reward[s[1]][s[0]][a]
states[-1].append(''+(str(s[0]) + ',' + str(s[1]))+'')
s2=nextS(a,s,wind)
a2=epsilon(s2,epsi)
print_Q_new=reward[s[1]][s[0]][a]
s=s2
a=a2
print('state: ',str(print_s), 'action: ',str(print_a), 'Q_old: ', print_Q_old, 'Q_new_: ', print_Q_new )
states[-1].append(''+(str(s[0]) + ',' +str(s[1]))+'')
if getR(s) == 100:
states[-1].append('' + str(goal[0]) + ','+ str(goal[1]) + '')

def get_wind(start,goal,n,m):
w = [[0 for i in range(m)] for i in range(n)]
for i in range(start[0] + 1, goal[0]):
rand_var = (numpy.random.choice([0, 1, 2])).tolist()
w[i] = [rand_var for f in range(m)]
return w
def init(gamma,alpha,epsi,n,m,goal,start,sample,map): #n is x axis, m is y axis
global reward
global states
global wind
#sets up reward and quality matrix
wind = [[0 for i in range(m)] for i in range(n)]
if map == 'n':
reward = [[([0]*4+[-1]) for i in range(n)] for i in range(m)]
elif map == 'c':
reward = [[([0] * 4 + [-1]) for i in range(n)] for i in range(math.ceil(m/2))]
half2 = [[([0] * 4 + [-100]) for i in range(n)] for i in range(m - int(m/2))]
for i in range(len(reward)):
reward.append(half2[i])
elif map == 'w':
wind=get_wind(start,goal,n,m)
print(wind)
reward = [[([0] * 4 + [-100]) for i in range(n)] for i in range(3)]
main = [[([0] * 4 + [-1]) for i in range(n)] for i in range(m-3)]
for i in range(len(main)):
reward.append(main[i])
a=0
output()
#Sets quality of actions along nothern/souther boundaries to -100
while a < len(reward[0]):
reward[0][a][0]=-10
reward[len(reward[0])-1][a][1] = -10
a+=1
a=0
# Sets quality of actions along western/eastern boundaries to -100
while a < len(reward):
reward[a][0][2]=-10
reward[a][len(reward[0])-1][3] = -10
a+=1
reward[goal[1]][goal[0]][4] = 100
i=0
states=[]
while i < sample:
states.append([])
print('episode number: ', i + 1)
episode(start, epsi, gamma, alpha, goal, wind)
print(len(states[i])-1)
i += 1
states.append(idealPath(str(goal[0]) + ',' + str(goal[1])))

def visualization(n,m,map,start,goal,wind):
colormap = colors.ListedColormap(["white", "black","green", "yellow", "red"])
#print(len(states))
for j in range(len(states) + 1):  # decides which episodes to run
pyplot.figure(figsize=(20, 20))
pyplot.get_current_fig_manager().full_screen_toggle()
pyplot.title(j+1)
grid = numpy.array(numpy.zeros([m, n]))
pyplot.xticks(range(0, n), range(0, n, 1))
pyplot.yticks(range(0, m), range(0, m, 1))
# print((int(m/2)))
pyplot.xlabel("x")
pyplot.ylabel("y")
pyplot.ion()
for i in range(len(states[j])):
grid[goal[1],goal[0]]=3
grid[start[1],start[0]]=4
if map == 'c':
for l in range(int(m / 2), m):
for k in range(n):
grid[l, k] = 1
elif map == 'w':
for l in range(3):
for k in range(n):
grid[l, k] = 1
for l in range(3, m):
for k in range(n):
if wind[k][l] == 1:
pyplot.arrow(k, l + 0.5, 0, -0.5, length_includes_head=False,
elif wind[k][l] == 2:
pyplot.arrow(k - 0.1, l + 0.5, 0, -0.5, length_includes_head=False,
pyplot.arrow(k + 0.1, l + 0.5, 0, -0.5, length_includes_head=False,
x = int(states[j][i].split(',')[0])
y = int(states[j][i].split(',')[1])
grid[y, x] = 2  # other color for state
print(x, y)
pyplot.imshow(grid, cmap=colormap)
# pyplot.draw()
pyplot.pause(0.01)  # time showing current state
grid[y, x] = 0
pyplot.clf()
pyplot.close()
def arrow_visualization(n,m,map):
colormap = colors.ListedColormap(["white", "black"])
pyplot.figure(figsize=(n,m))
pyplot.xlabel("x")
pyplot.ylabel("y")
grid = numpy.array(numpy.zeros([n, n]))
j= len(states)-1
I=len(states[j])-1
pyplot.autoscale(enable=False,axis='both')
pyplot.gca().invert_yaxis()
pyplot.xticks(range(0,n), range(0, n, 1))
pyplot.yticks(range(0,m), range(0,m,1))
for i in range(I):
print(states[j][i])
x=int(states[j][i].split(',')[0])
y=int(states[j][i].split(',')[1])
x_=int(states[j][i+1].split(',')[0])
y_=int(states[j][i+1].split(',')[1])
pyplot.arrow(x, y,x_-x,y_-y)
x=x_
y=y_
if map == 'c':
for l in range(int(m / 2) , m):
for k in range(n):
grid[l, k] = 1
elif map == 'w':
for l in range(3):
for k in range(n):
grid[l, k] = 1
for l in range(3, m):
for k in range(n):
if wind[k][l] == 1:
pyplot.arrow(k, l + 0.5, 0, -0.5, length_includes_head=False,
elif wind[k][l] == 2:
pyplot.arrow(k - 0.1, l + 0.5, 0, -0.5, length_includes_head=False,