【发布时间】:2019-01-13 11:49:10
【问题描述】:
我试图实现值迭代算法。 我有一个网格
grid = [[0, 0, 0, +1],
[0, "W", 0, -1],
[0, 0, 0, 0]]
行动清单
actlist = {UP:1, DOWN:2, LEFT:3, RIGHT:4}
还有一个奖励函数
reward = [[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]]
我写了一个函数 T,它返回 3 个元组的元组。
def T(i,j,actions):
if(i == 0 and j == 0):
if(actions == UP):
return (i,i,0.8),(i,i,0.1),(i,j+1,0.1)
elif(actions == DOWN):
return (i+1,j,0.8),(i,j,0.1),(i,j+1,0.1)
elif(actions == LEFT):
return (i,j,0.8),(i,j,0.1),(i+1,j,0.1)
elif(actions == RIGHT):
return (i,j+1,0.8),(i,i,0.1),(i+1,j,0.1)
elif (i == 0 and j == 1):
if(actions == UP):
return (i,i,0.8),(i,j-1,0.1),(i,j+1,0.1)
elif(actions == DOWN):
return (i,j,0.8),(i,j-1,0.1),(i,j+1,0.1)
elif(actions == LEFT):
return (i,j-1,0.8),(i,j,0.1),(i,j,0.1)
elif(actions == RIGHT):
return (i,j+1,0.8),(i,j,0.1),(i,j,0.1)
elif(i == 0 and j == 2):
if(actions == UP):
return (i,j,0.8),(i,j-1,0.1),(i,j+1,0.1)
elif(actions == DOWN):
return(i+1,j,0.8),(i,j-1,0.1),(i,j+1,0.1)
elif(actions == LEFT):
return (i,j-1,0.8),(i,j,0.1),(i+1,j,0.1)
elif(actions == RIGHT):
return (i,j+1,0.8),(i,j,0.1),(i+1,j,0.1)
elif(i == 0 and j == 3):
if(actions == UP):
return (-1,-1,0.8),(-1,-1,0.1),(-1,-1,0.1)
elif(actions == DOWN):
return (-1,-1,0.8),(-1,-1,0.1),(-1,-1,0.1)
elif(actions == LEFT):
return (-1,-1,0.8),(-1,-1,0.1),(-1,-1,0.1)
elif(actions == RIGHT):
return (-1,-1,0.8),(-1,-1,0.1),(-1,-1,0.1)
# 2nd row
elif (i == 1 and j == 0):
if(actions == UP):
return (i-1,j,0.8),(i,j,0.1),(i,j,0.1)
elif(actions == DOWN):
return (i+1,j,0.8),(i,j,0.1),(i,j,0.1)
elif(actions == LEFT):
return (i,j,0.8),(i-1,j,0.1),(i+1,j,0.1)
elif(actions == RIGHT):
return (i,j,0.8),(i-1,j,0.1),(i+1,j,0.1)
elif(i == 1 and j ==1):
if(actions == UP):
return (i,j,0.8),(i,j,0.1),(i,j,0.1)
elif(actions == DOWN):
return (i,j,0.8),(i,j,0.1),(i,j,0.1)
elif(actions == LEFT):
return (i,j,0.8),(i,j,0.1),(i,j,0.1)
elif(actions == RIGHT):
return (i,j,0.8),(i,j,0.1),(i,j,0.1)
elif (i == 1 and j == 2):
if(actions == UP):
return (i-1,j,0.8),(i,j,0.1),(i,j+1,0.1)
elif(actions == DOWN):
return (i+1,j,0.8),(i,j,0.1),(i,j+1,0.1)
elif(actions == LEFT):
return (i,j,0.8),(i-1,j,0.1),(i+1,j,0.1)
elif(actions == RIGHT):
return (i,j+1,0.8),(i-1,j,0.1),(i+1,j,0.1)
elif(i == 1 and j == 3):
if(actions == UP):
return (-2,-2,0.8),(-2,-2,0.1),(-2,-2,0.1)
elif(actions == DOWN):
return (-2,-2,0.8),(-2,-2,0.1),(-2,-2,0.1)
elif(actions == LEFT):
return (-2,-2,0.8),(-2,-2,0.1),(-2,-2,0.1)
elif(actions == RIGHT):
return (-2,-2,0.8),(-2,-2,0.1),(-2,-2,0.1)
# 3rd row
elif(i == 2 and j == 0):
if(actions == UP):
return (i-1,j,0.8),(i,j,0.1),(i,j+1,0.1)
elif(actions == DOWN):
return (i,j,0.8),(i,j,0.1),(i,j+1,1,0.1)
elif(actions == LEFT):
return (i,j,0.8),(i-1,j,0.1),(i,j,0.1)
elif(actions == RIGHT):
return (i,j+1,0.8),(i-1,j,0.1),(i,j,0.1)
elif (i == 2 and j == 1):
if(actions == UP):
return (i,j,0.8),(i,j-1,0.1),(i,j+1,0.1)
elif(actions == DOWN):
return (i,j,0.8),(i,j-1,0.1),(i,j+1,0.1)
elif(actions == LEFT):
return (i,j-1,0.8),(i,j,0.1),(i,j,0.1)
elif(actions == RIGHT):
return (i,j+1,0.8),(i,j,0.1),(i,j,0.1)
elif(i == 2 and j == 2):
if(actions == UP):
return (i-1,j,0.8),(i,j-1,0.1),(i,j+1,0.1)
elif(actions == DOWN):
return (i,j,0.8),(i,j-1,0.1),(i,j+1,0.1)
elif(actions == LEFT):
return (i,j-1,0.8),(i-1,j,0.1),(i,j,1)
elif(actions == RIGHT):
return (i,j+1,0.8),(i-1,j,0.1),(i,j,0.1)
elif(i == 2 and j == 3):
if(actions == UP):
return (i-1,j,0.8),(i,j-1,0.1),(i,j,0.1)
elif(actions == DOWN):
return (i,j,0.8),(i,j-1,0.1),(i,j,0.1)
elif(actions == LEFT):
return (i,j-1,0.8),(i-1,j,0.1),(i,j,0.1)
elif(actions == RIGHT):
return (i,j,0.8),(i-1,j,0.1),(i,j,0.1)
这个函数在值迭代函数中被调用:
def value_iteration():
U1 = [[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]]
while True:
U=U1.copy()
delta = 0
for i in range(len(grid)):
for j in range(len(grid[i])):
U1[i][j] = max(sum(p*(R(k,l)+gamma*U[k][l]) for (k,l,p) in T(i,j,a)) for a in actlist)
print(i,j,U1[i][j])
delta = max(delta, abs(U1[i][j] - U[i][j]))
if delta <= epsilon*(1 - gamma)/gamma:
return U
问题是,for 循环的前两次迭代输出良好
0 0
0 1
0 2
0 3
1 0
1 1
1 2
1 3
但随后代码因错误而停止
ValueError: too many values to unpack (expected 3)
【问题讨论】:
-
您能否提供完整的错误消息,以便我们知道错误的确切触发位置?
-
U1[i][j] = max(sum(p*(R(k,l)+gamma*U[k][l]) for (k,l,p) in T(i,j,a)) for a in actlist)此行触发错误。 -
附带问题:如果您的所有功能都像表格一样,为什么不使用字典?
-
@EdwardMinnix,我不知道该怎么做。
-
@ShifatEArman 您只需创建一个字典,其中键是 3 元素元组,其值为
(i, j, action),其值是您将从函数返回的结果(尽管它们需要硬编码。如果你对字典不熟悉,我建议你参考the official documentation
标签: python python-3.x tuples artificial-intelligence valueerror