Skip to content

Commit 7c6c1ff

Browse files
committed
修改算法实现部分错误
1 parent 9cbad01 commit 7c6c1ff

1 file changed

Lines changed: 27 additions & 26 deletions

File tree

docs/python/code.ipynb

Lines changed: 27 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
},
1111
{
1212
"cell_type": "code",
13-
"execution_count": 1,
13+
"execution_count": 5,
1414
"id": "e1014345",
1515
"metadata": {},
1616
"outputs": [],
@@ -40,7 +40,7 @@
4040
},
4141
{
4242
"cell_type": "code",
43-
"execution_count": 2,
43+
"execution_count": 6,
4444
"id": "857723f4",
4545
"metadata": {},
4646
"outputs": [],
@@ -58,7 +58,7 @@
5858
},
5959
{
6060
"cell_type": "code",
61-
"execution_count": 3,
61+
"execution_count": 7,
6262
"id": "10f962e5",
6363
"metadata": {},
6464
"outputs": [],
@@ -208,7 +208,7 @@
208208
},
209209
{
210210
"cell_type": "code",
211-
"execution_count": 4,
211+
"execution_count": 11,
212212
"id": "1b269f11",
213213
"metadata": {},
214214
"outputs": [],
@@ -251,14 +251,15 @@
251251
" # 移除gym依赖,手动定义动作空间和观测空间\n",
252252
" self.action_space_size = 5 # 5个动作:停留、上、右、下、左\n",
253253
" \n",
254-
" # 动作到方向向量的映射\n",
254+
" \n",
255+
" # 动作到方向向量的映射(使用行列坐标)\n",
255256
" self.action_to_direction = {\n",
256257
" 0: np.array([0, 0]), # 停留\n",
257-
" 1: np.array([0, 1]), # 上\n",
258-
" 2: np.array([1, 0]), # 右\n",
259-
" 3: np.array([0, -1]), # 下\n",
260-
" 4: np.array([-1, 0]), # 左\n",
261-
" }\n",
258+
" 1: np.array([-1, 0]), # 上(row减小,col不变)\n",
259+
" 2: np.array([0, 1]), # 右(row不变,col增大)\n",
260+
" 3: np.array([1, 0]), # 下(row增大,col不变)\n",
261+
" 4: np.array([0, -1]), # 左(row不变,col减小)\n",
262+
"}\n",
262263
"\n",
263264
" self.reward_list = reward_list if reward_list is not None else [0, 1, -10, -1]\n",
264265
"\n",
@@ -361,7 +362,7 @@
361362
},
362363
{
363364
"cell_type": "code",
364-
"execution_count": 5,
365+
"execution_count": 9,
365366
"id": "6e687781",
366367
"metadata": {},
367368
"outputs": [],
@@ -512,7 +513,7 @@
512513
},
513514
{
514515
"cell_type": "code",
515-
"execution_count": 6,
516+
"execution_count": 12,
516517
"id": "8ee18b4d",
517518
"metadata": {},
518519
"outputs": [
@@ -530,33 +531,33 @@
530531
"name": "stdout",
531532
"output_type": "stream",
532533
"text": [
533-
"算法耗时: 0.1589 秒, 剩余迭代次数: 903\n",
534+
"算法耗时: 0.1376 秒, 剩余迭代次数: 903\n",
534535
"策略矩阵:\n",
535-
"[[0. 0. 1. 0. 0.]\n",
536+
"[[0. 0. 0. 1. 0.]\n",
537+
" [0. 0. 0. 0. 1.]\n",
538+
" [0. 0. 0. 0. 1.]\n",
539+
" [0. 0. 0. 0. 1.]\n",
540+
" [0. 0. 0. 0. 1.]\n",
536541
" [0. 0. 0. 1. 0.]\n",
542+
" [0. 0. 0. 0. 1.]\n",
543+
" [0. 1. 0. 0. 0.]\n",
537544
" [0. 0. 0. 1. 0.]\n",
538545
" [0. 0. 0. 1. 0.]\n",
539546
" [0. 0. 0. 1. 0.]\n",
540-
" [0. 0. 1. 0. 0.]\n",
541547
" [0. 0. 0. 1. 0.]\n",
542-
" [0. 0. 0. 0. 1.]\n",
543-
" [0. 0. 1. 0. 0.]\n",
544548
" [0. 0. 1. 0. 0.]\n",
549+
" [1. 0. 0. 0. 0.]\n",
550+
" [0. 0. 0. 0. 1.]\n",
545551
" [0. 0. 1. 0. 0.]\n",
546552
" [0. 0. 1. 0. 0.]\n",
547-
" [0. 1. 0. 0. 0.]\n",
548-
" [1. 0. 0. 0. 0.]\n",
549553
" [0. 0. 0. 1. 0.]\n",
550554
" [0. 1. 0. 0. 0.]\n",
551555
" [0. 1. 0. 0. 0.]\n",
552556
" [0. 0. 1. 0. 0.]\n",
553-
" [0. 0. 0. 0. 1.]\n",
554-
" [0. 0. 0. 0. 1.]\n",
555-
" [0. 1. 0. 0. 0.]\n",
556-
" [0. 1. 0. 0. 0.]\n",
557-
" [0. 1. 0. 0. 0.]\n",
558-
" [0. 1. 0. 0. 0.]\n",
559-
" [0. 0. 0. 0. 1.]]\n",
557+
" [0. 0. 1. 0. 0.]\n",
558+
" [0. 0. 1. 0. 0.]\n",
559+
" [0. 0. 1. 0. 0.]\n",
560+
" [0. 1. 0. 0. 0.]]\n",
560561
"状态值函数:\n",
561562
"[3.48645648 3.13777804 2.82396745 2.54153791 2.28735133 3.87387697\n",
562563
" 3.48645648 2.54153791 9.99967208 8.99967208 4.30434418 4.78264108\n",

0 commit comments

Comments (0)