|
10 | 10 | }, |
11 | 11 | { |
12 | 12 | "cell_type": "code", |
13 | | - "execution_count": 1, |
| 13 | + "execution_count": 5, |
14 | 14 | "id": "e1014345", |
15 | 15 | "metadata": {}, |
16 | 16 | "outputs": [], |
|
40 | 40 | }, |
41 | 41 | { |
42 | 42 | "cell_type": "code", |
43 | | - "execution_count": 2, |
| 43 | + "execution_count": 6, |
44 | 44 | "id": "857723f4", |
45 | 45 | "metadata": {}, |
46 | 46 | "outputs": [], |
|
58 | 58 | }, |
59 | 59 | { |
60 | 60 | "cell_type": "code", |
61 | | - "execution_count": 3, |
| 61 | + "execution_count": 7, |
62 | 62 | "id": "10f962e5", |
63 | 63 | "metadata": {}, |
64 | 64 | "outputs": [], |
|
208 | 208 | }, |
209 | 209 | { |
210 | 210 | "cell_type": "code", |
211 | | - "execution_count": 4, |
| 211 | + "execution_count": 11, |
212 | 212 | "id": "1b269f11", |
213 | 213 | "metadata": {}, |
214 | 214 | "outputs": [], |
|
251 | 251 | " # 移除gym依赖,手动定义动作空间和观测空间\n", |
252 | 252 | " self.action_space_size = 5 # 5个动作:停留、上、右、下、左\n", |
253 | 253 | " \n", |
254 | | - " # 动作到方向向量的映射\n", |
| 254 | + " \n", |
| 255 | + " # 动作到方向向量的映射(使用行列坐标)\n", |
255 | 256 | " self.action_to_direction = {\n", |
256 | 257 | " 0: np.array([0, 0]), # 停留\n", |
257 | | - " 1: np.array([0, 1]), # 上\n", |
258 | | - " 2: np.array([1, 0]), # 右\n", |
259 | | - " 3: np.array([0, -1]), # 下\n", |
260 | | - " 4: np.array([-1, 0]), # 左\n", |
261 | | - " }\n", |
| 258 | + " 1: np.array([-1, 0]), # 上(row减小,col不变)\n", |
| 259 | + " 2: np.array([0, 1]), # 右(row不变,col增大)\n", |
| 260 | + " 3: np.array([1, 0]), # 下(row增大,col不变)\n", |
| 261 | + " 4: np.array([0, -1]), # 左(row不变,col减小)\n", |
| 262 | + "}\n", |
262 | 263 | "\n", |
263 | 264 | " self.reward_list = reward_list if reward_list is not None else [0, 1, -10, -1]\n", |
264 | 265 | "\n", |
|
361 | 362 | }, |
362 | 363 | { |
363 | 364 | "cell_type": "code", |
364 | | - "execution_count": 5, |
| 365 | + "execution_count": 9, |
365 | 366 | "id": "6e687781", |
366 | 367 | "metadata": {}, |
367 | 368 | "outputs": [], |
|
512 | 513 | }, |
513 | 514 | { |
514 | 515 | "cell_type": "code", |
515 | | - "execution_count": 6, |
| 516 | + "execution_count": 12, |
516 | 517 | "id": "8ee18b4d", |
517 | 518 | "metadata": {}, |
518 | 519 | "outputs": [ |
|
530 | 531 | "name": "stdout", |
531 | 532 | "output_type": "stream", |
532 | 533 | "text": [ |
533 | | - "算法耗时: 0.1589 秒, 剩余迭代次数: 903\n", |
| 534 | + "算法耗时: 0.1376 秒, 剩余迭代次数: 903\n", |
534 | 535 | "策略矩阵:\n", |
535 | | - "[[0. 0. 1. 0. 0.]\n", |
| 536 | + "[[0. 0. 0. 1. 0.]\n", |
| 537 | + " [0. 0. 0. 0. 1.]\n", |
| 538 | + " [0. 0. 0. 0. 1.]\n", |
| 539 | + " [0. 0. 0. 0. 1.]\n", |
| 540 | + " [0. 0. 0. 0. 1.]\n", |
536 | 541 | " [0. 0. 0. 1. 0.]\n", |
| 542 | + " [0. 0. 0. 0. 1.]\n", |
| 543 | + " [0. 1. 0. 0. 0.]\n", |
537 | 544 | " [0. 0. 0. 1. 0.]\n", |
538 | 545 | " [0. 0. 0. 1. 0.]\n", |
539 | 546 | " [0. 0. 0. 1. 0.]\n", |
540 | | - " [0. 0. 1. 0. 0.]\n", |
541 | 547 | " [0. 0. 0. 1. 0.]\n", |
542 | | - " [0. 0. 0. 0. 1.]\n", |
543 | | - " [0. 0. 1. 0. 0.]\n", |
544 | 548 | " [0. 0. 1. 0. 0.]\n", |
| 549 | + " [1. 0. 0. 0. 0.]\n", |
| 550 | + " [0. 0. 0. 0. 1.]\n", |
545 | 551 | " [0. 0. 1. 0. 0.]\n", |
546 | 552 | " [0. 0. 1. 0. 0.]\n", |
547 | | - " [0. 1. 0. 0. 0.]\n", |
548 | | - " [1. 0. 0. 0. 0.]\n", |
549 | 553 | " [0. 0. 0. 1. 0.]\n", |
550 | 554 | " [0. 1. 0. 0. 0.]\n", |
551 | 555 | " [0. 1. 0. 0. 0.]\n", |
552 | 556 | " [0. 0. 1. 0. 0.]\n", |
553 | | - " [0. 0. 0. 0. 1.]\n", |
554 | | - " [0. 0. 0. 0. 1.]\n", |
555 | | - " [0. 1. 0. 0. 0.]\n", |
556 | | - " [0. 1. 0. 0. 0.]\n", |
557 | | - " [0. 1. 0. 0. 0.]\n", |
558 | | - " [0. 1. 0. 0. 0.]\n", |
559 | | - " [0. 0. 0. 0. 1.]]\n", |
| 557 | + " [0. 0. 1. 0. 0.]\n", |
| 558 | + " [0. 0. 1. 0. 0.]\n", |
| 559 | + " [0. 0. 1. 0. 0.]\n", |
| 560 | + " [0. 1. 0. 0. 0.]]\n", |
560 | 561 | "状态值函数:\n", |
561 | 562 | "[3.48645648 3.13777804 2.82396745 2.54153791 2.28735133 3.87387697\n", |
562 | 563 | " 3.48645648 2.54153791 9.99967208 8.99967208 4.30434418 4.78264108\n", |
|
0 commit comments