Skip to content

Commit 7aee980

Browse files
committed
add 51la final
1 parent b094919 commit 7aee980

125 files changed

Lines changed: 72827 additions & 13766 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

docs/overrides/partials/head.html renamed to .history/docs/overrides/partials/head_20260330162958.html

File renamed without changes.
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
<script charset="UTF-8" id="LA_COLLECT" src="https://sdk.51.la/js-sdk-pro.min.js"></script>
2+
<script> /* Initialise the 51.la collector loaded by the tag above. */
3+
window.LA && LA.init({ // no-op when the SDK did not load (blocked or offline) instead of a ReferenceError
4+
id: "3PT4FUvdHzohQSRh",
5+
ck: "3PT4FUvdHzohQSRh"
6+
});
7+
</script>
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{{ super() }}
2+
3+
<script charset="UTF-8" id="LA_COLLECT" src="https://sdk.51.la/js-sdk-pro.min.js"></script>
4+
<script> /* Initialise the 51.la collector loaded by the tag above. */
5+
window.LA && LA.init({ // no-op when the SDK did not load (blocked or offline) instead of a ReferenceError
6+
id: "3PT4FUvdHzohQSRh",
7+
ck: "3PT4FUvdHzohQSRh"
8+
});
9+
</script>
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
<script charset="UTF-8" id="LA_COLLECT" src="https://sdk.51.la/js-sdk-pro.min.js"></script>
2+
<script> /* Initialise the 51.la collector loaded by the tag above. */
3+
window.LA && LA.init({ // no-op when the SDK did not load (blocked or offline) instead of a ReferenceError
4+
id: "3PT4FUvdHzohQSRh",
5+
ck: "3PT4FUvdHzohQSRh"
6+
});
7+
</script>

.history/mkdocs_20260330162232.yml

Lines changed: 258 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,258 @@
1+
site_name: 强化学习课程笔记
2+
site_url: https://wgyhhhh.github.io/Mathematical-Foundations-of-Reinforcement-Learning-Notes/
3+
use_directory_urls: true
4+
repo_url: https://github.com/wgyhhhh/Mathematical-Foundations-of-Reinforcement-Learning-Notes
5+
repo_name: wgyhhhh/Mathematical-Foundations-of-Reinforcement-Learning-Notes
6+
site_author: Guangyu
7+
site_description: 《强化学习的数学原理》的课程笔记
8+
9+
theme:
10+
icon:
11+
repo: fontawesome/brands/github-alt
12+
name: material
13+
palette:
14+
- media: "(prefers-color-scheme: light)"
15+
scheme: default
16+
primary: black
17+
accent: green
18+
toggle:
19+
icon: material/weather-sunny
20+
name: 明亮主题
21+
- media: "(prefers-color-scheme: dark)"
22+
scheme: slate
23+
primary: black
24+
accent: yellow
25+
toggle:
26+
icon: material/weather-night
27+
name: 暗黑主题
28+
logo: img/logo.jpg
29+
favicon: img/favicon-96x96.png
30+
custom_dir: docs/overrides
31+
32+
features:
33+
- announce.dismiss # temporary announcements can be marked as read; adds a button to dismiss the current one
34+
# - navigation.instant
35+
#- header.autohide # auto-hide the header
36+
- navigation.tracking # the address-bar URL is updated with the active anchor highlighted in the table of contents
37+
- navigation.tabs # top-level sections are rendered in a menu layer below the header; unchanged on mobile
38+
# - navigation.tabs.sticky # sticky tabs lock below the header and stay visible while scrolling down
39+
- navigation.sections # top-level sections are rendered as sidebar groups on viewports above 1220px; unchanged on mobile
40+
- navigation.top # back-to-top button, shown when scrolling up
41+
- navigation.footer # footer with links to the previous and next page
42+
- search.suggest # suggest a completion for the word being typed in the search box
43+
- search.highlight # highlight the search terms on the page reached from a result
44+
- search.share # share button for the search query
45+
- navigation.expand # expand all sidebar sections by default when a tab is opened
46+
- navigation.indexes # section index pages: a document can be attached directly to a section
47+
- content.tabs.link
48+
- content.tooltips
49+
- content.code.copy # copy button on code blocks
50+
- content.action.edit
51+
- content.action.view
52+
- content.code.annotate
53+
language: zh # built-in UI strings are shown in Chinese
54+
55+
56+
57+
extra:
58+
social: # 社交联系方式(页面右下角内容)
59+
- icon: fontawesome/brands/github
60+
link: https://github.com/wgyhhhh/
61+
name: GitHub
62+
- icon: fontawesome/brands/bilibili
63+
link: https://space.bilibili.com/281217178
64+
name: 哔哩哔哩
65+
- icon: fontawesome/solid/paper-plane
66+
link: mailto:wgyhhh001@gmail.com
67+
name: 联系作者
68+
version:
69+
provider: mike
70+
alias: true
71+
consent:
72+
actions:
73+
- accept
74+
- reject
75+
- manage
76+
title: Cookie 设置
77+
description: >-
78+
我们使用 cookies 来识别您的重复访问和偏好,以及衡量我们文档的有效性和用户是否找
79+
到他们正在搜索的内容。<br/>
80+
在您的同意下,您将帮助我们改进我们的文档。<br/>
81+
(您稍后仍可以在网页左下角重新修改 cookies 设置)
82+
analytics: # 谷歌统计
83+
provider: google
84+
property: G-C5GHN09YHC
85+
feedback:
86+
title: 该页面对你有帮助吗?
87+
ratings:
88+
- icon: material/emoticon-happy-outline
89+
name: 有帮助
90+
data: 1
91+
note: 感谢您的反馈!
92+
- icon: material/emoticon-neutral-outline
93+
name: 一般般
94+
data: 0
95+
note: 感谢您的反馈!
96+
- icon: material/emoticon-sad-outline
97+
name: 有待改进
98+
data: -1
99+
note: 感谢您的反馈!
100+
# extra_javascript: (disabled) MkDocs expects script paths or URLs in this list, not raw
101+
# <script> markup. The same key is also defined again further down; if both are
102+
# top-level keys of one mapping, YAML loaders typically keep only the last one.
103+
# Put the 51.la snippet in a theme override template (extrahead block) instead.
104+
105+
106+
nav:
107+
- 主页:
108+
- 欢迎: 'index.md'
109+
- 引言: 'Preface1.md'
110+
- 第一章:
111+
- 介绍: 'Chapter-1/intro.md'
112+
- 1.1-网格世界: 'Chapter-1/1-1.md'
113+
- 1.2-状态和行动: 'Chapter-1/1-2.md'
114+
- 1.3-状态转移: 'Chapter-1/1-3.md'
115+
- 1.4-策略: 'Chapter-1/1-4.md'
116+
- 1.5-奖励: 'Chapter-1/1-5.md'
117+
- 1.6-轨迹、回报、回合: 'Chapter-1/1-6.md'
118+
- 1.7-马尔科夫决策过程: 'Chapter-1/1-7.md'
119+
- 1.8-总结: 'Chapter-1/1-8.md'
120+
- 第二章:
121+
- 介绍: 'Chapter-2/intro.md'
122+
- 2.1-为什么回报很重要?: 'Chapter-2/2-1.md'
123+
- 2.2-如何计算回报?: 'Chapter-2/2-2.md'
124+
- 2.3-状态值: 'Chapter-2/2-3.md'
125+
- 2.4-贝尔曼方程: 'Chapter-2/2-4.md'
126+
- 2.5-贝尔曼方程的例子: 'Chapter-2/2-5.md'
127+
- 2.6-贝尔曼方程的矩阵形式: 'Chapter-2/2-6.md'
128+
- 2.7-求解状态值: 'Chapter-2/2-7.md'
129+
- 2.8-行动值: 'Chapter-2/2-8.md'
130+
- 2.9-总结: 'Chapter-2/2-9.md'
131+
- 第三章:
132+
- 介绍: 'Chapter-3/intro.md'
133+
- 3.1-如何改进策略: 'Chapter-3/3-1.md'
134+
- 3.2-最优状态值和最优策略: 'Chapter-3/3-2.md'
135+
- 3.3-贝尔曼最优公式: 'Chapter-3/3-3.md'
136+
- 3.4-从贝尔曼最优公式中求解最优策略: 'Chapter-3/3-4.md'
137+
- 3.5-影响最优策略的因素: 'Chapter-3/3-5.md'
138+
- 3.6-总结: 'Chapter-3/3-6.md'
139+
- 第四章:
140+
- 介绍: 'Chapter-4/intro.md'
141+
- 4.1-值迭代: 'Chapter-4/4-1.md'
142+
- 4.2-策略迭代: 'Chapter-4/4-2.md'
143+
- 4.3-截断策略迭代: 'Chapter-4/4-3.md'
144+
- 4.4-总结: 'Chapter-4/4-4.md'
145+
- 第五章:
146+
- 介绍: 'Chapter-5/intro.md'
147+
- 5.1-启发示例:期望值估计: 'Chapter-5/5-1.md'
148+
- 5.2-MC Basic:最简单的基于蒙特卡洛的算法: 'Chapter-5/5-2.md'
149+
- 5.3-MC Exploring Starts算法: 'Chapter-5/5-3.md'
150+
- 5.4-MC-Greedy算法: 'Chapter-5/5-4.md'
151+
- 5.5-探索与利用:以Greedy策略为例: 'Chapter-5/5-5.md'
152+
- 5.6-总结: 'Chapter-5/5-6.md'
153+
- 第六章:
154+
- 介绍: 'Chapter-6/intro.md'
155+
- 6.1-启发示例:期望值估计: 'Chapter-6/6-1.md'
156+
- 6.2-罗宾斯-门罗算法: 'Chapter-6/6-2.md'
157+
- 6.3-Dvoretzky定理: 'Chapter-6/6-3.md'
158+
- 6.4-随机梯度下降: 'Chapter-6/6-4.md'
159+
- 6.5-总结: 'Chapter-6/6-5.md'
160+
- 第七章:
161+
- 介绍: 'Chapter-7/intro.md'
162+
- 7.1-状态值估计:时序差分算法: 'Chapter-7/7-1.md'
163+
- 7.2-行动值估计:Sarsa: 'Chapter-7/7-2.md'
164+
- 7.3-行动值估计:n步Sarsa: 'Chapter-7/7-3.md'
165+
- 7.4-最优行动值估计:Q-Learning: 'Chapter-7/7-4.md'
166+
- 7.5-时序差分算法的统一框架: 'Chapter-7/7-5.md'
167+
- 7.6-总结: 'Chapter-7/7-6.md'
168+
- 第八章:
169+
- 介绍: 'Chapter-8/intro.md'
170+
- 8.1-价值表示:从表格到函数: 'Chapter-8/8-1.md'
171+
- 8.2-基于值函数的时序差分算法:状态值估计: 'Chapter-8/8-2.md'
172+
- 8.3-基于值函数的时序差分算法:行动值估计: 'Chapter-8/8-3.md'
173+
- 8.4-深度Q-learning: 'Chapter-8/8-4.md'
174+
- 8.5-总结: 'Chapter-8/8-5.md'
175+
- 第九章:
176+
- 介绍: 'Chapter-9/intro.md'
177+
- 9.1-策略表示:从表格到函数: 'Chapter-9/9-1.md'
178+
- 9.2-目标函数:定义最优策略: 'Chapter-9/9-2.md'
179+
- 9.3-目标函数的梯度: 'Chapter-9/9-3.md'
180+
- 9.4-蒙特卡洛策略梯度: 'Chapter-9/9-4.md'
181+
- 9.5-总结: 'Chapter-9/9-5.md'
182+
- 第十章:
183+
- 介绍: 'Chapter-10/intro.md'
184+
- 10.1-最简单的演员-评论性方法: 'Chapter-10/10-1.md'
185+
- 10.2-优势演员-评论性方法: 'Chapter-10/10-2.md'
186+
- 10.3-异策略演员-评论性方法: 'Chapter-10/10-3.md'
187+
- 10.4-确定性演员-评论性方法: 'Chapter-10/10-4.md'
188+
- 10.5-总结: 'Chapter-10/10-5.md'
189+
- Box(证明):
190+
- 介绍: 'Box/intro.md'
191+
- 2.1:迭代解法的收敛性证明: 'Box/Box-2-1.md'
192+
- 7.1:TD算法的推导: 'Box/Box-7-1.md'
193+
- 7.4:期望Sarsa算法: 'Box/Box-7-4.md'
194+
- 附录:
195+
- 术语: 'Appendix/1.md'
196+
- 核心算法实现:
197+
- 策略迭代与值迭代: 'python/code.ipynb'
198+
199+
plugins:
200+
- mkdocs-jupyter
201+
202+
markdown_extensions: # see https://squidfunk.github.io/mkdocs-material/setup/extensions/python-markdown-extensions/ and https://squidfunk.github.io/mkdocs-material/setup/extensions/python-markdown/
203+
- abbr
204+
- attr_list
205+
- admonition
206+
- def_list
207+
- footnotes
208+
- md_in_html
209+
- meta # allows custom metadata (title, tags, ...) at the top of a Markdown file
210+
- pymdownx.caret
211+
- pymdownx.betterem
212+
- pymdownx.critic
213+
- pymdownx.details
214+
- pymdownx.inlinehilite
215+
- pymdownx.keys
216+
- pymdownx.mark
217+
- pymdownx.snippets
218+
- pymdownx.smartsymbols
219+
- pymdownx.tilde
220+
- pymdownx.superfences:
221+
custom_fences:
222+
- name: mermaid
223+
class: mermaid
224+
format:
225+
!!python/name:pymdownx.superfences.fence_code_format # formatter applied to the custom `mermaid` fence
226+
- pymdownx.arithmatex: # LaTeX support
227+
generic: true
228+
- toc:
229+
permalink: true # add a permalink anchor to each heading
230+
- pymdownx.highlight: # code block highlighting
231+
anchor_linenums: true
232+
linenums: true # show line numbers
233+
# auto_title: true # show the programming language name as the block title
234+
- pymdownx.tabbed:
235+
alternate_style: true
236+
- pymdownx.tasklist:
237+
custom_checkbox: true
238+
239+
extra_javascript:
240+
# - javascripts/extra.js # custom JavaScript
241+
- https://Xiaokang2022.github.io/maliang/js/click-colorful.js # mouse-click effect
242+
- https://cdn.jsdelivr.net/gh/Wcowin/Wcowin.github.io@main/docs/javascripts/extra.js # extra.js served from a CDN
243+
#- https://cdn.jsdelivr.net/npm/mathjax@2/MathJax.js # LaTeX support (MathJax 2, disabled)
244+
- https://cdnjs.cloudflare.com/polyfill/v3/polyfill.min.js?features=es6 # ES6 polyfill for MathJax; served from the cdnjs mirror because polyfill.io was compromised in 2024
245+
# - https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js # disabled: MathJax is loaded once, after its config script, at the end of this list
246+
# - ckplayer/js/ckplayer.js # video player setup
247+
# - https://cdn.jsdelivr.net/npm/gitalk@latest/dist/gitalk.min.js # Gitalk comments
248+
- https://cdn.jsdelivr.net/npm/mermaid@10.0.2/dist/add-html-label-6e56ed67.min.js # purpose not recorded by the original author
249+
- javascripts/extra.js
250+
- javascripts/mathjax.js # MathJax configuration; listed before the MathJax script so it is read first
251+
- https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js
252+
253+
254+
extra_css:
255+
- css/misc.css #自定义css
256+
- stylesheets/extra.css
257+
extra_templates:
258+
# - sitemap.xml

docs/overrides/main.html

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
{% extends "base.html" %}
2+
3+
{% block extrahead %}{# keep the parent block's content, then append the 51.la analytics snippet #}
4+
{{ super() }}
5+
<script charset="UTF-8" id="LA_COLLECT" src="https://sdk.51.la/js-sdk-pro.min.js"></script>
6+
<script>
7+
window.LA && LA.init({ // no-op when the SDK did not load (blocked or offline) instead of a ReferenceError
8+
id: "3PT4FUvdHzohQSRh",
9+
ck: "3PT4FUvdHzohQSRh"
10+
});
11+
</script>
12+
{% endblock %}

mkdocs.yml

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -97,13 +97,7 @@ extra:
9797
name: 有待改进
9898
data: -1
9999
note: 感谢您的反馈!
100-
extra_javascript:
101-
- |
102-
<script charset="UTF-8" id="LA_COLLECT" src="//sdk.51.la/js-sdk-pro.min.js"></script>
103-
<script>LA.init({id:"3PT4FUvdHzohQSRh",ck:"3PT4FUvdHzohQSRh"})</script>
104-
105-
106-
nav:
100+
nav:
107101
- 主页:
108102
- 欢迎: 'index.md'
109103
- 引言: 'Preface1.md'
@@ -255,4 +249,4 @@ extra_css:
255249
- css/misc.css #自定义css
256250
- stylesheets/extra.css
257251
extra_templates:
258-
# - sitemap.xml
252+
# - sitemap.xml

0 commit comments

Comments
 (0)