# mkdocs.yml — wgyhhhh/Mathematical-Foundations-of-Reinforcement-Learning-Notes (branch: main)

# Site identity and repository links used by MkDocs and the Material theme.
site_name: 强化学习中的数学原理
site_url: https://wgyhhhh.github.io/Mathematical-Foundations-of-Reinforcement-Learning-Notes/
use_directory_urls: true
repo_url: https://github.com/wgyhhhh/Mathematical-Foundations-of-Reinforcement-Learning-Notes
repo_name: wgyhhhh/Mathematical-Foundations-of-Reinforcement-Learning-Notes
# When edit_uri is omitted, MkDocs falls back to `edit/master/docs/` for GitHub
# repositories. Set it explicitly so the content.action.edit and
# content.action.view buttons enabled under theme.features point at the branch
# this repository uses.
# NOTE(review): assumes the default branch is `main` and docs_dir is `docs` — confirm.
edit_uri: edit/main/docs/
site_author: Guangyu
site_description: 强化学习中的数学原理

# Material for MkDocs theme settings.
theme:
  name: material
  # Some of the theme's built-in UI hint text is shown in Chinese.
  language: zh
  custom_dir: docs/overrides
  logo: img/logo.jpg
  favicon: img/favicon-96x96.png
  icon:
    repo: fontawesome/brands/github-alt
  palette:
    # Light scheme, selected when the browser prefers a light color scheme.
    - media: '(prefers-color-scheme: light)'
      scheme: default
      primary: black
      accent: green
      toggle:
        icon: material/weather-sunny
        name: 明亮主题
    # Dark scheme, selected when the browser prefers a dark color scheme.
    - media: '(prefers-color-scheme: dark)'
      scheme: slate
      primary: black
      accent: yellow
      toggle:
        icon: material/weather-night
        name: 暗黑主题
  features:
    # Adds a button that lets the reader dismiss the current announcement.
    - announce.dismiss
    # - navigation.instant
    # - header.autohide  # auto-hide the header
    # Keeps the URL in the address bar in sync with the active anchor.
    - navigation.tracking
    # Renders top-level sections as a tab bar below the header
    # (unchanged on mobile).
    - navigation.tabs
    # - navigation.tabs.sticky  # keep the tab bar pinned below the header while scrolling
    # - navigation.sections  # renders sections as permanently expanded groups;
    #                          unsuitable when first-level entries must stay collapsible
    # Back-to-top button, shown when the reader scrolls up.
    - navigation.top
    # Previous/next page links in the page footer.
    - navigation.footer
    # Suggests a completion for the word being typed in the search box.
    - search.suggest
    # Highlights the search terms on the page opened from a search result.
    - search.highlight
    # Share button for the current search query.
    - search.share
    # Section index pages: a document can be attached directly to a section.
    - navigation.indexes
    - content.tabs.link
    - content.tooltips
    # Copy-to-clipboard button on code blocks.
    - content.code.copy
    - content.action.edit
    - content.action.view
    - content.code.annotate


# Extra values consumed by the Material theme templates.
extra:
  # Social / contact links (shown at the bottom-right of the page).
  social:
    - icon: fontawesome/brands/github
      link: https://github.com/wgyhhhh/
      name: GitHub
    - icon: fontawesome/brands/bilibili
      link: https://space.bilibili.com/281217178
      name: 哔哩哔哩
    - icon: fontawesome/solid/paper-plane
      link: mailto:wgyhhh001@gmail.com
      name: 联系作者
  version:
    provider: mike
    alias: true
  # Cookie consent dialog.
  consent:
    actions:
      - accept
      - reject
      - manage
    title: Cookie 设置
    description: >-
      我们使用 cookies 来识别您的重复访问和偏好，以及衡量我们文档的有效性和用户是否找
      到他们正在搜索的内容。<br/>
      在您的同意下，您将帮助我们改进我们的文档。<br/>
      （您稍后仍可以在网页左下角重新修改 cookies 设置）
  # Google Analytics. `analytics` has to be a direct child of `extra`, with
  # `provider`, `property` and `feedback` nested beneath it. It was previously
  # indented one level too deep, which made all four keys children of
  # `consent` and left `extra.analytics` undefined.
  analytics:
    provider: google
    property: G-C5GHN09YHC
    # "Was this page helpful?" widget; each rating's `data` value is what gets
    # reported for that choice.
    feedback:
      title: 该页面对你有帮助吗？
      ratings:
        - icon: material/emoticon-happy-outline
          name: 有帮助
          data: 1
          note: 感谢您的反馈！
        - icon: material/emoticon-neutral-outline
          name: 一般般
          data: 0
          note: 感谢您的反馈！
        - icon: material/emoticon-sad-outline
          name: 有待改进
          data: -1
          note: 感谢您的反馈！
# Navigation tree: top-level entries -> chapters -> pages.
# Titles containing ':' or '?' are quoted so they can never be misread as
# YAML indicators; page paths are plain scalars.
nav:
  - 强化学习中的数学原理笔记:
      - 主页:
          - 欢迎: index.md
          - 第一版序言: Preface1.md
          - 第二版序言: Preface2.md
      - 第一章:
          - 介绍: Chapter-1/intro.md
          - 1.1-网格世界: Chapter-1/1-1.md
          - 1.2-状态和行动: Chapter-1/1-2.md
          - 1.3-状态转移: Chapter-1/1-3.md
          - 1.4-策略: Chapter-1/1-4.md
          - 1.5-奖励: Chapter-1/1-5.md
          - 1.6-轨迹、回报、回合: Chapter-1/1-6.md
          - 1.7-马尔科夫决策过程: Chapter-1/1-7.md
          - 1.8-总结: Chapter-1/1-8.md
      - 第二章:
          - 介绍: Chapter-2/intro.md
          - '2.1-为什么回报很重要?': Chapter-2/2-1.md
          - '2.2-如何计算回报?': Chapter-2/2-2.md
          - 2.3-状态值: Chapter-2/2-3.md
          - 2.4-贝尔曼方程: Chapter-2/2-4.md
          - 2.5-贝尔曼方程的例子: Chapter-2/2-5.md
          - 2.6-贝尔曼方程的矩阵形式: Chapter-2/2-6.md
          - 2.7-求解状态值: Chapter-2/2-7.md
          - 2.8-行动值: Chapter-2/2-8.md
          - 2.9-总结: Chapter-2/2-9.md
      - 第三章:
          - 介绍: Chapter-3/intro.md
          - 3.1-如何改进策略: Chapter-3/3-1.md
          - 3.2-最优状态值和最优策略: Chapter-3/3-2.md
          - 3.3-贝尔曼最优公式: Chapter-3/3-3.md
          - 3.4-从贝尔曼最优公式中求解最优策略: Chapter-3/3-4.md
          - 3.5-影响最优策略的因素: Chapter-3/3-5.md
          - 3.6-总结: Chapter-3/3-6.md
      - 第四章:
          - 介绍: Chapter-4/intro.md
          - 4.1-值迭代: Chapter-4/4-1.md
          - 4.2-策略迭代: Chapter-4/4-2.md
          - 4.3-截断策略迭代: Chapter-4/4-3.md
          - 4.4-总结: Chapter-4/4-4.md
      - 第五章:
          - 介绍: Chapter-5/intro.md
          - '5.1-启发示例:期望值估计': Chapter-5/5-1.md
          - '5.2-MC Basic:最简单的基于蒙特卡洛的算法': Chapter-5/5-2.md
          - 5.3-MC Exploring Starts算法: Chapter-5/5-3.md
          - 5.4-MC-Greedy算法: Chapter-5/5-4.md
          - '5.5-探索与利用:以Greedy策略为例': Chapter-5/5-5.md
          - 5.6-总结: Chapter-5/5-6.md
      - 第六章:
          - 介绍: Chapter-6/intro.md
          - '6.1-启发示例:期望值估计': Chapter-6/6-1.md
          - 6.2-罗宾斯-门罗算法: Chapter-6/6-2.md
          - 6.3-Dvoretzky定理: Chapter-6/6-3.md
          - 6.4-随机梯度下降: Chapter-6/6-4.md
          - 6.5-总结: Chapter-6/6-5.md
      - 第七章:
          - 介绍: Chapter-7/intro.md
          - '7.1-状态值估计:时序差分算法': Chapter-7/7-1.md
          - '7.2-行动值估计:Sarsa': Chapter-7/7-2.md
          - '7.3-行动值估计:n步Sarsa': Chapter-7/7-3.md
          - '7.4-最优行动值估计:Q-Learning': Chapter-7/7-4.md
          - 7.5-时序差分算法的统一框架: Chapter-7/7-5.md
          - 7.6-总结: Chapter-7/7-6.md
      - 第八章:
          - 介绍: Chapter-8/intro.md
          - '8.1-价值表示:从表格到函数': Chapter-8/8-1.md
          - '8.2-基于值函数的时序差分算法:状态值估计': Chapter-8/8-2.md
          - '8.3-基于值函数的时序差分算法:行动值估计': Chapter-8/8-3.md
          - 8.4-深度Q-learning: Chapter-8/8-4.md
          - 8.5-总结: Chapter-8/8-5.md
      - 第九章:
          - 介绍: Chapter-9/intro.md
          - '9.1-策略表示:从表格到函数': Chapter-9/9-1.md
          - '9.2-目标函数:定义最优策略': Chapter-9/9-2.md
          - 9.3-目标函数的梯度: Chapter-9/9-3.md
          - 9.4-蒙特卡洛策略梯度: Chapter-9/9-4.md
          - 9.5-总结: Chapter-9/9-5.md
      - 第十章:
          - 介绍: Chapter-10/intro.md
          - 10.1-最简单的演员-评论性方法: Chapter-10/10-1.md
          - 10.2-优势演员-评论性方法: Chapter-10/10-2.md
          - 10.3-异策略演员-评论性方法: Chapter-10/10-3.md
          - 10.4-确定性演员-评论性方法: Chapter-10/10-4.md
          - 10.5-总结: Chapter-10/10-5.md
      - Box(证明):
          - 介绍: Box/intro.md
          - '2.1:迭代解法的收敛性证明': Box/Box-2-1.md
          - '7.1:TD算法的推导': Box/Box-7-1.md
          - '7.4:期望Sarsa算法': Box/Box-7-4.md
      - 附录:
          - 术语: Appendix/1.md
  - 核心算法实现:
      - 策略迭代与值迭代: python/code.ipynb

# Build-time MkDocs plugins.
plugins:
  - search
  # NOTE(review): presumably what renders the notebook page listed in nav
  # (python/code.ipynb) — confirm.
  - mkdocs-jupyter

# Markdown extensions. For details see:
#   https://squidfunk.github.io/mkdocs-material/setup/extensions/python-markdown-extensions/
#   https://squidfunk.github.io/mkdocs-material/setup/extensions/python-markdown/
markdown_extensions:
  - abbr
  - attr_list
  - admonition
  - def_list
  - footnotes
  - md_in_html
  # Supports custom titles, tags, etc. declared at the top of a Markdown file.
  - meta
  - pymdownx.caret
  - pymdownx.betterem
  - pymdownx.critic
  - pymdownx.details
  - pymdownx.inlinehilite
  - pymdownx.keys
  - pymdownx.mark
  - pymdownx.snippets
  - pymdownx.smartsymbols
  - pymdownx.tilde
  - pymdownx.superfences:
      custom_fences:
        # Fences named `mermaid` are emitted with class `mermaid` through
        # superfences' fence_code_format formatter.
        - name: mermaid
          class: mermaid
          format: !!python/name:pymdownx.superfences.fence_code_format
  # LaTeX support.
  - pymdownx.arithmatex:
      generic: true
  - toc:
      # Adds a permalink anchor to each heading.
      permalink: true
  # Code block highlighting.
  - pymdownx.highlight:
      anchor_linenums: true
      # Show line numbers.
      linenums: true
      # auto_title: true  # show the programming language name
  - pymdownx.tabbed:
      alternate_style: true
  - pymdownx.tasklist:
      custom_checkbox: true

# Extra scripts added to every page, in the order listed.
extra_javascript:
  # - javascripts/extra.js  # custom JavaScript
  # Mouse-click effect.
  - https://Xiaokang2022.github.io/maliang/js/click-colorful.js
  # - https://cdn.jsdelivr.net/npm/mathjax@2/MathJax.js  # LaTeX support
  # ES6 polyfill for LaTeX support. Served from the Cloudflare cdnjs mirror:
  # the polyfill.io domain changed ownership in 2024 and began serving
  # malicious code, so it must not be loaded.
  - https://cdnjs.cloudflare.com/polyfill/v3/polyfill.min.js?features=es6
  # - ckplayer/js/ckplayer.js  # player configuration
  # - https://cdn.jsdelivr.net/npm/gitalk@latest/dist/gitalk.min.js  # gitalk support
  # NOTE(review): the original author's comment here says only "forgot", so the
  # purpose of this script is unrecorded. It looks like an internal build chunk
  # of mermaid 10.0.2 rather than a public entry point — confirm it is needed.
  - https://cdn.jsdelivr.net/npm/mermaid@10.0.2/dist/add-html-label-6e56ed67.min.js
  - javascripts/extra.js
  # MathJax configuration; listed ahead of the MathJax bundle below.
  - javascripts/mathjax.js
  # Loaded once; keeps the original rendering style.
  - https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js


# Additional stylesheets listed for the site.
extra_css:
  - css/misc.css # custom CSS
  - stylesheets/extra.css
# No extra templates are rendered. An explicit empty list is used because a
# bare `extra_templates:` key parses as null rather than as a list.
extra_templates: []
#   - sitemap.xml  (disabled; replace `[]` with a block list to enable)