Skip to content

Commit cee5a5e

Browse files
committed
Add migrate-posts script
1 parent 3cf3f4e commit cee5a5e

1 file changed

Lines changed: 168 additions & 0 deletions

File tree

migrate-posts.scala

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
//> using dep org.virtuslab::scala-yaml::0.3.1
2+
//> using dep co.fs2::fs2-io::3.12.2
3+
4+
import cats.effect.{IO, IOApp}
5+
import cats.syntax.all.*
6+
import fs2.io.file.{Files, Path}
7+
import org.virtuslab.yaml.*
8+
9+
/** Contents of the optional `meta` section of a post's YAML front matter. */
case class PostMeta(
  author: Option[String]
) derives YamlCodec
10+
11+
/** Fields decoded from a post's YAML front matter: a required title plus an
  * optional category and an optional `meta` section.
  */
case class Conf(
  title: String,
  category: Option[String],
  meta: Option[PostMeta]
) derives YamlCodec
13+
14+
case class Post(conf: Conf, content: String, originalYaml: String) {
15+
16+
/** Replaces every Jekyll `{% post_url YYYY-MM-DD-name %}` tag in `markdown`
  * with `name.md`; text without such tags is returned unchanged.
  */
def cleanPostUrl(markdown: String): String =
  // Group 1 captures the post name that follows the date prefix.
  markdown.replaceAll("""\{%\s*post_url\s+\d{4}-\d{2}-\d{2}-(.+?)\s*%\}""", "$1.md")
21+
22+
/** Rewrites Jekyll-era links in `markdown` so they point at migrated `.md` files.
  *
  * Steps, applied in this order:
  *  1. `https://typelevel.org/blog/YYYY/MM/DD/name.html` becomes `name.md`
  *  2. relative `/blog/YYYY/MM/DD/name.html` becomes `name.md`
  *  3. `{{ site.url }}` is removed, leaving the path that followed it
  *  4. every remaining `.html` becomes `.md`, except inside `http://` / `https://` URLs
  *  5. a fixed set of site paths is redirected to its new location
  *
  * @param markdown post body to clean
  * @return the body with the rewrites above applied
  */
def cleanOtherLinks(markdown: String): String = {
  import scala.util.matching.Regex

  val typelevelBlogPattern =
    """https://typelevel\.org/blog/\d{4}/\d{2}/\d{2}/([^)\s]+)\.html""".r
  // The look-behind keeps URLs such as `example.com/blog/...` from matching.
  val relativeBlogPattern =
    """(?<![a-z])/blog/\d{4}/\d{2}/\d{2}/([^)\s]+)\.html""".r
  val siteUrlPattern = """\{\{\s*site\.url\s*\}\}""".r
  // Matches either a whole absolute URL (group 1) or a bare `.html`. Consuming
  // the URL as one token replaces a look-behind with an unbounded quantifier,
  // which older JVMs reject with PatternSyntaxException.
  val urlOrHtmlPattern = """(https?://[^\s)]*)|\.html""".r
  // Negative look-aheads keep these rewrites from doubling a suffix that is
  // already there (e.g. `/gsoc/ideas.html` is `/gsoc/ideas.md` by this point).
  val projectsPattern = """/projects(?!/README\.md)""".r
  val gsocIdeasPattern = """/gsoc/ideas(?!\.md)""".r

  val absoluteBlogFixed = typelevelBlogPattern.replaceAllIn(markdown, "$1.md")
  val relativeBlogFixed = relativeBlogPattern.replaceAllIn(absoluteBlogFixed, "$1.md")
  val siteUrlRemoved = siteUrlPattern.replaceAllIn(relativeBlogFixed, "")

  val extensionsFixed = urlOrHtmlPattern.replaceAllIn(
    siteUrlRemoved,
    m =>
      Option(m.group(1)) match {
        case Some(_) => Regex.quoteReplacement(m.matched) // absolute URL: keep verbatim
        case None    => ".md"
      }
  )

  val conductFixed = extensionsFixed
    .replace("/conduct.md", "/code-of-conduct/README.md")
    .replace("/code-of-conduct.md", "/code-of-conduct/README.md")
  val projectsFixed = projectsPattern.replaceAllIn(conductFixed, "/projects/README.md")

  gsocIdeasPattern.replaceAllIn(projectsFixed, "/gsoc/ideas.md")
}
51+
52+
/** Builds the `{% ... %}` metadata header for this post.
  *
  * The header always contains a quoted `date` line. An `author` line (written
  * as a `${...}` reference to the front-matter author value) and a `tags` line
  * (the category in brackets) are included only when the front matter has them.
  *
  * @param date date string emitted verbatim inside quotes
  * @return the header lines joined with newlines, without a trailing newline
  */
def buildHoconMetadata(date: String): String = {
  val author = for {
    meta <- conf.meta
    name <- meta.author
  } yield s" author: $${$name}"
  val tags = conf.category.map(category => s" tags: [$category]")

  val body = author.toList ++ List(s""" date: "$date"""") ++ tags.toList
  (("{%" :: body) :+ "%}").mkString("\n")
}
65+
66+
/** Renders this post for the given migration stage.
  *
  *  - stage 1: original front matter and content, unchanged
  *  - stage 2: metadata header and `# title`, content unchanged
  *  - stage 3: as stage 2, with `post_url` tags rewritten
  *  - any other value: as stage 3, plus the remaining link clean-ups
  *
  * @param date  date passed on to the metadata header
  * @param stage migration stage selector
  */
def toLaika(date: String, stage: Int): String = {
  // Shared layout for every stage that emits the new header and title.
  def render(body: String): String =
    s"${buildHoconMetadata(date)}\n\n# ${conf.title}\n\n$body\n"

  if (stage == 1) s"---\n$originalYaml---\n\n$content\n"
  else if (stage == 2) render(content)
  else if (stage == 3) render(cleanPostUrl(content))
  else render(cleanOtherLinks(cleanPostUrl(content)))
}
90+
}
91+
92+
object PostParser {

  /** Splits a raw post into YAML front matter and markdown body and decodes
    * the front matter into a [[Conf]].
    *
    * Windows line endings are normalised first. The file must consist of
    * optional leading whitespace, an opening `---` line, the YAML, a closing
    * `---` line and the body. Anything else before the opening delimiter means
    * the first `---` is not a front-matter marker (for example a horizontal
    * rule in the body), so the file is rejected instead of being mis-parsed.
    *
    * @param path    source file, used only for the error message
    * @param content full text of the post
    * @return the parsed post, or a `Left` when the front matter is missing or
    *         cannot be decoded
    */
  def parse(path: Path, content: String): Either[Throwable, Post] = {
    // Normalize Windows line endings to Unix
    val normalized = content.replace("\r\n", "\n")

    normalized.split("---\n", 3) match {
      case Array(prefix, yamlContent, body) if prefix.trim.isEmpty =>
        yamlContent.as[Conf].map(conf => Post(conf, body.trim, yamlContent))
      case _ =>
        val fn = path.fileName
        Left(new Exception(s"Invalid post '$fn': no YAML front matter found"))
    }
  }
}
107+
108+
object MigratePosts extends IOApp {
109+
// Directory that is listed for posts to migrate.
val oldPostsDir: Path = Path("../typelevel.github.com/collections/_posts")
// Directory the migrated posts are written to.
val newBlogDir: Path = Path("src/blog")

// Explicit target names for source files that would otherwise end up with the
// same name once the date prefix is stripped.
val renameMap: Map[String, String] = Map(
  ("2023-02-23-gsoc.md", "gsoc-2023.md"),
  ("2024-03-02-gsoc.md", "gsoc-2024.md"),
  ("2025-02-27-gsoc.md", "gsoc-2025.md")
)
118+
119+
/** Splits a `YYYY-MM-DD-rest` file name into its date and target file name.
  *
  * The target name is the `renameMap` entry for the full file name when one
  * exists, otherwise the part after the date prefix.
  *
  * @return `Right((date, newName))`, or a `Left` when the file name does not
  *         have the dated form
  */
def getDateAndName(path: Path): Either[Throwable, (String, String)] = {
  val filename = path.fileName.toString
  val matcher = """(\d{4}-\d{2}-\d{2})-(.+)""".r.pattern.matcher(filename)

  // `matches()` requires the whole file name to fit the pattern.
  if (matcher.matches())
    Right((matcher.group(1), renameMap.getOrElse(filename, matcher.group(2))))
  else
    Left(new Exception(s"Filename doesn't match pattern: $filename"))
}
130+
131+
/** Reads the file at `path` and decodes its bytes as UTF-8 into one string. */
def readPost(path: Path): IO[String] = {
  val bytes = Files[IO].readAll(path)
  val text = bytes.through(fs2.text.utf8.decode)
  text.compile.string
}
136+
137+
/** Encodes `content` as UTF-8 and writes it to the file at `path`. */
def writePost(path: Path, content: String): IO[Unit] = {
  val sink = Files[IO].writeAll(path)
  val bytes = fs2.Stream.emit(content).through(fs2.text.utf8.encode)
  bytes.through(sink).compile.drain
}
143+
144+
/** Migrates a single post: derives the date and target name from the file
  * name, reads and parses the source, renders it for `stage`, and writes the
  * result into `newBlogDir`.
  *
  * @return the name of the file that was written; the effect fails when the
  *         file name or the front matter cannot be parsed
  */
def migratePost(sourcePath: Path, stage: Int): IO[String] =
  IO.fromEither(getDateAndName(sourcePath)).flatMap { case (date, newFilename) =>
    readPost(sourcePath)
      .flatMap(raw => IO.fromEither(PostParser.parse(sourcePath, raw)))
      .flatMap(post => writePost(newBlogDir / newFilename, post.toLaika(date, stage)))
      .as(newFilename)
  }
152+
153+
/** Migrates every `YYYY-MM-DD-*.md` file found directly in `oldPostsDir`,
  * printing one line per migrated file.
  *
  * @return the number of posts that were migrated
  */
def migrateAllPosts(stage: Int): IO[Long] = {
  val isDatedMarkdown: Path => Boolean =
    _.fileName.toString.matches("""^\d{4}-\d{2}-\d{2}-.+\.md$""")

  Files[IO]
    .list(oldPostsDir)
    .filter(isDatedMarkdown)
    .evalMap { path =>
      migratePost(path, stage).flatMap(newFilename => IO.println(s"Migrated: $newFilename"))
    }
    .compile
    .count
}
160+
161+
/** Entry point. The first argument selects the migration stage; when it is
  * missing or not an integer, stage 4 is used.
  */
def run(args: List[String]): IO[cats.effect.ExitCode] = {
  val stage = args match {
    case first :: _ => first.toIntOption.getOrElse(4)
    case Nil        => 4
  }

  for {
    _ <- IO.println(s"Running migration with stage $stage")
    migrated <- migrateAllPosts(stage)
    _ <- IO.println(s"Migrated $migrated posts")
  } yield cats.effect.ExitCode.Success
}
168+
}

0 commit comments

Comments
 (0)