|
| 1 | +//> using dep org.virtuslab::scala-yaml::0.3.1 |
| 2 | +//> using dep co.fs2::fs2-io::3.12.2 |
| 3 | + |
| 4 | +import cats.effect.{IO, IOApp} |
| 5 | +import cats.syntax.all.* |
| 6 | +import fs2.io.file.{Files, Path} |
| 7 | +import org.virtuslab.yaml.* |
| 8 | + |
/** The optional `meta` section of a post's YAML front matter.
  *
  * @param author author value from the front matter, if the post declares one
  */
case class PostMeta(
    author: Option[String]
) derives YamlCodec
| 10 | + |
/** YAML front matter of a post, decoded through the derived `YamlCodec`.
  *
  * @param title    post title
  * @param category optional category
  * @param meta     optional nested `meta` section (see [[PostMeta]])
  */
case class Conf(
    title: String,
    category: Option[String],
    meta: Option[PostMeta]
) derives YamlCodec
| 13 | + |
/** A blog post split into its decoded front matter and its Markdown body.
  *
  * @param conf         front matter decoded as [[Conf]]
  * @param content      Markdown body of the post
  * @param originalYaml raw front-matter text; stage 1 of [[toLaika]] writes it back unchanged
  */
case class Post(conf: Conf, content: String, originalYaml: String) {

  /** Replaces every Jekyll `{% post_url YYYY-MM-DD-name %}` tag with `name.md`.
    *
    * @param markdown Markdown text to rewrite
    * @return the text with all `post_url` tags substituted
    */
  def cleanPostUrl(markdown: String): String =
    Post.PostUrlTag.replaceAllIn(markdown, "$1.md")

  /** Rewrites the remaining Jekyll-era links so they point at `.md` sources.
    *
    * The rewrites run in a fixed order (later ones see the output of earlier ones):
    *   1. absolute `https://typelevel.org/blog/YYYY/MM/DD/name.html` becomes `name.md`
    *   2. relative `/blog/YYYY/MM/DD/name.html` becomes `name.md`
    *   3. `{{ site.url }}` is removed, leaving the path that followed it
    *   4. `.html` becomes `.md`, except inside `http(s)://` URLs
    *   5. hand-written fixes for the conduct, projects and GSoC ideas pages
    *
    * @param markdown Markdown text to rewrite
    * @return the text with all link rewrites applied
    */
  def cleanOtherLinks(markdown: String): String =
    Post.LinkRewrites.foldLeft(markdown)((text, rewrite) => rewrite(text))

  /** Builds the `{% ... %}` metadata header placed at the top of a migrated post.
    *
    * The author and tags lines are emitted only when the front matter provides them.
    * The author is written as a `${...}` substitution reference, not as a quoted string.
    *
    * @param date publication date, written verbatim inside double quotes
    * @return the header, one entry per line, without a trailing newline
    */
  def buildHoconMetadata(date: String): String = {
    val authorLine = conf.meta.flatMap(_.author).map(a => s" author: $${$a}")
    val dateLine = s""" date: "$date""""
    val tagsLine = conf.category.map(c => s" tags: [$c]")

    (List("{%") ++ authorLine ++ List(dateLine) ++ tagsLine ++ List("%}")).mkString("\n")
  }

  /** Renders the post for the given migration stage.
    *
    *   - stage 1: original layout (`---` fenced YAML followed by the body)
    *   - stage 2: metadata header + `# title` + unmodified body
    *   - stage 3: stage 2 with `post_url` tags rewritten
    *   - any other value (including 0 and negatives): stage 3 plus all other link rewrites
    *
    * @param date  publication date forwarded to [[buildHoconMetadata]]
    * @param stage migration stage selector
    * @return the complete file content, ending with a newline
    */
  def toLaika(date: String, stage: Int): String = {
    // Header and title are only built for the stages that actually emit them.
    def withHeader(body: String): String =
      s"${buildHoconMetadata(date)}\n\n# ${conf.title}\n\n$body\n"

    stage match {
      case 1 => s"---\n$originalYaml---\n\n$content\n"
      case 2 => withHeader(content)
      case 3 => withHeader(cleanPostUrl(content))
      case _ => withHeader(cleanOtherLinks(cleanPostUrl(content)))
    }
  }
}

/** Patterns and rewrite rules shared by every [[Post]]; compiled once instead of per call. */
object Post {
  // {% post_url YYYY-MM-DD-filename %}; group 1 is the filename without the date prefix.
  private val PostUrlTag = """\{%\s*post_url\s+\d{4}-\d{2}-\d{2}-(.+?)\s*%\}""".r

  // https://typelevel.org/blog/YYYY/MM/DD/post-name.html; group 1 is post-name.
  private val TypelevelBlogUrl =
    """https://typelevel\.org/blog/\d{4}/\d{2}/\d{2}/([^)\s]+)\.html""".r

  // /blog/YYYY/MM/DD/post-name.html not preceded by a lowercase letter; group 1 is post-name.
  private val RelativeBlogUrl =
    """(?<![a-z])/blog/\d{4}/\d{2}/\d{2}/([^)\s]+)\.html""".r

  // Jekyll {{ site.url }} variable.
  private val SiteUrlVariable = """\{\{\s*site\.url\s*\}\}""".r

  // `.html` that is not part of an http(s):// URL.
  // NOTE(review): the look-behind is unbounded, which older JDK regex engines may reject.
  // Kept lazy so a rejection can only surface when cleanOtherLinks actually runs - confirm
  // the target JDK.
  private lazy val HtmlExtension = """(?<!https?://[^\s)]*)(\.html)""".r

  // `/projects` or `/gsoc/ideas`, with an optional trailing slash, only when the path ends
  // there. The look-ahead rejects a following path segment or file extension, so already
  // rewritten links (`/projects/README.md`, `/gsoc/ideas.md`) and longer paths
  // (`/projects/3`) are left alone instead of being corrupted.
  private val ProjectsIndex = """/projects/?(?![\w/-]|\.\w)""".r
  private val GsocIdeas = """/gsoc/ideas/?(?![\w/-]|\.\w)""".r

  // Applied top to bottom by cleanOtherLinks; the order is significant.
  private val LinkRewrites: List[String => String] = List(
    text => TypelevelBlogUrl.replaceAllIn(text, "$1.md"),
    text => RelativeBlogUrl.replaceAllIn(text, "$1.md"),
    text => SiteUrlVariable.replaceAllIn(text, ""),
    text => HtmlExtension.replaceAllIn(text, ".md"),
    text => text.replace("/conduct.md", "/code-of-conduct/README.md"),
    text => text.replace("/code-of-conduct.md", "/code-of-conduct/README.md"),
    text => ProjectsIndex.replaceAllIn(text, "/projects/README.md"),
    text => GsocIdeas.replaceAllIn(text, "/gsoc/ideas.md")
  )
}
| 91 | + |
/** Splits a Jekyll post into YAML front matter and Markdown body. */
object PostParser {

  // A front-matter fence: `---` alone at the start of a line. Anchoring with (?m)^ keeps
  // the tail of a longer dash run (e.g. `------`) from being mistaken for a fence.
  private val FrontMatterFence = "(?m)^---\n"

  /** Parses the text of a post.
    *
    * Windows line endings are normalised to `\n` first. The text must then consist of an
    * opening `---` fence (only whitespace may precede it), the YAML front matter, a closing
    * `---` fence, and the body. The body is trimmed; the YAML is kept as written.
    *
    * @param path    location of the post, used only for the error message
    * @param content full text of the post
    * @return the parsed [[Post]], or a `Left` when the fences are missing or misplaced, or
    *         when the front matter cannot be decoded as [[Conf]]
    */
  def parse(path: Path, content: String): Either[Throwable, Post] = {
    // Normalize Windows line endings to Unix
    val normalized = content.replace("\r\n", "\n")

    normalized.split(FrontMatterFence, 3) match {
      // Text before the opening fence means the file does not start with front matter;
      // decoding whatever sits between two later fences would silently produce garbage.
      case Array(preamble, yamlContent, body) if preamble.trim.isEmpty =>
        yamlContent.as[Conf].map(conf => Post(conf, body.trim, yamlContent))
      case _ =>
        val fn = path.fileName
        Left(new Exception(s"Invalid post '$fn': no YAML front matter found"))
    }
  }
}
| 107 | + |
/** Command-line entry point: copies every dated post from [[oldPostsDir]] into
  * [[newBlogDir]], rendering each one with `Post.toLaika` for the requested stage.
  */
object MigratePosts extends IOApp {
  val oldPostsDir: Path = Path("../typelevel.github.com/collections/_posts")
  val newBlogDir: Path = Path("src/blog")

  // Manual renaming map for files that would collide after date stripping
  val renameMap: Map[String, String] = Map(
    "2023-02-23-gsoc.md" -> "gsoc-2023.md",
    "2024-03-02-gsoc.md" -> "gsoc-2024.md",
    "2025-02-27-gsoc.md" -> "gsoc-2025.md"
  )

  // `YYYY-MM-DD-rest`: group 1 is the date, group 2 everything after the separating dash.
  private val DatedName = """(\d{4}-\d{2}-\d{2})-(.+)""".r

  // File names accepted for migration: dated Markdown files only.
  private val DatedMarkdownFile = """^\d{4}-\d{2}-\d{2}-.+\.md$""".r

  /** Splits a post's file name into its date and its destination file name.
    *
    * @param path source file; only its file name is inspected
    * @return `(date, newName)` where `newName` comes from [[renameMap]] when the file is
    *         listed there and is otherwise the name without its date prefix; `Left` when
    *         the name does not start with `YYYY-MM-DD-`
    */
  def getDateAndName(path: Path): Either[Throwable, (String, String)] = {
    val filename = path.fileName.toString
    filename match {
      case DatedName(date, rest) =>
        Right((date, renameMap.getOrElse(filename, rest)))
      case _ =>
        Left(new Exception(s"Filename doesn't match pattern: $filename"))
    }
  }

  /** Reads the whole file at `path` as UTF-8 text. */
  def readPost(path: Path): IO[String] =
    Files[IO]
      .readAll(path)
      .through(fs2.text.utf8.decode)
      .compile
      .string

  /** Writes `content` to `path` as UTF-8. */
  def writePost(path: Path, content: String): IO[Unit] =
    fs2.Stream
      .emit(content)
      .through(fs2.text.utf8.encode)
      .through(Files[IO].writeAll(path))
      .compile
      .drain

  /** Migrates a single post: read, parse, render for `stage`, write into [[newBlogDir]].
    *
    * @param sourcePath post to migrate
    * @param stage      migration stage forwarded to `Post.toLaika`
    * @return the file name written inside [[newBlogDir]]; the effect fails when the file
    *         name or the front matter is invalid
    */
  def migratePost(sourcePath: Path, stage: Int): IO[String] = for {
    (date, newFilename) <- IO.fromEither(getDateAndName(sourcePath))
    rawText <- readPost(sourcePath)
    post <- IO.fromEither(PostParser.parse(sourcePath, rawText))
    _ <- writePost(newBlogDir / newFilename, post.toLaika(date, stage))
  } yield newFilename

  /** Migrates every `YYYY-MM-DD-*.md` file found directly in [[oldPostsDir]].
    *
    * @param stage migration stage forwarded to [[migratePost]]
    * @return the number of posts migrated
    */
  def migrateAllPosts(stage: Int): IO[Long] =
    // Make sure the destination exists first; otherwise the first write fails on a fresh
    // checkout. This is a no-op when the directory is already there.
    Files[IO].createDirectories(newBlogDir) *>
      Files[IO]
        .list(oldPostsDir)
        .filter(path => DatedMarkdownFile.matches(path.fileName.toString))
        .evalMap(path => migratePost(path, stage))
        .evalMap(newFilename => IO.println(s"Migrated: $newFilename"))
        .compile
        .count

  /** Runs the migration.
    *
    * The first argument selects the stage; when it is missing or not an integer,
    * stage 4 is used.
    */
  def run(args: List[String]): IO[cats.effect.ExitCode] = {
    val stage = args.headOption.flatMap(_.toIntOption).getOrElse(4)
    IO.println(s"Running migration with stage $stage") *>
      migrateAllPosts(stage)
        .flatMap(c => IO.println(s"Migrated $c posts"))
        .as(cats.effect.ExitCode.Success)
  }
}
0 commit comments