Skip to content

Commit c846677

Browse files
borcheroAndreasAlbertQCAndreasAlbertAndreas Albert
authored
fix: Support polars 1.38 (#270)
Co-authored-by: Andreas Albert <andreas.albert@quantco.com> Co-authored-by: Andreas Albert <AndreasAlbert@users.noreply.github.com> Co-authored-by: Andreas Albert <a@b.c>
1 parent c92f653 commit c846677

8 files changed

Lines changed: 213 additions & 211 deletions

File tree

dataframely/_compat.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,10 +80,14 @@ class Dialect: # type: ignore # noqa: N801
8080
from polars._typing import ( # type: ignore[attr-defined,unused-ignore]
8181
PartitioningScheme as PartitionSchemeOrSinkDirectory,
8282
)
83-
else:
83+
elif _polars_version_tuple < (1, 38): # pragma: no cover
8484
from polars.io.partition import ( # type: ignore[no-redef,attr-defined,unused-ignore]
8585
_SinkDirectory as PartitionSchemeOrSinkDirectory,
8686
)
87+
else:
88+
from polars.io.partition import ( # type: ignore[no-redef,attr-defined,unused-ignore]
89+
PartitionBy as PartitionSchemeOrSinkDirectory,
90+
)
8791

8892
# ------------------------------------------------------------------------------------ #
8993

dataframely/_storage/__init__.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
# SPDX-License-Identifier: BSD-3-Clause
33

44
from ._base import StorageBackend
5+
from ._fsspec import get_file_prefix
56

6-
__all__ = [
7-
"StorageBackend",
8-
]
7+
__all__ = ["StorageBackend", "get_file_prefix"]

dataframely/_storage/_fsspec.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Copyright (c) QuantCo 2025-2026
2+
# SPDX-License-Identifier: BSD-3-Clause
3+
4+
from fsspec import AbstractFileSystem
5+
6+
7+
def get_file_prefix(fs: AbstractFileSystem) -> str:
8+
match fs.protocol:
9+
case "file":
10+
return ""
11+
case str():
12+
return f"{fs.protocol}://"
13+
case ["file", *_]:
14+
return ""
15+
case [str(proto), *_]:
16+
return f"{proto}://"
17+
case _:
18+
raise ValueError(f"Unexpected fs.protocol: {fs.protocol}")

dataframely/_storage/parquet.py

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
import polars as pl
88
from fsspec import AbstractFileSystem, url_to_fs
99

10+
from dataframely._storage import get_file_prefix
11+
1012
from ._base import (
1113
SerializedCollection,
1214
SerializedRules,
@@ -79,7 +81,9 @@ def sink_collection(
7981
fs: AbstractFileSystem = url_to_fs(path)[0]
8082
for key, lf in dfs.items():
8183
destination = (
82-
fs.sep.join([path, key])
84+
# Enforce that the path ends with a separator. Otherwise
85+
# polars misbehaves on Windows.
86+
fs.sep.join([path, key]) + fs.sep
8387
if "partition_by" in kwargs
8488
else fs.sep.join([path, f"{key}.parquet"])
8589
)
@@ -107,7 +111,9 @@ def write_collection(
107111
fs: AbstractFileSystem = url_to_fs(path)[0]
108112
for key, lf in dfs.items():
109113
destination = (
110-
fs.sep.join([path, key])
114+
# Enforce that the path ends with a separator. Otherwise
115+
# polars misbehaves on Windows.
116+
fs.sep.join([path, key]) + fs.sep
111117
if "partition_by" in kwargs
112118
else fs.sep.join([path, f"{key}.parquet"])
113119
)
@@ -155,15 +161,7 @@ def _collection_from_parquet(
155161
if is_file:
156162
collection_types.append(_read_serialized_collection(source_path))
157163
else:
158-
prefix = (
159-
""
160-
if fs.protocol == "file"
161-
else (
162-
f"{fs.protocol}://"
163-
if isinstance(fs.protocol, str)
164-
else f"{fs.protocol[0]}://"
165-
)
166-
)
164+
prefix = get_file_prefix(fs)
167165
for file in fs.glob(fs.sep.join([source_path, "**", "*.parquet"])):
168166
collection_types.append(
169167
_read_serialized_collection(f"{prefix}{file}")

dataframely/testing/storage.py

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import dataframely as dy
1111
from dataframely import FailureInfo, Validation
1212
from dataframely._compat import deltalake
13+
from dataframely._storage import get_file_prefix
1314
from dataframely._storage.delta import _to_delta_table
1415

1516
# ----------------------------------- Schema -------------------------------------------
@@ -190,19 +191,7 @@ def set_metadata(self, path: str, metadata: dict[str, Any]) -> None:
190191
metadata."""
191192

192193
def _prefix_path(self, path: str, fs: AbstractFileSystem) -> str:
193-
return f"{self._get_prefix(fs)}{path}"
194-
195-
@staticmethod
196-
def _get_prefix(fs: AbstractFileSystem) -> str:
197-
return (
198-
""
199-
if fs.protocol == "file"
200-
else (
201-
f"{fs.protocol}://"
202-
if isinstance(fs.protocol, str)
203-
else f"{fs.protocol[0]}://"
204-
)
205-
)
194+
return f"{get_file_prefix(fs)}{path}"
206195

207196

208197
class ParquetCollectionStorageTester(CollectionStorageTester):

pixi.lock

Lines changed: 141 additions & 173 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pixi.toml

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -117,35 +117,35 @@ polars = "1.35.*"
117117

118118
[feature.polars-latest.dependencies]
119119
# NOTE: Update docs/faq.md when updating this.
120-
polars = "1.36.*"
120+
polars = "1.38.*"
121121

122122
[feature.nightly.tasks]
123123
install-polars-nightly = "pip install --pre --no-deps --upgrade --only-binary :all: polars polars-runtime-32"
124124

125125
[environments]
126126
build = ["build"]
127-
default = ["dev", "lint", "optionals", "py314", "test", "polars-latest"]
127+
default = ["dev", "lint", "optionals", "polars-latest", "py314", "test"]
128128
docs = ["docs"]
129129
lint = { features = ["lint"], no-default-feature = true }
130130
nightly = ["nightly", "optionals", "test"]
131131

132132
# Different python versions with the latest polars
133-
py310 = ["py310", "test", "polars-latest"]
134-
py311 = ["py311", "test", "polars-latest"]
135-
py312 = ["py312", "test", "polars-latest"]
136-
py313 = ["py313", "test", "polars-latest"]
137-
py314 = ["py314", "test", "polars-latest"]
133+
py310 = ["polars-latest", "py310", "test"]
134+
py311 = ["polars-latest", "py311", "test"]
135+
py312 = ["polars-latest", "py312", "test"]
136+
py313 = ["polars-latest", "py313", "test"]
137+
py314 = ["polars-latest", "py314", "test"]
138138

139139
# Test with optional dependencies
140-
py314-optionals = ["optionals", "py314", "test", "polars-latest"]
140+
py314-optionals = ["optionals", "polars-latest", "py314", "test"]
141141

142142
# Polars compatibility envs
143-
polars-minimal = ["py314", "test", "polars-minimal"]
144143
default-polars-minimal = [
145144
"dev",
146145
"lint",
147146
"optionals",
147+
"polars-minimal",
148148
"py314",
149149
"test",
150-
"polars-minimal",
151150
]
151+
polars-minimal = ["polars-minimal", "py314", "test"]

tests/storage/test_fsspec.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# Copyright (c) QuantCo 2025-2026
2+
# SPDX-License-Identifier: BSD-3-Clause
3+
4+
from typing import Any
5+
6+
import pytest
7+
8+
from dataframely._storage._fsspec import get_file_prefix
9+
10+
11+
class MockFS:
12+
def __init__(self, protocol: Any) -> None:
13+
self.protocol = protocol
14+
15+
16+
def test_get_file_prefix() -> None:
17+
assert get_file_prefix(MockFS("file")) == ""
18+
assert get_file_prefix(MockFS("s3")) == "s3://"
19+
assert get_file_prefix(MockFS(["file", "whatever"])) == ""
20+
assert get_file_prefix(MockFS(["s3", "whatever"])) == "s3://"
21+
22+
23+
@pytest.mark.parametrize("protocol", [5, None, [5]])
24+
def test_get_file_prefix_invalid(protocol: Any) -> None:
25+
with pytest.raises(ValueError):
26+
assert 1 == get_file_prefix(MockFS(protocol))

0 commit comments

Comments
 (0)