@@ -54,13 +54,13 @@ class HouseSchema(dy.Schema):
5454 price = dy.Float64(nullable = False )
5555
5656 @dy.rule ()
57- def reasonable_bathroom_to_bedrooom_ratio (cls ) -> pl.Expr:
57+ def reasonable_bathroom_to_bedroom_ratio (cls ) -> pl.Expr:
5858 ratio = pl.col("num_bathrooms") / pl.col("num_bedrooms")
5959 return (ratio >= 1 / 3 ) & (ratio <= 3 )
6060```
6161
6262The decorator ` @dy.rule() ` "registers" the function as a rule using its name (i.e.
63- ` reasonable_bathroom_to_bedrooom_ratio ` ).
63+ ` reasonable_bathroom_to_bedroom_ratio ` ).
6464The returned expression provides a boolean value for each row of the data, which evaluates to ` True ` whenever that row
6565is valid with respect to this rule.
6666
@@ -81,7 +81,7 @@ class HouseSchema(dy.Schema):
8181 price = dy.Float64(nullable = False )
8282
8383 @dy.rule ()
84- def reasonable_bathroom_to_bedrooom_ratio (cls ) -> pl.Expr:
84+ def reasonable_bathroom_to_bedroom_ratio (cls ) -> pl.Expr:
8585 ratio = pl.col("num_bathrooms") / pl.col("num_bedrooms")
8686 return (ratio >= 1 / 3 ) & (ratio <= 3 )
8787
@@ -189,7 +189,7 @@ Using the `counts` method on the :class:`~dataframely.FailureInfo` object will r
189189
190190``` python
191191{
192- " reasonable_bathroom_to_bedrooom_ratio " : 1 ,
192+ " reasonable_bathroom_to_bedroom_ratio " : 1 ,
193193 " minimum_zip_code_count" : 2 ,
194194 " zip_code|min_length" : 1 ,
195195 " num_bedrooms|nullability" : 2 ,
@@ -205,6 +205,19 @@ failed_df = failure.invalid()
205205This information tends to be very useful in tracking down issues with the data,
206206both in production systems and analytics environments.
207207
208+ ``` {note}
209+ New in `dataframely` v2.8.0: The `FailureInfo.details()` method now returns additional columns indicating which rules were violated for each row.
210+ ```
211+
212+ For the example above, ` failure.details() ` would look as follows (we omitted some columns for readability):
213+
214+ | zip_code | num_bedrooms | num_bathrooms | price | reasonable_bathroom_to_bedroom... | minimum_zip_code_count | zip_code\| min_length | num_bedrooms\| nullability | ... |
215+ | -------- | ------------ | ------------- | ------ | --------------------------------- | ---------------------- | -------------------- | ------------------------- | --- |
216+ | 1 | 1 | 1 | 50000 | valid | invalid | invalid | valid | |
217+ | 213 | null | 1 | 80000 | valid | valid | valid | invalid | |
218+ | 123 | null | 0 | 60000 | valid | invalid | valid | invalid | |
219+ | 213 | 2 | 8 | 160000 | invalid | valid | valid | valid | |
220+
208221## Type casting
209222
210223In rare cases, you might already be _ absolutely certain_ that a data frame is valid with
@@ -229,7 +242,8 @@ df_concat = HouseSchema.cast(pl.concat([df1, df2]))
229242Lastly, ` dataframely ` schemas can be used to integrate with external tools:
230243
231244- ` HouseSchema.create_empty() ` creates an empty ` dy.DataFrame[HouseSchema] ` that can be used for testing
232- - ` HouseSchema.to_sqlalchemy_columns() ` provides a list of [ sqlalchemy] ( https://www.sqlalchemy.org ) columns that can be used to
245+ - ` HouseSchema.to_sqlalchemy_columns() ` provides a list of [ sqlalchemy] ( https://www.sqlalchemy.org ) columns that can be
246+ used to
233247 create SQL tables using types and constraints in line with the schema
234248- ` HouseSchema.to_pyarrow_schema() ` provides a [ pyarrow] ( https://arrow.apache.org/docs/python/index.html ) schema with
235249 appropriate column dtypes and nullability information
0 commit comments