Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit da1af64

Browse files
committed
Fix precision parsing for BigQuery
1 parent b2ce754 commit da1af64

1 file changed

Lines changed: 29 additions & 14 deletions

File tree

data_diff/databases/bigquery.py

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ class Dialect(BaseDialect):
7575
}
7676
TYPE_ARRAY_RE = re.compile(r"ARRAY<(.+)>")
7777
TYPE_STRUCT_RE = re.compile(r"STRUCT<(.+)>")
78+
# [BIG]NUMERIC, [BIG]NUMERIC(precision, scale), [BIG]NUMERIC(precision)
79+
TYPE_NUMERIC_RE = re.compile(r'^((BIG)?NUMERIC)(?:\((\d+)(?:, (\d+))?\))?$')
7880
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#parameterized_decimal_type
7981
# The default scale is 9, which means a number can have up to 9 digits after the decimal point.
8082
DEFAULT_NUMERIC_PRECISION = 9
@@ -103,20 +105,33 @@ def parse_type(
103105
**kwargs: Any, # pass-through args
104106
) -> ColType:
105107
col_type = super().parse_type(table_path, col_name, type_repr, *args, **kwargs)
106-
if isinstance(col_type, UnknownColType):
107-
m = self.TYPE_ARRAY_RE.fullmatch(type_repr)
108-
if m:
109-
item_type = self.parse_type(table_path, col_name, m.group(1), *args, **kwargs)
110-
col_type = Array(item_type=item_type)
111-
112-
# We currently ignore structs' structure, but later can parse it too. Examples:
113-
# - STRUCT<INT64, STRING(10)> (unnamed)
114-
# - STRUCT<foo INT64, bar STRING(10)> (named)
115-
# - STRUCT<foo INT64, bar ARRAY<INT64>> (with complex fields)
116-
# - STRUCT<foo INT64, bar STRUCT<a INT64, b INT64>> (nested)
117-
m = self.TYPE_STRUCT_RE.fullmatch(type_repr)
118-
if m:
119-
col_type = Struct()
108+
if not isinstance(col_type, UnknownColType):
109+
return col_type
110+
111+
m = self.TYPE_ARRAY_RE.fullmatch(type_repr)
112+
if m:
113+
item_type = self.parse_type(table_path, col_name, m.group(1), *args, **kwargs)
114+
col_type = Array(item_type=item_type)
115+
return col_type
116+
117+
# We currently ignore structs' structure, but later can parse it too. Examples:
118+
# - STRUCT<INT64, STRING(10)> (unnamed)
119+
# - STRUCT<foo INT64, bar STRING(10)> (named)
120+
# - STRUCT<foo INT64, bar ARRAY<INT64>> (with complex fields)
121+
# - STRUCT<foo INT64, bar STRUCT<a INT64, b INT64>> (nested)
122+
m = self.TYPE_STRUCT_RE.fullmatch(type_repr)
123+
if m:
124+
col_type = Struct()
125+
return col_type
126+
127+
m = self.TYPE_NUMERIC_RE.fullmatch(type_repr)
128+
if m:
129+
precision = int(m.group(3)) if m.group(3) else None
130+
scale = int(m.group(4)) if m.group(4) else None
131+
col_type = Decimal(
132+
precision=scale if scale else 0 if precision else 9
133+
)
134+
return col_type
120135

121136
return col_type
122137

0 commit comments

Comments
 (0)