Skip to content

Commit 2c59cf3

Browse files
maskri17copybara-github
authored andcommitted
Adding the CEL regex extensions
PiperOrigin-RevId: 782160181
1 parent 69fb7f7 commit 2c59cf3

4 files changed

Lines changed: 737 additions & 0 deletions

File tree

extensions/BUILD

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -661,6 +661,54 @@ cc_library(
661661
],
662662
)
663663

664+
# Library target for the CEL regex extension: builds regex_ext.cc and exposes
# regex_ext.h. The regular-expression engine it links against is RE2.
cc_library(
665+
name = "regex_ext",
666+
srcs = ["regex_ext.cc"],
667+
hdrs = ["regex_ext.h"],
668+
deps = [
669+
"//common:value",
670+
"//eval/public:cel_function_registry",
671+
"//eval/public:cel_options",
672+
"//internal:status_macros",
673+
"//runtime:function_adapter",
674+
"//runtime:function_registry",
675+
"//runtime:runtime_options",
676+
"@com_google_absl//absl/base:nullability",
677+
"@com_google_absl//absl/status",
678+
"@com_google_absl//absl/status:statusor",
679+
"@com_google_absl//absl/strings:str_format",
680+
"@com_google_absl//absl/strings:string_view",
681+
"@com_google_protobuf//:protobuf",
682+
"@com_googlesource_code_re2//:re2",
683+
],
684+
)
685+
686+
# Test target for the regex extension: builds regex_ext_test.cc against the
# ":regex_ext" library defined in this package.
cc_test(
687+
name = "regex_ext_test",
688+
srcs = ["regex_ext_test.cc"],
689+
deps = [
690+
":regex_ext",
691+
"//common:value",
692+
"//common:value_testing",
693+
"//extensions/protobuf:runtime_adapter",
694+
"//internal:status_macros",
695+
"//internal:testing",
696+
"//internal:testing_descriptor_pool",
697+
"//parser",
698+
"//runtime",
699+
"//runtime:activation",
700+
"//runtime:optional_types",
701+
"//runtime:reference_resolver",
702+
"//runtime:runtime_builder",
703+
"//runtime:runtime_options",
704+
"//runtime:standard_runtime_builder_factory",
705+
"@com_google_absl//absl/status",
706+
"@com_google_absl//absl/status:status_matchers",
707+
"@com_google_absl//absl/status:statusor",
708+
"@com_google_protobuf//:protobuf",
709+
],
710+
)
711+
664712
cc_test(
665713
name = "formatting_test",
666714
srcs = ["formatting_test.cc"],

extensions/regex_ext.cc

Lines changed: 263 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,263 @@
1+
// Copyright 2025 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "extensions/regex_ext.h"
16+
17+
#include <algorithm>
18+
#include <cstdint>
19+
#include <memory>
20+
#include <string>
21+
#include <utility>
22+
23+
#include "absl/base/nullability.h"
24+
#include "absl/status/status.h"
25+
#include "absl/status/statusor.h"
26+
#include "absl/strings/str_format.h"
27+
#include "absl/strings/string_view.h"
28+
#include "common/value.h"
29+
#include "eval/public/cel_function_registry.h"
30+
#include "eval/public/cel_options.h"
31+
#include "internal/status_macros.h"
32+
#include "runtime/function_adapter.h"
33+
#include "runtime/function_registry.h"
34+
#include "runtime/runtime_options.h"
35+
#include "google/protobuf/arena.h"
36+
#include "google/protobuf/descriptor.h"
37+
#include "google/protobuf/message.h"
38+
#include "re2/re2.h"
39+
40+
namespace cel::extensions {
41+
namespace {
42+
43+
// Implements `regex.extract(target, regex)`.
//
// Searches `target` for the first (unanchored) match of `regex`.
//
// Returns:
//   - an error value (InvalidArgument) if `regex` does not compile or declares
//     more than one capturing group;
//   - an optional holding the single capture group if the pattern has exactly
//     one group, otherwise an optional holding the whole match;
//   - an empty optional if nothing matches.
//
// `descriptor_pool` and `message_factory` are not used by this function.
Value Extract(const StringValue& target, const StringValue& regex,
              const google::protobuf::DescriptorPool* ABSL_NONNULL descriptor_pool,
              google::protobuf::MessageFactory* ABSL_NONNULL message_factory,
              google::protobuf::Arena* ABSL_NONNULL arena) {
  std::string subject_buffer;
  std::string pattern_buffer;
  const absl::string_view subject = target.ToStringView(&subject_buffer);
  const absl::string_view pattern = regex.ToStringView(&pattern_buffer);

  RE2 compiled(pattern);
  if (!compiled.ok()) {
    return ErrorValue(absl::InvalidArgumentError(
        absl::StrFormat("given regex is invalid: %s", compiled.error())));
  }

  const int num_groups = compiled.NumberOfCapturingGroups();
  if (num_groups > 1) {
    return ErrorValue(absl::InvalidArgumentError(absl::StrFormat(
        "regular expression has more than one capturing group: %s",
        pattern)));
  }

  // Slot 0 receives the whole match, slot 1 the (optional) capture group.
  absl::string_view groups[2];
  const bool matched = compiled.Match(subject, 0, subject.length(),
                                      RE2::UNANCHORED, groups, 2);
  if (!matched) {
    return OptionalValue::None();
  }

  // Prefer the capture group when the pattern declares one.
  absl::string_view extracted = groups[0];
  if (num_groups == 1) {
    extracted = groups[1];
  }
  return OptionalValue::Of(StringValue::From(extracted, arena), arena);
}
75+
76+
// Implements `regex.extractAll(target, regex)`.
//
// Collects every non-overlapping match of `regex` in `target`, scanning left
// to right, and returns them as a list of strings.
//
// Returns:
//   - an error value (InvalidArgument) if `regex` does not compile or declares
//     more than one capturing group;
//   - an error value if appending to the result list fails;
//   - otherwise a list value. When the pattern has one capturing group the
//     list holds that group's text for each match (matches whose group is
//     empty are skipped); with no group it holds the whole match. Zero-length
//     matches are never added.
//
// `descriptor_pool` and `message_factory` are not used by this function.
Value ExtractAll(const StringValue& target, const StringValue& regex,
                 const google::protobuf::DescriptorPool* ABSL_NONNULL descriptor_pool,
                 google::protobuf::MessageFactory* ABSL_NONNULL message_factory,
                 google::protobuf::Arena* ABSL_NONNULL arena) {
  std::string target_scratch;
  std::string regex_scratch;
  absl::string_view target_view = target.ToStringView(&target_scratch);
  absl::string_view regex_view = regex.ToStringView(&regex_scratch);
  RE2 re2(regex_view);
  if (!re2.ok()) {
    return ErrorValue(absl::InvalidArgumentError(
        absl::StrFormat("given regex is invalid: %s", re2.error())));
  }
  const int group_count = re2.NumberOfCapturingGroups();
  if (group_count > 1) {
    return ErrorValue(absl::InvalidArgumentError(absl::StrFormat(
        "regular expression has more than one capturing group: %s",
        regex_view)));
  }

  auto builder = NewListValueBuilder(arena);

  // Space for the full match (\0) and the first capture group (\1).
  absl::string_view submatches[2];
  const int group_to_extract = (group_count == 1) ? 1 : 0;

  // Always match against the complete target and only move the start offset.
  // Re-matching on a truncated copy of the input would make context-sensitive
  // assertions such as `^`, `\b` and `\B` see a fake "start of text" after
  // every match (e.g. `^a` would match three times in "aaa").
  size_t search_from = 0;
  while (re2.Match(target_view, search_from, target_view.length(),
                   RE2::UNANCHORED, submatches, group_count + 1)) {
    const absl::string_view full_match = submatches[0];
    const size_t match_begin =
        static_cast<size_t>(full_match.data() - target_view.data());

    // Avoid infinite loops on zero-length matches: skip them and resume the
    // search one byte past the position where the empty match was found.
    if (full_match.empty()) {
      if (match_begin >= target_view.length()) {
        break;
      }
      search_from = match_begin + 1;
      continue;
    }

    // The next search starts right after this match, whether or not the
    // match ends up contributing an element to the result.
    search_from = match_begin + full_match.length();

    const absl::string_view desired_capture = submatches[group_to_extract];
    if (group_count == 1 && desired_capture.empty()) {
      continue;
    }

    absl::Status status =
        builder->Add(StringValue::From(desired_capture, arena));
    if (!status.ok()) {
      return ErrorValue(status);
    }
  }

  return std::move(*builder).Build();
}
134+
135+
// Implements the three-argument `regex.replace(target, regex, replacement)`.
//
// Replaces every match of `regex` in `target` with `replacement` by delegating
// to RE2::GlobalReplace on a copy of the target.
//
// Returns:
//   - an error value (InvalidArgument) if `regex` does not compile or if
//     `replacement` is rejected by RE2::CheckRewriteString for this pattern;
//   - otherwise the rewritten string.
//
// `descriptor_pool` and `message_factory` are not used by this function.
Value ReplaceAll(const StringValue& target, const StringValue& regex,
                 const StringValue& replacement,
                 const google::protobuf::DescriptorPool* ABSL_NONNULL descriptor_pool,
                 google::protobuf::MessageFactory* ABSL_NONNULL message_factory,
                 google::protobuf::Arena* ABSL_NONNULL arena) {
  std::string subject_buffer;
  std::string pattern_buffer;
  std::string rewrite_buffer;
  const absl::string_view subject = target.ToStringView(&subject_buffer);
  const absl::string_view pattern = regex.ToStringView(&pattern_buffer);
  const absl::string_view rewrite = replacement.ToStringView(&rewrite_buffer);

  RE2 compiled(pattern);
  if (!compiled.ok()) {
    return ErrorValue(absl::InvalidArgumentError(
        absl::StrFormat("given regex is invalid: %s", compiled.error())));
  }

  // Validate the rewrite template up front so a bad template is reported as
  // an error instead of being handed to the replacement routine.
  std::string rewrite_error;
  const bool rewrite_ok = compiled.CheckRewriteString(rewrite, &rewrite_error);
  if (!rewrite_ok) {
    return ErrorValue(absl::InvalidArgumentError(
        absl::StrFormat("invalid replacement string: %s", rewrite_error)));
  }

  // GlobalReplace edits in place, so work on an owned copy of the input.
  std::string result(subject);
  RE2::GlobalReplace(&result, compiled, rewrite);
  return StringValue::From(std::move(result), arena);
}
164+
165+
// Implements the four-argument
// `regex.replace(target, regex, replacement, count)`.
//
// Replaces at most `count` matches of `regex` in `target`, scanning left to
// right.
//   - `count == 0` returns `target` unchanged (nothing is validated).
//   - `count < 0` replaces all matches by delegating to ReplaceAll.
//
// Returns:
//   - an error value (InvalidArgument) if `regex` does not compile or if
//     `replacement` is rejected by RE2::CheckRewriteString for this pattern;
//   - an error value (Internal) if RE2::Rewrite unexpectedly fails;
//   - otherwise the rewritten string.
//
// The scan follows the same rules as RE2::GlobalReplace so that a bounded
// replace is a prefix of what ReplaceAll would do:
//   - matching always runs against the full target with an advancing start
//     offset, so `^`, `\b` and `\B` keep seeing the real surrounding text;
//   - an empty match directly after the previous match is not replaced; the
//     scan copies one character and moves on, which guarantees progress even
//     for patterns that can match the empty string.
//
// `descriptor_pool` and `message_factory` are only forwarded to ReplaceAll.
Value ReplaceN(const StringValue& target, const StringValue& regex,
               const StringValue& replacement, int64_t count,
               const google::protobuf::DescriptorPool* ABSL_NONNULL descriptor_pool,
               google::protobuf::MessageFactory* ABSL_NONNULL message_factory,
               google::protobuf::Arena* ABSL_NONNULL arena) {
  if (count == 0) {
    return target;
  }
  if (count < 0) {
    return ReplaceAll(target, regex, replacement, descriptor_pool,
                      message_factory, arena);
  }

  std::string target_scratch;
  std::string regex_scratch;
  std::string replacement_scratch;
  absl::string_view target_view = target.ToStringView(&target_scratch);
  absl::string_view regex_view = regex.ToStringView(&regex_scratch);
  absl::string_view replacement_view =
      replacement.ToStringView(&replacement_scratch);
  RE2 re2(regex_view);
  if (!re2.ok()) {
    return ErrorValue(absl::InvalidArgumentError(
        absl::StrFormat("given regex is invalid: %s", re2.error())));
  }
  std::string error_string;
  if (!re2.CheckRewriteString(replacement_view, &error_string)) {
    return ErrorValue(absl::InvalidArgumentError(
        absl::StrFormat("invalid replacement string: %s", error_string)));
  }

  // Number of bytes occupied by the UTF-8 sequence starting at `offset`
  // (which must be < text.size()). Falls back to a single byte when the
  // sequence is truncated or malformed, so the scan always advances.
  const auto utf8_width = [](absl::string_view text, size_t offset) -> size_t {
    const unsigned char lead = static_cast<unsigned char>(text[offset]);
    size_t width = 1;
    if ((lead & 0xE0) == 0xC0) {
      width = 2;
    } else if ((lead & 0xF0) == 0xE0) {
      width = 3;
    } else if ((lead & 0xF8) == 0xF0) {
      width = 4;
    } else {
      return 1;  // ASCII, a stray continuation byte, or an invalid lead byte.
    }
    if (text.size() - offset < width) {
      return 1;
    }
    for (size_t i = 1; i < width; ++i) {
      if ((static_cast<unsigned char>(text[offset + i]) & 0xC0) != 0x80) {
        return 1;
      }
    }
    return width;
  };

  std::string output;
  // Same width as `count` so the comparison cannot overflow.
  int64_t replaced_count = 0;
  // RE2's Rewrite only supports substitutions for groups \0 through \9.
  absl::string_view match[10];
  const int nmatch = std::min(9, re2.NumberOfCapturingGroups()) + 1;

  const size_t text_size = target_view.size();
  size_t cursor = 0;  // First byte of the target not yet copied to `output`.
  // End offset of the most recent replaced match; npos until one exists.
  size_t last_match_end = absl::string_view::npos;

  while (cursor <= text_size && replaced_count < count &&
         re2.Match(target_view, cursor, text_size, RE2::UNANCHORED, match,
                   nmatch)) {
    const absl::string_view full_match = match[0];
    const size_t match_begin =
        static_cast<size_t>(full_match.data() - target_view.data());

    // Copy the unmatched text that precedes this match.
    output.append(target_view.data() + cursor, match_begin - cursor);
    cursor = match_begin;

    if (full_match.empty() && match_begin == last_match_end) {
      // An empty match glued to the previous match is not a new match. Copy
      // one whole character through and look again after it.
      if (cursor == text_size) {
        break;
      }
      const size_t step = utf8_width(target_view, cursor);
      output.append(target_view.data() + cursor, step);
      cursor += step;
      continue;
    }

    if (!re2.Rewrite(&output, replacement_view, match, nmatch)) {
      // This should ideally not happen given CheckRewriteString passed
      return ErrorValue(absl::InternalError("rewrite failed unexpectedly"));
    }

    cursor = match_begin + full_match.size();
    last_match_end = cursor;
    ++replaced_count;
  }

  // Copy whatever is left after the last replacement.
  if (cursor < text_size) {
    output.append(target_view.data() + cursor, text_size - cursor);
  }

  return StringValue::From(std::move(output), arena);
}
224+
225+
} // namespace
226+
227+
// Registers the regex extension functions in `registry` as global overloads:
//   - `regex.extract(string, string)`
//   - `regex.extractAll(string, string)`
//   - `regex.replace(string, string, string)`
//   - `regex.replace(string, string, string, int)`
//
// Stops at, and returns, the first registration error; returns OK when all
// four overloads were registered.
absl::Status RegisterRegexExtensionFunctions(FunctionRegistry& registry) {
  using TwoStringAdapter =
      BinaryFunctionAdapter<absl::StatusOr<Value>, StringValue, StringValue>;
  using ThreeStringAdapter =
      TernaryFunctionAdapter<absl::StatusOr<Value>, StringValue, StringValue,
                             StringValue>;
  using ThreeStringIntAdapter =
      QuaternaryFunctionAdapter<absl::StatusOr<Value>, StringValue,
                                StringValue, StringValue, int64_t>;

  CEL_RETURN_IF_ERROR(TwoStringAdapter::RegisterGlobalOverload(
      "regex.extract", &Extract, registry));
  CEL_RETURN_IF_ERROR(TwoStringAdapter::RegisterGlobalOverload(
      "regex.extractAll", &ExtractAll, registry));
  // `regex.replace` has two overloads: replace-all and replace-at-most-N.
  CEL_RETURN_IF_ERROR(ThreeStringAdapter::RegisterGlobalOverload(
      "regex.replace", &ReplaceAll, registry));
  CEL_RETURN_IF_ERROR(ThreeStringIntAdapter::RegisterGlobalOverload(
      "regex.replace", &ReplaceN, registry));
  return absl::OkStatus();
}
246+
247+
// Registers the regex extension functions in `registry` only when
// `options.enable_regex` is set; otherwise does nothing and returns OK.
// Any registration error is returned to the caller.
absl::Status RegisterRegexExtensionFunctions(FunctionRegistry& registry,
                                             const RuntimeOptions& options) {
  if (!options.enable_regex) {
    return absl::OkStatus();
  }
  return RegisterRegexExtensionFunctions(registry);
}
254+
255+
// Overload for the `google::api::expr::runtime` API surface: converts
// `options` with ConvertToRuntimeOptions, takes the registry returned by
// `registry->InternalGetRegistry()`, and forwards both to the
// (FunctionRegistry&, const RuntimeOptions&) overload.
// `registry` is dereferenced unconditionally.
absl::Status RegisterRegexExtensionFunctions(
    google::api::expr::runtime::CelFunctionRegistry* registry,
    const google::api::expr::runtime::InterpreterOptions& options) {
  namespace legacy = google::api::expr::runtime;
  const RuntimeOptions runtime_options =
      legacy::ConvertToRuntimeOptions(options);
  FunctionRegistry& modern_registry = registry->InternalGetRegistry();
  return RegisterRegexExtensionFunctions(modern_registry, runtime_options);
}
262+
263+
} // namespace cel::extensions

0 commit comments

Comments
 (0)