Skip to content

Commit d20e1ef

Browse files
TristonianJoneskyessenov
authored andcommitted
Lower the size of the input expression to 100k codepoints
Also, introduce `ParserOptions` to encapsulate limit and behavior settings. PiperOrigin-RevId: 368935535
1 parent ac181d3 commit d20e1ef

5 files changed

Lines changed: 77 additions & 23 deletions

File tree

parser/BUILD

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,11 @@ cc_library(
2525
deps = [
2626
":cel_cc_parser",
2727
":macro",
28+
":options",
2829
":source_factory",
2930
":visitor",
3031
"@antlr4_runtimes//:cpp",
32+
"@com_google_absl//absl/status",
3133
"@com_google_absl//absl/status:statusor",
3234
"@com_google_absl//absl/strings",
3335
"@com_google_absl//absl/strings:str_format",
@@ -108,10 +110,16 @@ cc_library(
108110
],
109111
)
110112

113+
cc_library(
114+
name = "options",
115+
hdrs = ["options.h"],
116+
)
117+
111118
cc_test(
112119
name = "parser_test",
113120
srcs = ["parser_test.cc"],
114121
deps = [
122+
":options",
115123
":parser",
116124
":source_factory",
117125
"//testutil:expr_printer",

parser/options.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
#ifndef THIRD_PARTY_CEL_CPP_PARSER_OPTIONS_H_
2+
#define THIRD_PARTY_CEL_CPP_PARSER_OPTIONS_H_
3+
4+
namespace google {
5+
namespace api {
6+
namespace expr {
7+
namespace parser {
8+
9+
constexpr int kDefaultErrorRecoveryLimit = 30;
10+
constexpr int kDefaultMaxRecursionDepth = 250;
11+
constexpr int kExpressionSizeCodepointLimit = 100'000;
12+
13+
// Options for configuring the limits and features of the parser.
14+
struct ParserOptions {
15+
// Limit of the number of error recovery attempts made by the ANTLR parser
16+
// when processing an input. This limit, when reached, will halt further
17+
// parsing of the expression.
18+
int error_recovery_limit = kDefaultErrorRecoveryLimit;
19+
20+
// Limit on the amount of recusive parse instructions permitted when building
21+
// the abstract syntax tree for the expression. This prevents pathological
22+
// inputs from causing stack overflows.
23+
int max_recursion_depth = kDefaultMaxRecursionDepth;
24+
25+
// Limit on the number of codepoints in the input string which the parser will
26+
// attempt to parse.
27+
int expression_size_codepoint_limit = kExpressionSizeCodepointLimit;
28+
};
29+
30+
} // namespace parser
31+
} // namespace expr
32+
} // namespace api
33+
} // namespace google
34+
35+
#endif // THIRD_PARTY_CEL_CPP_PARSER_OPTIONS_H_

parser/parser.cc

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
#include "parser/parser.h"
22

3+
#include "absl/status/status.h"
34
#include "absl/status/statusor.h"
45
#include "absl/strings/str_format.h"
56
#include "absl/strings/str_replace.h"
67
#include "absl/types/optional.h"
78
#include "parser/cel_grammar.inc/cel_grammar/CelLexer.h"
89
#include "parser/cel_grammar.inc/cel_grammar/CelParser.h"
10+
#include "parser/options.h"
911
#include "parser/source_factory.h"
1012
#include "parser/visitor.h"
1113
#include "antlr4-runtime.h"
@@ -126,19 +128,15 @@ class RecoveryLimitErrorStrategy : public antlr4::DefaultErrorStrategy {
126128

127129
absl::StatusOr<ParsedExpr> Parse(const std::string& expression,
128130
const std::string& description,
129-
int max_recursion_depth,
130-
int error_recovery_limit) {
131-
return ParseWithMacros(expression, Macro::AllMacros(), description,
132-
max_recursion_depth, error_recovery_limit);
131+
const ParserOptions& options) {
132+
return ParseWithMacros(expression, Macro::AllMacros(), description, options);
133133
}
134134

135135
absl::StatusOr<ParsedExpr> ParseWithMacros(const std::string& expression,
136136
const std::vector<Macro>& macros,
137137
const std::string& description,
138-
int max_recursion_depth,
139-
int error_recovery_limit) {
140-
auto result = EnrichedParse(expression, macros, description,
141-
max_recursion_depth, error_recovery_limit);
138+
const ParserOptions& options) {
139+
auto result = EnrichedParse(expression, macros, description, options);
142140
if (result.ok()) {
143141
return result->parsed_expr();
144142
}
@@ -147,14 +145,19 @@ absl::StatusOr<ParsedExpr> ParseWithMacros(const std::string& expression,
147145

148146
absl::StatusOr<VerboseParsedExpr> EnrichedParse(
149147
const std::string& expression, const std::vector<Macro>& macros,
150-
const std::string& description, int max_recursion_depth,
151-
int error_recovery_limit) {
148+
const std::string& description, const ParserOptions& options) {
152149
ANTLRInputStream input(expression);
150+
if (input.size() > options.expression_size_codepoint_limit) {
151+
return absl::InvalidArgumentError(absl::StrCat(
152+
"expression size exceeds codepoint limit.", " input size: ",
153+
input.size(), ", limit: ", options.expression_size_codepoint_limit));
154+
}
153155
CelLexer lexer(&input);
154156
CommonTokenStream tokens(&lexer);
155157
CelParser parser(&tokens);
156-
ExprRecursionListener listener(max_recursion_depth);
157-
ParserVisitor visitor(description, expression, max_recursion_depth, macros);
158+
ExprRecursionListener listener(options.max_recursion_depth);
159+
ParserVisitor visitor(description, expression, options.max_recursion_depth,
160+
macros);
158161

159162
lexer.removeErrorListeners();
160163
parser.removeErrorListeners();
@@ -165,7 +168,7 @@ absl::StatusOr<VerboseParsedExpr> EnrichedParse(
165168
// Limit the number of error recovery attempts to prevent bad expressions
166169
// from consuming lots of cpu / memory.
167170
std::shared_ptr<RecoveryLimitErrorStrategy> error_strategy(
168-
new RecoveryLimitErrorStrategy(error_recovery_limit));
171+
new RecoveryLimitErrorStrategy(options.error_recovery_limit));
169172
parser.setErrorHandler(error_strategy);
170173

171174
CelParser::StartContext* root;

parser/parser.h

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,14 @@
55
#include "absl/status/statusor.h"
66
#include "absl/types/optional.h"
77
#include "parser/macro.h"
8+
#include "parser/options.h"
89
#include "parser/source_factory.h"
910

1011
namespace google {
1112
namespace api {
1213
namespace expr {
1314
namespace parser {
1415

15-
constexpr int kDefaultErrorRecoveryLimit = 30;
16-
constexpr int kDefaultMaxRecursionDepth = 250;
17-
1816
class VerboseParsedExpr {
1917
public:
2018
VerboseParsedExpr(google::api::expr::v1alpha1::ParsedExpr parsed_expr,
@@ -37,19 +35,16 @@ class VerboseParsedExpr {
3735
absl::StatusOr<VerboseParsedExpr> EnrichedParse(
3836
const std::string& expression, const std::vector<Macro>& macros,
3937
const std::string& description = "<input>",
40-
int max_recursion_depth = kDefaultMaxRecursionDepth,
41-
int error_recovery_limit = kDefaultErrorRecoveryLimit);
38+
const ParserOptions& options = ParserOptions());
4239

4340
absl::StatusOr<google::api::expr::v1alpha1::ParsedExpr> Parse(
4441
const std::string& expression, const std::string& description = "<input>",
45-
int max_recursion_depth = kDefaultMaxRecursionDepth,
46-
int error_recovery_limit = kDefaultErrorRecoveryLimit);
42+
const ParserOptions& options = ParserOptions());
4743

4844
absl::StatusOr<google::api::expr::v1alpha1::ParsedExpr> ParseWithMacros(
4945
const std::string& expression, const std::vector<Macro>& macros,
5046
const std::string& description = "<input>",
51-
int max_recursion_depth = kDefaultMaxRecursionDepth,
52-
int error_recovery_limit = kDefaultErrorRecoveryLimit);
47+
const ParserOptions& options = ParserOptions());
5348

5449
} // namespace parser
5550
} // namespace expr

parser/parser_test.cc

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "absl/strings/str_format.h"
1212
#include "absl/strings/str_join.h"
1313
#include "absl/types/optional.h"
14+
#include "parser/options.h"
1415
#include "parser/source_factory.h"
1516
#include "testutil/expr_printer.h"
1617

@@ -998,7 +999,9 @@ TEST(ExpressionTest, TsanOom) {
998999
}
9991000

10001001
TEST(ExpressionTest, ErrorRecoveryLimits) {
1001-
auto result = Parse("......", "", kDefaultMaxRecursionDepth, 1);
1002+
ParserOptions options;
1003+
options.error_recovery_limit = 1;
1004+
auto result = Parse("......", "", options);
10021005
EXPECT_FALSE(result.ok());
10031006
EXPECT_EQ(result.status().message(),
10041007
"ERROR: :1:2: Syntax error: missing IDENTIFIER at '.'\n"
@@ -1009,6 +1012,16 @@ TEST(ExpressionTest, ErrorRecoveryLimits) {
10091012
" | ..^");
10101013
}
10111014

1015+
TEST(ExpressionTest, ExpressionSizeLimit) {
1016+
ParserOptions options;
1017+
options.expression_size_codepoint_limit = 10;
1018+
auto result = Parse("...............", "", options);
1019+
EXPECT_FALSE(result.ok());
1020+
EXPECT_EQ(
1021+
result.status().message(),
1022+
"expression size exceeds codepoint limit. input size: 15, limit: 10");
1023+
}
1024+
10121025
INSTANTIATE_TEST_SUITE_P(CelParserTest, ExpressionTest,
10131026
testing::ValuesIn(test_cases));
10141027

0 commit comments

Comments
 (0)