Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ jobs:
working-directory: rust
run: cargo test


- name: Test (no default features)
working-directory: rust
run: cargo test --no-default-features
31 changes: 29 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,24 @@ const std::optional<lexer_error>& get_last_error();

Returns the last parse error, if any.

### `lexer::get_last_error_location`

```cpp
const std::optional<error_location>& get_last_error_location();
```

Returns the location of the last parse error, if available. Location tracking
is best-effort and may be unavailable.

### `lexer::error_location`

```cpp
struct error_location {
uint32_t line; // 1-based
uint32_t column; // 1-based (byte-oriented)
};
```

## C API

merve provides a C API (`merve_c.h`) for use from C programs, FFI bindings, or any language that can call C functions. The C API is compiled into the merve library alongside the C++ implementation.
Expand All @@ -141,11 +159,13 @@ merve provides a C API (`merve_c.h`) for use from C programs, FFI bindings, or a
```c
#include "merve_c.h"
#include <stdio.h>
#include <string.h>

int main(void) {
const char* source = "exports.foo = 1;\nexports.bar = 2;\n";

merve_analysis result = merve_parse_commonjs(source, strlen(source));
merve_error_loc err_loc = {0, 0};
merve_analysis result = merve_parse_commonjs(source, strlen(source), &err_loc);

if (merve_is_valid(result)) {
size_t count = merve_get_exports_count(result);
Expand All @@ -157,6 +177,9 @@ int main(void) {
}
} else {
printf("Parse error: %d\n", merve_get_last_error());
if (err_loc.line != 0) {
printf(" at line %u, column %u\n", err_loc.line, err_loc.column);
}
}

merve_free(result);
Expand All @@ -180,12 +203,13 @@ Found 2 exports:
| `merve_string` | Non-owning string reference (`data` + `length`). Not null-terminated. |
| `merve_analysis` | Opaque handle to a parse result. Must be freed with `merve_free()`. |
| `merve_version_components` | Struct with `major`, `minor`, `revision` fields. |
| `merve_error_loc` | Error location (`line`, `column`). `{0,0}` means unavailable. |

#### Functions

| Function | Description |
|----------|-------------|
| `merve_parse_commonjs(input, length)` | Parse CommonJS source. Returns a handle (NULL only on OOM). |
| `merve_parse_commonjs(input, length, out_err)` | Parse CommonJS source and optionally fill error location. Returns a handle (NULL only on OOM). |
| `merve_is_valid(result)` | Check if parsing succeeded. NULL-safe. |
| `merve_free(result)` | Free a parse result. NULL-safe. |
| `merve_get_exports_count(result)` | Number of named exports found. |
Expand All @@ -198,6 +222,9 @@ Found 2 exports:
| `merve_get_version()` | Version string (e.g. `"1.0.1"`). |
| `merve_get_version_components()` | Version as `{major, minor, revision}`. |

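On parse failure, `merve_parse_commonjs` writes a non-zero location when
`out_err` is non-NULL and the location is available. In every other case
(success, or a failure with no available location) a non-NULL `out_err` is set
to `{0, 0}`.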

#### Error Constants

| Constant | Value | Description |
Expand Down
24 changes: 24 additions & 0 deletions include/merve/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include "merve/version.h"

#include <cstddef>
#include <cstdint>
#include <optional>
#include <string>
Expand Down Expand Up @@ -37,6 +38,17 @@ enum lexer_error {
TEMPLATE_NEST_OVERFLOW, ///< Template literal nesting too deep
};

/**
* @brief Source location information for a parse error.
*
* - line and column are 1-based.
* - column is byte-oriented.
*/
struct error_location {
uint32_t line;    ///< 1-based line number.
uint32_t column;  ///< 1-based, byte-oriented column.
};

/**
* @brief Type alias for export names.
*
Expand Down Expand Up @@ -146,6 +158,18 @@ std::optional<lexer_analysis> parse_commonjs(std::string_view file_contents);
*/
const std::optional<lexer_error>& get_last_error();

/**
* @brief Get the location of the last failed parse operation.
*
* @return const std::optional<error_location>& The last error location, or
* std::nullopt if unavailable.
*
* @note This is global state and may be overwritten by subsequent calls
* to parse_commonjs().
* @note Location tracking is best-effort and may be unavailable.
*/
const std::optional<error_location>& get_last_error_location();

} // namespace lexer

#endif // MERVE_PARSER_H
37 changes: 31 additions & 6 deletions include/merve_c.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,19 @@ typedef struct {
int revision;
} merve_version_components;

/**
* @brief Source location for a parse error.
*
* - line and column are 1-based.
* - column is byte-oriented.
*
* A zeroed location (`{0, 0}`) means the location is unavailable.
*/
typedef struct {
uint32_t line;   /**< 1-based line number; 0 when the location is unavailable. */
uint32_t column; /**< 1-based, byte-oriented column; 0 when the location is unavailable. */
} merve_error_loc;

/* Error codes corresponding to lexer::lexer_error values. */
#define MERVE_ERROR_TODO 0
#define MERVE_ERROR_UNEXPECTED_PAREN 1
Expand All @@ -59,20 +72,32 @@ extern "C" {
#endif

/**
* Parse CommonJS source code and extract export information.
* Parse CommonJS source code and optionally return error location.
*
* The source buffer must remain valid while accessing string_view-backed
* export names from the returned handle.
*
* If @p out_err is non-NULL, it is always written:
* - On success: set to {0, 0}.
* - On parse failure with known location: set to that location.
* - On parse failure without available location: set to {0, 0}.
*
* You must call merve_free() on the returned handle when done.
*
* @param input Pointer to the JavaScript source (need not be null-terminated).
* NULL is treated as an empty string.
* @param length Length of the input in bytes.
* @param input Pointer to the JavaScript source (need not be
* null-terminated). NULL is treated as an empty string.
* @param length Length of the input in bytes.
* @param out_err Optional output pointer for parse error location.
* @return A handle to the parse result, or NULL on out-of-memory.
* Use merve_is_valid() to check if parsing succeeded.
*/
merve_analysis merve_parse_commonjs(const char* input, size_t length);
#ifdef __cplusplus
merve_analysis merve_parse_commonjs(const char* input, size_t length,
merve_error_loc* out_err = nullptr);
#else
merve_analysis merve_parse_commonjs(const char* input, size_t length,
merve_error_loc* out_err);
#endif

/**
* Check whether the parse result is valid (parsing succeeded).
Expand Down Expand Up @@ -165,7 +190,7 @@ const char* merve_get_version(void);
merve_version_components merve_get_version_components(void);

#ifdef __cplusplus
} /* extern "C" */
} /* extern "C" */
#endif

#endif /* MERVE_C_H */
2 changes: 1 addition & 1 deletion rust/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 12 additions & 1 deletion rust/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ merve = { version = "...", features = ["libcpp"] }
### `parse_commonjs`

```rust
pub fn parse_commonjs(source: &str) -> Result<Analysis<'_>, LexerError>
pub fn parse_commonjs(source: &str) -> Result<Analysis<'_>, LocatedLexerError>
```

Parse CommonJS source code and extract export information. The returned
Expand Down Expand Up @@ -100,6 +100,17 @@ Returned when the input contains ESM syntax or malformed constructs:

`LexerError` implements `Display` and, with the `std` feature, `std::error::Error`.

### `LocatedLexerError`

```rust
pub struct LocatedLexerError {
pub kind: LexerError,
pub location: Option<ErrorLocation>,
}
```

`ErrorLocation` uses 1-based `line`/`column` (byte-oriented column).

### Versioning helpers

```rust
Expand Down
38 changes: 21 additions & 17 deletions rust/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,8 @@ fn amalgamate_file(
out.push_str(&format!("/* end file {filename} */\n"));
}

/// When building inside the merve repository, produce the three amalgamated
/// files in `deps/`: merve.h, merve.cpp, merve_c.h.
/// When building inside the merve repository, produce the vendored
/// sources in `deps/`: merve.h, merve.cpp, merve_c.cpp, merve_c.h.
fn amalgamate_from_repo(project_root: &Path, deps: &Path) {
let include_path = project_root.join("include");
let source_path = project_root.join("src");
Expand All @@ -159,21 +159,23 @@ fn amalgamate_from_repo(project_root: &Path, deps: &Path) {
);
fs::write(deps.join("merve.h"), &header).expect("failed to write deps/merve.h");

// 2. Amalgamate merve.cpp (parser.cpp + merve_c.cpp with includes resolved).
// 2. Amalgamate merve.cpp (parser.cpp with includes resolved).
let mut source = String::from("#include \"merve.h\"\n\n");
for cpp in &["parser.cpp", "merve_c.cpp"] {
amalgamate_file(
&include_path,
&source_path,
&source_path,
cpp,
&mut source,
&mut included,
);
}
amalgamate_file(
&include_path,
&source_path,
&source_path,
"parser.cpp",
&mut source,
&mut included,
);
fs::write(deps.join("merve.cpp"), &source).expect("failed to write deps/merve.cpp");

// 3. Copy merve_c.h verbatim (standalone C header).
// 3. Copy merve_c.cpp verbatim (C API implementation).
fs::copy(source_path.join("merve_c.cpp"), deps.join("merve_c.cpp"))
.expect("failed to copy merve_c.cpp");

// 4. Copy merve_c.h verbatim (standalone C header).
fs::copy(include_path.join("merve_c.h"), deps.join("merve_c.h"))
.expect("failed to copy merve_c.h");
}
Expand Down Expand Up @@ -207,15 +209,17 @@ fn main() {
}
}

// Both in-repo and published crate use the same layout: merve.cpp + merve.h + merve_c.h
// Both in-repo and published crate use the same layout:
// merve.cpp + merve_c.cpp + merve.h + merve_c.h
assert!(
deps.join("merve.cpp").exists(),
deps.join("merve.cpp").exists() && deps.join("merve_c.cpp").exists(),
"No C++ sources found in deps/. \
When building outside the repository, deps/ must contain the amalgamated sources."
When building outside the repository, deps/ must contain the vendored sources."
);

let mut build = cc::Build::new();
build.file(deps.join("merve.cpp"));
build.file(deps.join("merve_c.cpp"));
build.include(&deps);
build.cpp(true).std("c++20").warnings(false);

Expand Down
Loading
Loading