From c1231fdb5f7c18e2f9340f80695f6e80f8f39851 Mon Sep 17 00:00:00 2001 From: sanjin Date: Sat, 23 May 2026 01:22:39 +0200 Subject: [PATCH] Release 1.0.0-beta.20 string search and trim foundation --- ...STRING_SEARCH_AND_ASCII_TRIM_FOUNDATION.md | 89 ++++ .llm/reviews/BETA_20_RELEASE_REVIEW.md | 95 ++++ README.md | 29 +- compiler/Cargo.lock | 2 +- compiler/Cargo.toml | 2 +- compiler/tests/promotion_gate.rs | 18 +- ...tandard_core_facade_source_search_alpha.rs | 10 +- .../standard_string_search_trim_beta20.rs | 478 ++++++++++++++++++ ...rd_string_source_fallback_helpers_alpha.rs | 14 +- docs/POST_BETA_ROADMAP.md | 10 + docs/compiler/RELEASE_NOTES.md | 35 ++ docs/compiler/ROADMAP.md | 12 +- docs/language/RELEASE_NOTES.md | 38 +- docs/language/ROADMAP.md | 17 +- docs/language/SPEC-v1.md | 16 +- docs/language/STDLIB_API.md | 12 +- .../projects/std-import-string/src/main.slo | 58 ++- .../projects/std-import-string/src/main.slo | 58 ++- .../std-layout-local-string/src/main.slo | 58 ++- .../std-layout-local-string/src/string.slo | 95 +++- lib/std/README.md | 9 +- lib/std/string.slo | 95 +++- scripts/release-gate.sh | 1 + 23 files changed, 1214 insertions(+), 37 deletions(-) create mode 100644 .llm/BETA_20_STRING_SEARCH_AND_ASCII_TRIM_FOUNDATION.md create mode 100644 .llm/reviews/BETA_20_RELEASE_REVIEW.md create mode 100644 compiler/tests/standard_string_search_trim_beta20.rs diff --git a/.llm/BETA_20_STRING_SEARCH_AND_ASCII_TRIM_FOUNDATION.md b/.llm/BETA_20_STRING_SEARCH_AND_ASCII_TRIM_FOUNDATION.md new file mode 100644 index 0000000..1b20820 --- /dev/null +++ b/.llm/BETA_20_STRING_SEARCH_AND_ASCII_TRIM_FOUNDATION.md @@ -0,0 +1,89 @@ +# 1.0.0-beta.20 String Search And ASCII Trim Foundation + +## Scope + +`1.0.0-beta.20` is a standard-library and compiler-gate slice. It does not +change source-language syntax, runtime C, compiler-known runtime names, or +ABI/layout policy. 
+ +The release adds source-authored `std.string` helpers: + +- `contains ((value string) (needle string)) -> bool` +- `index_of_option ((value string) (needle string)) -> (option i32)` +- `last_index_of_option ((value string) (needle string)) -> (option i32)` +- `trim_ascii_start ((value string)) -> string` +- `trim_ascii_end ((value string)) -> string` +- `trim_ascii ((value string)) -> string` + +## Contract + +Search is byte-oriented over the current runtime string representation. +`index_of_option` returns the first zero-based byte offset for a matching +needle, `last_index_of_option` returns the last zero-based byte offset, and +`contains` is true when `index_of_option` returns `some`. + +Empty needles are valid: + +- `index_of_option value ""` returns `some 0` +- `last_index_of_option value ""` returns `some (len value)` +- `contains value ""` returns `true` + +Missing needles return `none`. + +ASCII trim removes only these byte values from the requested edges: + +- `9` horizontal tab +- `10` line feed +- `11` vertical tab +- `12` form feed +- `13` carriage return +- `32` space + +The helpers compose over already-promoted string primitives: + +- `std.string.len` +- `std.string.byte_at_result` +- `std.string.slice_result` +- `std.string.starts_with` +- `std.string.ends_with` + +## Non-Scope + +This scope does not add: + +- compiler-known `std.string.*` runtime names for the new helpers +- runtime C helper implementations +- source-language syntax +- Unicode scalar, grapheme, display-width, or normalization semantics +- case folding or locale-sensitive search +- regular expressions +- tokenizer/parser APIs +- mutable strings +- language slice/view syntax +- stable string ABI/layout +- stable allocation ownership rules +- stable standard-library compatibility +- performance claims + +## Acceptance Criteria + +- `lib/std/string.slo` exports all six helpers. 
+- Explicit `std.string` import examples exercise contains, first/last search, + missing needles, empty needles, leading trim, trailing trim, full trim, + all-whitespace trim, and no-trim cases. +- Local `std-layout-local-string` examples mirror the public helper surface. +- Focused compiler coverage verifies the helpers are source-authored and that + direct compiler-known runtime calls for the new names remain unsupported. +- `scripts/release-gate.sh` runs the focused beta20 test. +- Generated standard-library API documentation includes the new signatures. + +## Gates + +```bash +cargo fmt --check +cargo test --test standard_string_search_trim_beta20 +cargo test --test standard_string_source_fallback_helpers_alpha +cargo test --test standard_string_scanning_beta16 +cargo test +./scripts/release-gate.sh +``` diff --git a/.llm/reviews/BETA_20_RELEASE_REVIEW.md b/.llm/reviews/BETA_20_RELEASE_REVIEW.md new file mode 100644 index 0000000..2acf304 --- /dev/null +++ b/.llm/reviews/BETA_20_RELEASE_REVIEW.md @@ -0,0 +1,95 @@ +# 1.0.0-beta.20 Release Review + +Scope: String Search And ASCII Trim Foundation + +## Findings + +No blocking findings. + +The beta20 diff matches the documented release contract at the +release-blocking level. `lib/std/string.slo:1` exports all six new helpers, and +the implementations remain ordinary source helpers over the existing string +facade primitives: first search at `lib/std/string.slo:30`, last search at +`lib/std/string.slo:47`, `contains` at `lib/std/string.slo:64`, and ASCII trim +at `lib/std/string.slo:71`. The local mirror in +`examples/projects/std-layout-local-string/src/string.slo:1` exposes the same +surface without changing the public contract shape. + +The example coverage is explicit and scoped correctly. 
The standard import +example covers present, missing, empty-needle, first/last index, leading trim, +trailing trim, full trim, all-whitespace trim, and no-trim cases at +`examples/projects/std-import-string/src/main.slo:204`; the documentation copy +matches at `docs/language/examples/projects/std-import-string/src/main.slo:204`; +the local fixture mirrors the same behavior at +`examples/projects/std-layout-local-string/src/main.slo:204`. + +The compiler gate coverage is aligned with the non-scope. The focused beta20 +test builds an explicit `std.string` import fixture at +`compiler/tests/standard_string_search_trim_beta20.rs:47`, asserts the helpers +are source-authored at `compiler/tests/standard_string_search_trim_beta20.rs:178`, +and rejects direct compiler-known runtime calls for the new names. The existing +local-string fallback test now expects and inventories the new helpers at +`compiler/tests/standard_string_source_fallback_helpers_alpha.rs:8` and +`compiler/tests/standard_string_source_fallback_helpers_alpha.rs:24`. +The repo-root standard-source import gate and promotion gate are also aligned at +`compiler/tests/standard_core_facade_source_search_alpha.rs:8`, +`compiler/tests/standard_core_facade_source_search_alpha.rs:33`, +`compiler/tests/promotion_gate.rs:1318`, and +`compiler/tests/promotion_gate.rs:7368`. + +## Contract Drift + +No blocking contract drift found. + +The README, language release notes, language roadmap, v1 spec, compiler release +notes, compiler roadmap, post-beta roadmap, stdlib README, and beta20 `.llm` +contract all describe the same narrow surface: byte-oriented search, empty +needle behavior, ASCII-only trimming over bytes `9`, `10`, `11`, `12`, `13`, +and `32`, no new compiler-known runtime names, no runtime C work, no new source +syntax, and no Unicode, regex, tokenizer, mutable-string, stable ABI/layout, or +stable stdlib/API claims. 
Representative anchors: `README.md:144`, +`docs/language/RELEASE_NOTES.md:39`, `docs/language/ROADMAP.md:80`, +`docs/language/SPEC-v1.md:232`, `docs/compiler/RELEASE_NOTES.md:15`, +`docs/compiler/ROADMAP.md:100`, `docs/POST_BETA_ROADMAP.md:111`, +`lib/std/README.md:233`, and +`.llm/BETA_20_STRING_SEARCH_AND_ASCII_TRIM_FOUNDATION.md:1`. + +The version and generated catalog updates are coherent: `compiler/Cargo.toml:3` +and `compiler/Cargo.lock` are bumped to `1.0.0-beta.20`, +`docs/language/STDLIB_API.md:18` reports 596 exported signatures, and +`docs/language/STDLIB_API.md:486` lists 36 `std.string` signatures including +the six beta20 helpers. `scripts/release-gate.sh:73` wires the focused beta20 +test into the release gate. + +## Verification Notes + +Inspected: + +- Working tree diff and untracked beta20 contract/test files for std source, + explicit std/local examples, docs, compiler tests, generated catalog, version + bump, and release-gate integration. +- Contract drift against README, language roadmap/spec/release notes, compiler + roadmap/release notes, post-beta roadmap, stdlib README, and the beta20 `.llm` + contract. +- Sibling `glagol` repository status; no sibling worktree changes were present. + +Read-only checks run: + +- `git diff --check` - passed. +- `git diff --cached --check` - passed. +- `bash -n scripts/release-gate.sh` - passed. +- `cargo fmt --check --manifest-path compiler/Cargo.toml` - passed. +- Local/private publication text scan over source/docs/tests/.llm with build + artifacts excluded - passed. + +Not run: + +- Focused cargo tests, full `cargo test`, and `./scripts/release-gate.sh`. + Those commands write build/generated artifacts, and this review was + constrained to read-only commands except for the review file. + +## Verdict + +Release-ready from this review. No blocking beta20 issues remain in the current +working tree diff. The controller should still run the focused beta20 test stack +and full release gate before tagging. 
diff --git a/README.md b/README.md index ce9383c..0057c2e 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ This repository is the canonical public monorepo for the language design, standard library source, compiler, runtime, examples, benchmarks, and technical documents. -Current release: `1.0.0-beta.19`. +Current release: `1.0.0-beta.20`. ## Repository Layout @@ -24,7 +24,7 @@ scripts/ local release and document tooling ## Beta Scope -`1.0.0-beta.19` keeps the `1.0.0-beta` language baseline, includes the +`1.0.0-beta.20` keeps the `1.0.0-beta` language baseline, includes the `1.0.0-beta.1` tooling/install hardening slice, the `1.0.0-beta.2` runtime/resource foundation bundle, the `1.0.0-beta.3` standard-library stabilization bundle, the `1.0.0-beta.4` language-usability diagnostics @@ -40,8 +40,9 @@ slice, the `1.0.0-beta.13` diagnostic catalog and schema policy slice, the `1.0.0-beta.15` reserved generic collection boundary hardening and collection ledger, the `1.0.0-beta.16` string scanning and token boundary foundation, the `1.0.0-beta.17` JSON primitive scalar parsing foundation, the -`1.0.0-beta.18` JSON string token parsing foundation, and the -`1.0.0-beta.19` test discovery and user-project conformance foundation. +`1.0.0-beta.18` JSON string token parsing foundation, the +`1.0.0-beta.19` test discovery and user-project conformance foundation, and +the `1.0.0-beta.20` string search and ASCII trim foundation. 
The language baseline supports practical local command-line, file, and loopback-network programs with: @@ -57,6 +58,7 @@ loopback-network programs with: - beta-scoped loopback TCP handles through `std.net` - JSON string quoting, compact JSON text construction, primitive scalar token parsing, and ASCII JSON string-token parsing through `std.json` +- byte-oriented string search and ASCII edge trimming through `std.string` - hosted native builds through LLVM IR, Clang, and `runtime/runtime.c` The generated standard-library API catalog is a beta discovery aid: it lists @@ -139,6 +141,14 @@ tests without executing test bodies. The list mode preserves existing file, project, and workspace test ordering, honors `--filter `, and remains beta tooling rather than a stable output schema. +The `1.0.0-beta.20` string search and ASCII trim foundation adds +`std.string.contains`, `std.string.index_of_option`, +`std.string.last_index_of_option`, `std.string.trim_ascii_start`, +`std.string.trim_ascii_end`, and `std.string.trim_ascii` as ordinary source +helpers over the existing byte string primitives. Search is byte-oriented, +empty needles match at the first index and at `(len value)` for last search, +and ASCII trimming removes only bytes `9`, `10`, `11`, `12`, `13`, and `32`. + Still deferred before stable: executable generics, generic aliases, maps/sets, broad package registry semantics, stable Markdown schema, stable stdlib/API compatibility freeze, DNS/TLS/async networking, LSP/watch guarantees, SARIF @@ -414,6 +424,17 @@ semantics; full JSON parsing; object/array parsing; tokenizer APIs; a language slice/view feature; mutable strings; stable ABI/layout; or a stable stdlib/API freeze. 
+## 1.0.0-beta.20 String Search And ASCII Trim Foundation + +The `1.0.0-beta.20` scope extends the source-authored `std.string` facade +with byte-oriented search and ASCII trim helpers: +`contains`, `index_of_option`, `last_index_of_option`, `trim_ascii_start`, +`trim_ascii_end`, and `trim_ascii`. + +This scope adds no compiler-known runtime names. It does not claim Unicode +scalar, grapheme, case-folding, locale, regex, tokenizer, mutable string, +language slice/view, stable ABI/layout, or stable stdlib/API semantics. + ## 1.0.0-beta.15 Reserved Generic Collection Boundary Hardening And Collection Ledger The `1.0.0-beta.15` release documents the current concrete collection and diff --git a/compiler/Cargo.lock b/compiler/Cargo.lock index 96f73d4..dd2151b 100644 --- a/compiler/Cargo.lock +++ b/compiler/Cargo.lock @@ -4,4 +4,4 @@ version = 3 [[package]] name = "glagol" -version = "1.0.0-beta.19" +version = "1.0.0-beta.20" diff --git a/compiler/Cargo.toml b/compiler/Cargo.toml index 43df5b7..926a79b 100644 --- a/compiler/Cargo.toml +++ b/compiler/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "glagol" -version = "1.0.0-beta.19" +version = "1.0.0-beta.20" edition = "2021" description = "Glagol, the first compiler for the Slovo language" license = "MIT OR Apache-2.0" diff --git a/compiler/tests/promotion_gate.rs b/compiler/tests/promotion_gate.rs index 9a81ecc..a4a3caf 100644 --- a/compiler/tests/promotion_gate.rs +++ b/compiler/tests/promotion_gate.rs @@ -1322,6 +1322,12 @@ const STANDARD_STRING_SOURCE_FACADE_ALPHA: &[&str] = &[ "slice_result", "starts_with", "ends_with", + "contains", + "index_of_option", + "last_index_of_option", + "trim_ascii_start", + "trim_ascii_end", + "trim_ascii", "parse_i32_result", "parse_i32_option", "parse_u32_result", @@ -7374,8 +7380,10 @@ fn assert_project_std_import_string_tooling_matches_fixture(project: &Path) { "test \"explicit std string parse integer fallbacks\" ... ok\n", "test \"explicit std string parse float bool fallbacks\" ... 
ok\n", "test \"explicit std string parse custom fallbacks\" ... ok\n", + "test \"explicit std string search helpers\" ... ok\n", + "test \"explicit std string ascii trim helpers\" ... ok\n", "test \"explicit std string helpers all\" ... ok\n", - "10 test(s) passed\n", + "12 test(s) passed\n", ), ); } @@ -8854,8 +8862,10 @@ fn assert_project_std_layout_local_string_tooling_matches_fixture(project: &Path "test \"explicit local string parse integer fallbacks\" ... ok\n", "test \"explicit local string parse float bool fallbacks\" ... ok\n", "test \"explicit local string parse custom fallbacks\" ... ok\n", + "test \"explicit local string search helpers\" ... ok\n", + "test \"explicit local string ascii trim helpers\" ... ok\n", "test \"explicit local string helpers all\" ... ok\n", - "10 test(s) passed\n", + "12 test(s) passed\n", ), "std layout local string project test", ); @@ -8896,11 +8906,11 @@ fn assert_standard_string_source_fallback_helpers_alpha(project: &Path) { "standard string source helper fixture must use only existing std.string runtime names", ); assert!( - !string.contains("trim") - && !string.contains("locale") + !string.contains("locale") && !string.contains("unicode") && !string.contains("bytes") && !string.contains("case_insensitive") + && !string.contains("regex") && !string.contains("host_error"), "standard string source helper fixture must not claim deferred parsing or richer error APIs" ); diff --git a/compiler/tests/standard_core_facade_source_search_alpha.rs b/compiler/tests/standard_core_facade_source_search_alpha.rs index 09cbfed..ed50d31 100644 --- a/compiler/tests/standard_core_facade_source_search_alpha.rs +++ b/compiler/tests/standard_core_facade_source_search_alpha.rs @@ -15,8 +15,10 @@ const EXPECTED_STD_STRING_OUTPUT: &str = concat!( "test \"explicit std string parse integer fallbacks\" ... ok\n", "test \"explicit std string parse float bool fallbacks\" ... ok\n", "test \"explicit std string parse custom fallbacks\" ... 
ok\n", + "test \"explicit std string search helpers\" ... ok\n", + "test \"explicit std string ascii trim helpers\" ... ok\n", "test \"explicit std string helpers all\" ... ok\n", - "10 test(s) passed\n", + "12 test(s) passed\n", ); const EXPECTED_STD_NUM_OUTPUT: &str = concat!( @@ -39,6 +41,12 @@ fn explicit_std_string_import_loads_repo_root_standard_source() { "slice_result", "starts_with", "ends_with", + "contains", + "index_of_option", + "last_index_of_option", + "trim_ascii_start", + "trim_ascii_end", + "trim_ascii", "parse_i32_result", "parse_i32_option", "parse_u32_result", diff --git a/compiler/tests/standard_string_search_trim_beta20.rs b/compiler/tests/standard_string_search_trim_beta20.rs new file mode 100644 index 0000000..d297072 --- /dev/null +++ b/compiler/tests/standard_string_search_trim_beta20.rs @@ -0,0 +1,478 @@ +use std::{ + env, + ffi::OsStr, + fs, + path::{Path, PathBuf}, + process::{Command, Output}, + sync::atomic::{AtomicUsize, Ordering}, +}; + +static NEXT_TEMP_ID: AtomicUsize = AtomicUsize::new(0); + +const EXPECTED_TEST_OUTPUT: &str = concat!( + "test \"explicit std string contains\" ... ok\n", + "test \"explicit std string index_of_option\" ... ok\n", + "test \"explicit std string last_index_of_option\" ... ok\n", + "test \"explicit std string ascii trim\" ... ok\n", + "test \"explicit std string search trim composition\" ... ok\n", + "test \"explicit std string search trim all\" ... 
ok\n", + "6 test(s) passed\n", +); + +const STANDARD_STRING_SEARCH_TRIM_BETA20: &[&str] = &[ + "contains", + "index_of_option", + "last_index_of_option", + "trim_ascii_start", + "trim_ascii_end", + "trim_ascii", +]; + +const ALLOWED_STD_REFERENCES: &[&str] = &[ + "std.result", + "std.string.parse_bool_result", + "std.string.parse_f64_result", + "std.string.parse_i64_result", + "std.string.parse_u64_result", + "std.string.parse_i32_result", + "std.string.parse_u32_result", + "std.string.byte_at_result", + "std.string.slice_result", + "std.string.starts_with", + "std.string.ends_with", + "std.string.concat", + "std.string.len", +]; + +#[test] +fn explicit_std_string_search_and_ascii_trim_helpers_check_and_test() { + let project = write_project( + "std-string-search-trim-beta20", + r#" +(module main) + +(import std.string (contains index_of_option last_index_of_option trim_ascii_start trim_ascii_end trim_ascii)) + +(fn option_i32_eq ((maybe (option i32)) (expected i32)) -> bool + (match maybe + ((some value) + (= value expected)) + ((none) + false))) + +(fn option_i32_none ((maybe (option i32))) -> bool + (match maybe + ((some value) + false) + ((none) + true))) + +(fn imported_string_contains_ok () -> bool + (if (contains "slovo compiler" "slo") + (if (contains "slovo compiler" "compiler") + (= (contains "slovo compiler" "missing") false) + false) + false)) + +(fn imported_string_index_of_ok () -> bool + (if (option_i32_eq (index_of_option "bananana" "ana") 1) + (if (option_i32_eq (index_of_option "slovo" "s") 0) + (if (option_i32_eq (index_of_option "slovo" "vo") 3) + (option_i32_none (index_of_option "slovo" "compiler")) + false) + false) + false)) + +(fn imported_string_last_index_of_ok () -> bool + (if (option_i32_eq (last_index_of_option "bananana" "ana") 5) + (if (option_i32_eq (last_index_of_option "slovo" "o") 4) + (if (option_i32_eq (last_index_of_option "slovo" "s") 0) + (option_i32_none (last_index_of_option "slovo" "compiler")) + false) + false) + false)) 
+ +(fn imported_string_ascii_trim_ok () -> bool + (if (= (trim_ascii_start "\n\t slovo \t") "slovo \t") + (if (= (trim_ascii_end "\n\t slovo \t") "\n\t slovo") + (if (= (trim_ascii "\n\t slovo \t") "slovo") + (if (= (trim_ascii "slovo") "slovo") + (= (trim_ascii " ") "") + false) + false) + false) + false)) + +(fn imported_string_search_trim_composes_ok () -> bool + (if (= (trim_ascii " slovo compiler ") "slovo compiler") + (if (contains (trim_ascii " slovo compiler ") "compiler") + (if (option_i32_eq (index_of_option (trim_ascii_start "\t\tprefix-core") "core") 7) + (option_i32_eq (last_index_of_option (trim_ascii_end "core-core\n") "core") 5) + false) + false) + false)) + +(fn imported_string_search_trim_all_ok () -> bool + (if (imported_string_contains_ok) + (if (imported_string_index_of_ok) + (if (imported_string_last_index_of_ok) + (if (imported_string_ascii_trim_ok) + (imported_string_search_trim_composes_ok) + false) + false) + false) + false)) + +(fn main () -> i32 + (if (imported_string_search_trim_all_ok) + 42 + 1)) + +(test "explicit std string contains" + (imported_string_contains_ok)) + +(test "explicit std string index_of_option" + (imported_string_index_of_ok)) + +(test "explicit std string last_index_of_option" + (imported_string_last_index_of_ok)) + +(test "explicit std string ascii trim" + (imported_string_ascii_trim_ok)) + +(test "explicit std string search trim composition" + (imported_string_search_trim_composes_ok)) + +(test "explicit std string search trim all" + (= (main) 42)) +"#, + ); + + let source = read(&project.join("src/main.slo")); + let std_string = read(&std_string_path()); + + assert!( + !project.join("src/string.slo").exists(), + "beta20 fixture must exercise repo-root std.string, not a local module copy" + ); + assert!( + source.starts_with("(module main)\n\n(import std.string ("), + "beta20 fixture must use an explicit std.string import" + ); + assert_std_string_search_trim_facades(&std_string); + + let fmt = run_glagol([ + 
OsStr::new("fmt"), + OsStr::new("--check"), + project.as_os_str(), + ]); + assert_success("std string search trim fmt --check", &fmt); + + let check = run_glagol([OsStr::new("check"), project.as_os_str()]); + assert_success_stdout(check, "", "std string search trim check"); + + let test = run_glagol([OsStr::new("test"), project.as_os_str()]); + assert_success_stdout(test, EXPECTED_TEST_OUTPUT, "std string search trim test"); +} + +#[test] +fn string_search_and_ascii_trim_helpers_are_not_compiler_known_runtime_calls() { + let std_string = read(&std_string_path()); + assert_std_string_search_trim_facades(&std_string); + + for helper in STANDARD_STRING_SEARCH_TRIM_BETA20 { + assert!( + !std_string.contains(&format!("std.string.{}", helper)), + "std.string.{} must remain source-authored, not a compiler-known runtime call", + helper + ); + assert!( + !std_string.contains(&format!("__glagol_string_{}", helper)), + "std.string.{} must not introduce a private runtime symbol", + helper + ); + } + + let cases = [ + UnsupportedRuntimeCase { + name: "contains", + symbol: "std.string.contains", + source: r#" +(module main) + +(fn main () -> i32 + (if (std.string.contains "slovo" "ovo") + 0 + 1)) +"#, + }, + UnsupportedRuntimeCase { + name: "index-of-option", + symbol: "std.string.index_of_option", + source: r#" +(module main) + +(fn main () -> i32 + (match (std.string.index_of_option "slovo" "o") + ((some value) + value) + ((none) + 0))) +"#, + }, + UnsupportedRuntimeCase { + name: "last-index-of-option", + symbol: "std.string.last_index_of_option", + source: r#" +(module main) + +(fn main () -> i32 + (match (std.string.last_index_of_option "slovo" "o") + ((some value) + value) + ((none) + 0))) +"#, + }, + UnsupportedRuntimeCase { + name: "trim-ascii-start", + symbol: "std.string.trim_ascii_start", + source: r#" +(module main) + +(fn main () -> i32 + (std.string.len (std.string.trim_ascii_start " slovo"))) +"#, + }, + UnsupportedRuntimeCase { + name: "trim-ascii-end", + symbol: 
"std.string.trim_ascii_end", + source: r#" +(module main) + +(fn main () -> i32 + (std.string.len (std.string.trim_ascii_end "slovo "))) +"#, + }, + UnsupportedRuntimeCase { + name: "trim-ascii", + symbol: "std.string.trim_ascii", + source: r#" +(module main) + +(fn main () -> i32 + (std.string.len (std.string.trim_ascii " slovo "))) +"#, + }, + ]; + + for case in cases { + let fixture = write_fixture(case.name, case.source); + let output = run_glagol([fixture.as_os_str()]); + assert_failure_stderr_contains( + &format!("direct {} runtime call", case.symbol), + &output, + &format!("standard library call `{}` is not supported", case.symbol), + ); + } +} + +fn assert_std_string_search_trim_facades(std_string: &str) { + assert!( + std_string.starts_with("(module string (export "), + "lib/std/string.slo must stay a source-authored module export" + ); + + let mut non_allowed_std = std_string.to_owned(); + for allowed in ALLOWED_STD_REFERENCES { + non_allowed_std = non_allowed_std.replace(allowed, ""); + } + assert!( + !non_allowed_std.contains("std."), + "std.string beta20 helpers must use only existing std.result bridges and promoted beta16-or-earlier std.string primitives" + ); + + for helper in STANDARD_STRING_SEARCH_TRIM_BETA20 { + assert!( + std_string.contains(&format!("(fn {} ", helper)), + "lib/std/string.slo is missing source facade `{}`", + helper + ); + } + + let search_trim_source = search_trim_source_region(std_string); + for primitive in [ + ("len", ["std.string.len", "(len "]), + ( + "byte_at_result", + ["std.string.byte_at_result", "(byte_at_result "], + ), + ( + "slice_result", + ["std.string.slice_result", "(slice_result "], + ), + ("starts_with", ["std.string.starts_with", "(starts_with "]), + ] { + assert!( + primitive + .1 + .iter() + .any(|needle| search_trim_source.contains(needle)), + "beta20 search/trim facades must compose over existing beta16 string primitive `{}`", + primitive.0 + ); + } + + assert!( + !std_string.contains("unicode") + && 
!std_string.contains("grapheme") + && !std_string.contains("locale") + && !std_string.contains("case_insensitive") + && !std_string.contains("regex"), + "beta20 string helpers must not claim deferred Unicode, locale, case-folding, or regex APIs" + ); +} + +fn search_trim_source_region(source: &str) -> &str { + let ends_with_end = function_range(source, "ends_with").1; + let parse_start = function_range(source, "parse_i32_result").0; + &source[ends_with_end..parse_start] +} + +fn function_range(source: &str, name: &str) -> (usize, usize) { + let needle = format!("(fn {} ", name); + let start = source + .find(&needle) + .unwrap_or_else(|| panic!("missing function `{}`", name)); + let mut depth = 0usize; + + for (offset, byte) in source.as_bytes()[start..].iter().enumerate() { + match byte { + b'(' => depth += 1, + b')' => { + depth = depth + .checked_sub(1) + .unwrap_or_else(|| panic!("unbalanced function `{}`", name)); + if depth == 0 { + return (start, start + offset + 1); + } + } + _ => {} + } + } + + panic!("unterminated function `{}`", name); +} + +fn run_glagol<I, S>(args: I) -> Output +where + I: IntoIterator<Item = S>, + S: AsRef<OsStr>, +{ + Command::new(env!("CARGO_BIN_EXE_glagol")) + .args(args) + .current_dir(Path::new(env!("CARGO_MANIFEST_DIR"))) + .output() + .expect("run glagol") +} + +fn write_project(name: &str, source: &str) -> PathBuf { + let root = temp_root(name); + let src = root.join("src"); + fs::create_dir_all(&src).unwrap_or_else(|err| panic!("create `{}`: {}", src.display(), err)); + fs::write( + root.join("slovo.toml"), + format!( + "[project]\nname = \"{}\"\nsource_root = \"src\"\nentry = \"main\"\n", + name + ), + ) + .unwrap_or_else(|err| panic!("write project manifest: {}", err)); + fs::write(src.join("main.slo"), source.trim_start()) + .unwrap_or_else(|err| panic!("write project main.slo: {}", err)); + root +} + +fn write_fixture(name: &str, source: &str) -> PathBuf { + let mut path = env::temp_dir(); + path.push(format!( +
"glagol-standard-string-search-trim-beta20-{}-{}-{}.slo", + name, + std::process::id(), + NEXT_TEMP_ID.fetch_add(1, Ordering::Relaxed) + )); + fs::write(&path, source.trim_start()) + .unwrap_or_else(|err| panic!("write `{}`: {}", path.display(), err)); + path +} + +fn temp_root(name: &str) -> PathBuf { + let root = env::temp_dir().join(format!( + "glagol-standard-string-search-trim-beta20-{}-{}-{}", + name, + std::process::id(), + NEXT_TEMP_ID.fetch_add(1, Ordering::Relaxed) + )); + let _ = fs::remove_dir_all(&root); + fs::create_dir_all(&root).unwrap_or_else(|err| panic!("create `{}`: {}", root.display(), err)); + root +} + +fn std_string_path() -> PathBuf { + Path::new(env!("CARGO_MANIFEST_DIR")).join("../lib/std/string.slo") +} + +fn read(path: &Path) -> String { + fs::read_to_string(path).unwrap_or_else(|err| panic!("read `{}`: {}", path.display(), err)) +} + +fn assert_success(context: &str, output: &Output) { + let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); + assert!( + output.status.success(), + "{} failed\nstatus: {:?}\nstdout:\n{}\nstderr:\n{}", + context, + output.status.code(), + stdout, + stderr + ); + assert!(stderr.is_empty(), "{} wrote stderr:\n{}", context, stderr); +} + +fn assert_success_stdout(output: Output, expected: &str, context: &str) { + assert_success(context, &output); + let stdout = String::from_utf8_lossy(&output.stdout); + assert_eq!(stdout, expected, "{}", context); +} + +fn assert_failure_stderr_contains(context: &str, output: &Output, needle: &str) { + let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); + assert!( + !output.status.success(), + "{} unexpectedly passed\nstdout:\n{}\nstderr:\n{}", + context, + stdout, + stderr + ); + assert!( + stdout.is_empty(), + "{} rejected compile wrote stdout:\n{}", + context, + stdout + ); + assert!( + stderr.contains(needle), + "{} stderr did not contain `{}`:\n{}", + 
context, + needle, + stderr + ); +} + +struct UnsupportedRuntimeCase { + name: &'static str, + symbol: &'static str, + source: &'static str, +} diff --git a/compiler/tests/standard_string_source_fallback_helpers_alpha.rs b/compiler/tests/standard_string_source_fallback_helpers_alpha.rs index 05e906d..4f268b3 100644 --- a/compiler/tests/standard_string_source_fallback_helpers_alpha.rs +++ b/compiler/tests/standard_string_source_fallback_helpers_alpha.rs @@ -15,8 +15,10 @@ const EXPECTED_TEST_OUTPUT: &str = concat!( "test \"explicit local string parse integer fallbacks\" ... ok\n", "test \"explicit local string parse float bool fallbacks\" ... ok\n", "test \"explicit local string parse custom fallbacks\" ... ok\n", + "test \"explicit local string search helpers\" ... ok\n", + "test \"explicit local string ascii trim helpers\" ... ok\n", "test \"explicit local string helpers all\" ... ok\n", - "10 test(s) passed\n", + "12 test(s) passed\n", ); const STANDARD_STRING_SOURCE_FALLBACK_HELPERS_ALPHA: &[&str] = &[ @@ -26,6 +28,12 @@ const STANDARD_STRING_SOURCE_FALLBACK_HELPERS_ALPHA: &[&str] = &[ "slice_result", "starts_with", "ends_with", + "contains", + "index_of_option", + "last_index_of_option", + "trim_ascii_start", + "trim_ascii_end", + "trim_ascii", "parse_i32_result", "parse_i32_option", "parse_u32_result", @@ -121,11 +129,11 @@ fn assert_local_string_fixture_is_source_authored(project: &Path) { "string fixture must use only the existing promoted std.string runtime names" ); assert!( - !string.contains("trim") - && !string.contains("locale") + !string.contains("locale") && !string.contains("unicode") && !string.contains("bytes") && !string.contains("case_insensitive") + && !string.contains("regex") && !string.contains("host_error"), "string fixture must not claim deferred parsing or richer error APIs" ); diff --git a/docs/POST_BETA_ROADMAP.md b/docs/POST_BETA_ROADMAP.md index 35180c1..8608262 100644 --- a/docs/POST_BETA_ROADMAP.md +++ b/docs/POST_BETA_ROADMAP.md @@ 
-108,6 +108,16 @@ generated from `lib/std/*.slo` and guarded by `scripts/release-gate.sh`. that composes `std.fs`, `std.string`, `std.math`, and `std.io` through explicit standard imports, with focused check/test/doc/run coverage. +Released in `1.0.0-beta.20`: source-authored `std.string` search and ASCII +trim helpers. The scope adds `contains`, `index_of_option`, +`last_index_of_option`, `trim_ascii_start`, `trim_ascii_end`, and +`trim_ascii` over existing byte-oriented string primitives, with explicit +`std.string` import examples and focused compiler gates. Empty needles match +at first index `0` and last index `(len value)`; ASCII trimming removes only +bytes `9`, `10`, `11`, `12`, `13`, and `32`. Unicode/grapheme semantics, +case folding, regexes, tokenizers, mutable strings, slice/view syntax, new +runtime names, and stable stdlib/API promises remain deferred. + Why third: stdlib growth is already broad enough that naming and stability tiers matter more than adding another isolated helper group. diff --git a/docs/compiler/RELEASE_NOTES.md b/docs/compiler/RELEASE_NOTES.md index 162a412..9fa8b4f 100644 --- a/docs/compiler/RELEASE_NOTES.md +++ b/docs/compiler/RELEASE_NOTES.md @@ -12,6 +12,41 @@ integration/readiness release, not the first real beta. No active unreleased compiler scope is documented here yet. +## 1.0.0-beta.20 + +Release label: `1.0.0-beta.20` + +Release date: 2026-05-23 + +Release state: string search and ASCII trim foundation + +### Summary + +The beta.20 compiler/tooling slice adds focused coverage for source-authored +`std.string` search and ASCII trim helpers without adding compiler-known +runtime names: + +- Bump the `glagol` compiler package version to `1.0.0-beta.20`. +- Add focused `standard_string_search_trim_beta20` coverage for explicit + `std.string` imports of `contains`, `index_of_option`, + `last_index_of_option`, `trim_ascii_start`, `trim_ascii_end`, and + `trim_ascii`. 
+- Require the new public helpers to remain source facades over beta16-or-earlier + string primitives and existing result/option shapes. +- Gate direct `std.string.contains`, `std.string.index_of_option`, + `std.string.last_index_of_option`, `std.string.trim_ascii_start`, + `std.string.trim_ascii_end`, and `std.string.trim_ascii` runtime calls as + unsupported compiler-known names. +- Add the focused beta20 test to `scripts/release-gate.sh`. + +### Explicit Deferrals + +This release does not implement new compiler-known `std.*` runtime names, +runtime C changes, source-language syntax, Unicode or grapheme semantics, +locale/case-folded search, regex, tokenizer/parser APIs, stable string +ABI/layout, stable allocation ownership rules, or a stable standard-library +compatibility contract. + ## 1.0.0-beta.19 Release label: `1.0.0-beta.19` diff --git a/docs/compiler/ROADMAP.md b/docs/compiler/ROADMAP.md index db428af..2eb5a70 100644 --- a/docs/compiler/ROADMAP.md +++ b/docs/compiler/ROADMAP.md @@ -22,8 +22,8 @@ general-purpose beta release. A Glagol feature is done only when it has parser/lowerer support, checker behavior, diagnostics for invalid forms, backend behavior or explicit unsupported diagnostics, and tests. -Current stage: `1.0.0-beta.19`, released on 2026-05-23 as a test discovery -and user-project conformance foundation. It keeps the +Current stage: `1.0.0-beta.20`, released on 2026-05-23 as a string search +and ASCII trim foundation. It keeps the `1.0.0-beta` language/compiler support baseline and includes the `1.0.0-beta.1` tooling hardening release, the `1.0.0-beta.2` runtime/resource foundation release, the `1.0.0-beta.3` standard-library stabilization release, @@ -97,6 +97,14 @@ single-file, project, and workspace ordering, honors `--filter `, and avoids executing test bodies. It keeps normal test execution output unchanged when `--list` is absent. 
+The beta.20 compiler/tooling slice bumps the package version and gates +source-authored `std.string` facades for `contains`, `index_of_option`, +`last_index_of_option`, `trim_ascii_start`, `trim_ascii_end`, and `trim_ascii` +through explicit `std.string` imports. It keeps those helpers composed over +beta16-or-earlier string primitives and existing option/result shapes, and +verifies that direct compiler-known runtime calls for the new helper names +remain unsupported. + Generic vectors, generic collections, maps, sets, generic stdlib dispatch, runtime collection changes, collection unification, stable human diagnostic text, stable artifact-manifest or Markdown schema freezes, LSP/watch diff --git a/docs/language/RELEASE_NOTES.md b/docs/language/RELEASE_NOTES.md index 06b2b62..12b333c 100644 --- a/docs/language/RELEASE_NOTES.md +++ b/docs/language/RELEASE_NOTES.md @@ -8,7 +8,7 @@ Historical `exp-*` releases listed here are experimental maturity milestones. The pushed tag `v2.0.0-beta.1` is historical. It is now documented as an experimental integration/readiness release, not as a beta maturity claim. -The current release is `1.0.0-beta.19`, published on 2026-05-23. It keeps the +The current release is `1.0.0-beta.20`, published on 2026-05-23. It keeps the `1.0.0-beta` language surface, includes the first post-beta tooling/install hardening bundle from `1.0.0-beta.1`, and adds the first runtime/resource foundation bundle from `1.0.0-beta.2` plus the first standard-library @@ -28,13 +28,45 @@ collection alias unification and generic reservation slice from collection ledger from `1.0.0-beta.15`, plus the string scanning and token boundary foundation from `1.0.0-beta.16`, and the JSON primitive scalar parsing foundation from `1.0.0-beta.17`, plus the JSON string token parsing -foundation from `1.0.0-beta.18`, and the test discovery and user-project -conformance foundation from `1.0.0-beta.19`. 
+foundation from `1.0.0-beta.18`, the test discovery and user-project +conformance foundation from `1.0.0-beta.19`, and the string search and ASCII +trim foundation from `1.0.0-beta.20`. ## Unreleased No active unreleased language scope is documented here yet. +## 1.0.0-beta.20 + +Release label: `1.0.0-beta.20` + +Release name: String Search And ASCII Trim Foundation + +Release date: 2026-05-23 + +Status: released beta standard-library string helper foundation on the +`1.0.0-beta` language baseline. + +The source facade adds these source-authored `std.string` helpers: + +- `contains ((value string) (needle string)) -> bool` +- `index_of_option ((value string) (needle string)) -> (option i32)` +- `last_index_of_option ((value string) (needle string)) -> (option i32)` +- `trim_ascii_start ((value string)) -> string` +- `trim_ascii_end ((value string)) -> string` +- `trim_ascii ((value string)) -> string` + +Search remains byte-oriented over current runtime strings. Empty needles match: +`index_of_option` returns `some 0`, `last_index_of_option` returns +`some (len value)`, and `contains` returns `true`. Missing needles return +`none` from both index helpers and `false` from `contains`. + +The ASCII trim helpers remove only bytes `9`, `10`, `11`, `12`, `13`, and +`32` from the requested edges. This release adds no compiler-known runtime +names, Unicode/grapheme semantics, case folding, locale-sensitive matching, +regular expressions, tokenizer APIs, language slice/view syntax, mutable +strings, stable ABI/layout guarantees, or stable stdlib/API freeze. ## 1.0.0-beta.19 Release label: `1.0.0-beta.19` diff --git a/docs/language/ROADMAP.md b/docs/language/ROADMAP.md index 40a0d82..41b0b26 100644 --- a/docs/language/ROADMAP.md +++ b/docs/language/ROADMAP.md @@ -10,8 +10,8 @@ Long-horizon planning lives in release train from the historical `v2.0.0-beta.1` tag toward and beyond the first real general-purpose beta Slovo contract.
-Current stage: `1.0.0-beta.19`, released on 2026-05-23 as a post-beta test -discovery and user-project conformance foundation. It keeps the +Current stage: `1.0.0-beta.20`, released on 2026-05-23 as a post-beta string +search and ASCII trim foundation. It keeps the `1.0.0-beta` language contract and includes the `1.0.0-beta.1` tooling hardening release, the `1.0.0-beta.2` runtime/resource foundation release, the `1.0.0-beta.3` @@ -26,8 +26,9 @@ documentation, `1.0.0-beta.12` concrete vector helper parity, benchmark suite catalog and metadata gate, `1.0.0-beta.15` reserved generic collection boundary hardening and collection ledger, and `1.0.0-beta.16` string scanning and token boundary helpers, `1.0.0-beta.17` JSON primitive -scalar token parsing, `1.0.0-beta.18` JSON string token parsing, and -`1.0.0-beta.19` test discovery and user-project conformance tooling. +scalar token parsing, `1.0.0-beta.18` JSON string token parsing, +`1.0.0-beta.19` test discovery and user-project conformance tooling, and +`1.0.0-beta.20` string search and ASCII trim helpers. `1.0.0-beta.16` adds `std.string` source facades and examples for `byte_at_result`, `slice_result`, `starts_with`, and `ends_with`. These helpers @@ -76,6 +77,14 @@ freeze, stable standard-library/API compatibility freeze, registry semantics, semver solving, performance claims, mutable vectors, language slice/view APIs, additional runtime names, or Unicode/grapheme semantics. +`1.0.0-beta.20` adds source-authored `std.string` helpers for byte-oriented +`contains`, first/last index option search, and ASCII edge trimming over bytes +`9`, `10`, `11`, `12`, `13`, and `32`. Empty needles match at first index `0` +and last index `(len value)`. The scope adds no compiler-known runtime names, +Unicode/grapheme semantics, case folding, regexes, tokenizer APIs, language +slice/view syntax, mutable strings, stable ABI/layout guarantees, or stable +stdlib/API freeze. 
+ The final experimental precursor scope is `exp-125`, defined in `.llm/EXP_125_UNSIGNED_U32_U64_NUMERIC_AND_STDLIB_BREADTH_ALPHA.md`. Its unsigned direct-value flow, parse/format runtime lanes, and matching staged diff --git a/docs/language/SPEC-v1.md b/docs/language/SPEC-v1.md index 40371cc..e0acf6d 100644 --- a/docs/language/SPEC-v1.md +++ b/docs/language/SPEC-v1.md @@ -9,8 +9,9 @@ diagnostic catalog and schema policy update, and `1.0.0-beta.14` benchmark suite catalog and metadata gate, `1.0.0-beta.15` reserved generic collection boundary hardening and collection ledger, `1.0.0-beta.16` string scanning and token boundary foundation, `1.0.0-beta.17` JSON primitive scalar parsing -foundation, `1.0.0-beta.18` JSON string token parsing foundation, and -`1.0.0-beta.19` test discovery and user-project conformance tooling. The +foundation, `1.0.0-beta.18` JSON string token parsing foundation, +`1.0.0-beta.19` test discovery and user-project conformance tooling, and +`1.0.0-beta.20` string search and ASCII trim foundation. The language contract integrates promoted language slices through `exp-125` and the historical publication @@ -228,6 +229,17 @@ Current v1 release surface and explicit experimental targets: coverage/event streams, stable artifact-manifest or Markdown schemas, LSP/watch behavior, SARIF/daemon protocols, package registries, semver solving, or performance claims +- `1.0.0-beta.20` string search and ASCII trim target: + source-authored `std.string.contains`, `std.string.index_of_option`, + `std.string.last_index_of_option`, `std.string.trim_ascii_start`, + `std.string.trim_ascii_end`, and `std.string.trim_ascii` compose over the + existing byte-oriented string primitives; search is byte-oriented, missing + needles return `none`, empty needles match at first index `0` and last index + `(len value)`, and trim removes only bytes `9`, `10`, `11`, `12`, `13`, and + `32` from the requested edges. 
This target does not add compiler-known + runtime names, Unicode/grapheme semantics, case folding, locale-sensitive + matching, regex, tokenizer APIs, language slice/view syntax, mutable + strings, stable ABI/layout, performance claims, or stable stdlib/API freeze - `exp-1` owned runtime strings: compiler-known `std.string.concat` accepts two `string` values and returns an immutable runtime-owned `string`; existing string equality, length, printing, locals, parameters, returns, and calls work diff --git a/docs/language/STDLIB_API.md b/docs/language/STDLIB_API.md index 7b27915..1521b38 100644 --- a/docs/language/STDLIB_API.md +++ b/docs/language/STDLIB_API.md @@ -6,7 +6,7 @@ Do not edit this file by hand. ## Stability Tiers - `beta-supported`: exported from `lib/std` and covered by source-search, promotion, or facade gates in the current beta line. -- `experimental`: not used for exported `lib/std` helpers in `1.0.0-beta.19`; future releases may mark new helpers this way before they graduate. +- `experimental`: not used for exported `lib/std` helpers in `1.0.0-beta.20`; future releases may mark new helpers this way before they graduate. - `internal`: helper names that are not exported from their module; they are intentionally omitted from this catalog. The catalog is a beta API discovery aid, not a stable `1.0.0` standard-library freeze. 
@@ -16,7 +16,7 @@ Only exported `(fn ...)` helpers are listed; `(type ...)` aliases and non-export ## Summary - Modules: 19 -- Exported helper signatures: 590 +- Exported helper signatures: 596 - Exported type aliases omitted: 0 - Default tier: `beta-supported` @@ -487,7 +487,7 @@ Only exported `(fn ...)` helpers are listed; `(type ...)` aliases and non-export - Path: `lib/std/string.slo` - Tier: `beta-supported` -- Exported helper signatures: 30 +- Exported helper signatures: 36 - `len ((value string)) -> i32` - `concat ((left string) (right string)) -> string` @@ -495,6 +495,12 @@ Only exported `(fn ...)` helpers are listed; `(type ...)` aliases and non-export - `slice_result ((value string) (start i32) (count i32)) -> (result string i32)` - `starts_with ((value string) (prefix string)) -> bool` - `ends_with ((value string) (suffix string)) -> bool` +- `contains ((value string) (needle string)) -> bool` +- `index_of_option ((value string) (needle string)) -> (option i32)` +- `last_index_of_option ((value string) (needle string)) -> (option i32)` +- `trim_ascii_start ((value string)) -> string` +- `trim_ascii_end ((value string)) -> string` +- `trim_ascii ((value string)) -> string` - `parse_i32_result ((value string)) -> (result i32 i32)` - `parse_i32_option ((value string)) -> (option i32)` - `parse_u32_result ((value string)) -> (result u32 i32)` diff --git a/docs/language/examples/projects/std-import-string/src/main.slo b/docs/language/examples/projects/std-import-string/src/main.slo index 74b36c2..1191361 100644 --- a/docs/language/examples/projects/std-import-string/src/main.slo +++ b/docs/language/examples/projects/std-import-string/src/main.slo @@ -1,6 +1,6 @@ (module main) -(import std.string (len concat byte_at_result slice_result starts_with ends_with parse_i32_result parse_i32_option parse_u32_result parse_u32_option parse_i64_result parse_i64_option parse_u64_result parse_u64_option parse_f64_result parse_f64_option parse_bool_result parse_bool_option 
parse_i32_or_zero parse_u32_or_zero parse_i64_or_zero parse_u64_or_zero parse_f64_or_zero parse_bool_or_false parse_i32_or parse_u32_or parse_i64_or parse_u64_or parse_f64_or parse_bool_or)) +(import std.string (len concat byte_at_result slice_result starts_with ends_with contains index_of_option last_index_of_option trim_ascii_start trim_ascii_end trim_ascii parse_i32_result parse_i32_option parse_u32_result parse_u32_option parse_i64_result parse_i64_option parse_u64_result parse_u64_option parse_f64_result parse_f64_option parse_bool_result parse_bool_option parse_i32_or_zero parse_u32_or_zero parse_i64_or_zero parse_u64_or_zero parse_f64_or_zero parse_bool_or_false parse_i32_or parse_u32_or parse_i64_or parse_u64_or parse_f64_or parse_bool_or)) (fn imported_string_concat () -> string (concat "slo" "vo")) @@ -187,6 +187,50 @@ false) false)) +(fn imported_option_i32_is_some_value ((actual (option i32)) (expected i32)) -> bool + (match actual + ((some payload) + (= payload expected)) + ((none) + false))) + +(fn imported_option_i32_is_none ((actual (option i32))) -> bool + (match actual + ((some payload) + false) + ((none) + true))) + +(fn imported_string_search_ok () -> bool + (if (contains "alpha beta alpha" "beta") + (if (contains "alpha" "z") + false + (if (contains "alpha" "") + (if (imported_option_i32_is_some_value (index_of_option "alpha beta alpha" "alpha") 0) + (if (imported_option_i32_is_none (index_of_option "alpha" "z")) + (if (imported_option_i32_is_some_value (index_of_option "alpha" "") 0) + (if (imported_option_i32_is_some_value (last_index_of_option "alpha beta alpha" "alpha") 11) + (if (imported_option_i32_is_some_value (last_index_of_option "alpha" "") 5) + (imported_option_i32_is_none (last_index_of_option "alpha" "z")) + false) + false) + false) + false) + false) + false)) + false)) + +(fn imported_string_ascii_trim_ok () -> bool + (if (= (trim_ascii_start " \t\nslovo") "slovo") + (if (= (trim_ascii_end "slovo \t\n") "slovo") + (if (= 
(trim_ascii " \t\nslovo \t\n") "slovo") + (if (= (trim_ascii " \t\n") "") + (= (trim_ascii "slovo") "slovo") + false) + false) + false) + false)) + (fn imported_string_helpers_ok () -> bool (if (= (imported_string_len_concat_score) 42) (if (imported_string_byte_at_ok) @@ -196,7 +240,11 @@ (if (imported_string_parse_options_ok) (if (imported_string_parse_integer_fallbacks_ok) (if (imported_string_parse_float_bool_fallbacks_ok) - (imported_string_parse_custom_fallbacks_ok) + (if (imported_string_parse_custom_fallbacks_ok) + (if (imported_string_search_ok) + (imported_string_ascii_trim_ok) + false) + false) false) false) false) @@ -238,5 +286,11 @@ (test "explicit std string parse custom fallbacks" (imported_string_parse_custom_fallbacks_ok)) +(test "explicit std string search helpers" + (imported_string_search_ok)) + +(test "explicit std string ascii trim helpers" + (imported_string_ascii_trim_ok)) + (test "explicit std string helpers all" (= (main) 42)) diff --git a/examples/projects/std-import-string/src/main.slo b/examples/projects/std-import-string/src/main.slo index 74b36c2..1191361 100644 --- a/examples/projects/std-import-string/src/main.slo +++ b/examples/projects/std-import-string/src/main.slo @@ -1,6 +1,6 @@ (module main) -(import std.string (len concat byte_at_result slice_result starts_with ends_with parse_i32_result parse_i32_option parse_u32_result parse_u32_option parse_i64_result parse_i64_option parse_u64_result parse_u64_option parse_f64_result parse_f64_option parse_bool_result parse_bool_option parse_i32_or_zero parse_u32_or_zero parse_i64_or_zero parse_u64_or_zero parse_f64_or_zero parse_bool_or_false parse_i32_or parse_u32_or parse_i64_or parse_u64_or parse_f64_or parse_bool_or)) +(import std.string (len concat byte_at_result slice_result starts_with ends_with contains index_of_option last_index_of_option trim_ascii_start trim_ascii_end trim_ascii parse_i32_result parse_i32_option parse_u32_result parse_u32_option parse_i64_result 
parse_i64_option parse_u64_result parse_u64_option parse_f64_result parse_f64_option parse_bool_result parse_bool_option parse_i32_or_zero parse_u32_or_zero parse_i64_or_zero parse_u64_or_zero parse_f64_or_zero parse_bool_or_false parse_i32_or parse_u32_or parse_i64_or parse_u64_or parse_f64_or parse_bool_or)) (fn imported_string_concat () -> string (concat "slo" "vo")) @@ -187,6 +187,50 @@ false) false)) +(fn imported_option_i32_is_some_value ((actual (option i32)) (expected i32)) -> bool + (match actual + ((some payload) + (= payload expected)) + ((none) + false))) + +(fn imported_option_i32_is_none ((actual (option i32))) -> bool + (match actual + ((some payload) + false) + ((none) + true))) + +(fn imported_string_search_ok () -> bool + (if (contains "alpha beta alpha" "beta") + (if (contains "alpha" "z") + false + (if (contains "alpha" "") + (if (imported_option_i32_is_some_value (index_of_option "alpha beta alpha" "alpha") 0) + (if (imported_option_i32_is_none (index_of_option "alpha" "z")) + (if (imported_option_i32_is_some_value (index_of_option "alpha" "") 0) + (if (imported_option_i32_is_some_value (last_index_of_option "alpha beta alpha" "alpha") 11) + (if (imported_option_i32_is_some_value (last_index_of_option "alpha" "") 5) + (imported_option_i32_is_none (last_index_of_option "alpha" "z")) + false) + false) + false) + false) + false) + false)) + false)) + +(fn imported_string_ascii_trim_ok () -> bool + (if (= (trim_ascii_start " \t\nslovo") "slovo") + (if (= (trim_ascii_end "slovo \t\n") "slovo") + (if (= (trim_ascii " \t\nslovo \t\n") "slovo") + (if (= (trim_ascii " \t\n") "") + (= (trim_ascii "slovo") "slovo") + false) + false) + false) + false)) + (fn imported_string_helpers_ok () -> bool (if (= (imported_string_len_concat_score) 42) (if (imported_string_byte_at_ok) @@ -196,7 +240,11 @@ (if (imported_string_parse_options_ok) (if (imported_string_parse_integer_fallbacks_ok) (if (imported_string_parse_float_bool_fallbacks_ok) - 
(imported_string_parse_custom_fallbacks_ok) + (if (imported_string_parse_custom_fallbacks_ok) + (if (imported_string_search_ok) + (imported_string_ascii_trim_ok) + false) + false) false) false) false) @@ -238,5 +286,11 @@ (test "explicit std string parse custom fallbacks" (imported_string_parse_custom_fallbacks_ok)) +(test "explicit std string search helpers" + (imported_string_search_ok)) + +(test "explicit std string ascii trim helpers" + (imported_string_ascii_trim_ok)) + (test "explicit std string helpers all" (= (main) 42)) diff --git a/examples/projects/std-layout-local-string/src/main.slo b/examples/projects/std-layout-local-string/src/main.slo index 34c2f97..b5f75b3 100644 --- a/examples/projects/std-layout-local-string/src/main.slo +++ b/examples/projects/std-layout-local-string/src/main.slo @@ -1,6 +1,6 @@ (module main) -(import string (len concat byte_at_result slice_result starts_with ends_with parse_i32_result parse_i32_option parse_u32_result parse_u32_option parse_i64_result parse_i64_option parse_u64_result parse_u64_option parse_f64_result parse_f64_option parse_bool_result parse_bool_option parse_i32_or_zero parse_u32_or_zero parse_i64_or_zero parse_u64_or_zero parse_f64_or_zero parse_bool_or_false parse_i32_or parse_u32_or parse_i64_or parse_u64_or parse_f64_or parse_bool_or)) +(import string (len concat byte_at_result slice_result starts_with ends_with contains index_of_option last_index_of_option trim_ascii_start trim_ascii_end trim_ascii parse_i32_result parse_i32_option parse_u32_result parse_u32_option parse_i64_result parse_i64_option parse_u64_result parse_u64_option parse_f64_result parse_f64_option parse_bool_result parse_bool_option parse_i32_or_zero parse_u32_or_zero parse_i64_or_zero parse_u64_or_zero parse_f64_or_zero parse_bool_or_false parse_i32_or parse_u32_or parse_i64_or parse_u64_or parse_f64_or parse_bool_or)) (fn imported_string_concat () -> string (concat "slo" "vo")) @@ -187,6 +187,50 @@ false) false)) +(fn 
imported_option_i32_is_some_value ((actual (option i32)) (expected i32)) -> bool + (match actual + ((some payload) + (= payload expected)) + ((none) + false))) + +(fn imported_option_i32_is_none ((actual (option i32))) -> bool + (match actual + ((some payload) + false) + ((none) + true))) + +(fn imported_string_search_ok () -> bool + (if (contains "alpha beta alpha" "beta") + (if (contains "alpha" "z") + false + (if (contains "alpha" "") + (if (imported_option_i32_is_some_value (index_of_option "alpha beta alpha" "alpha") 0) + (if (imported_option_i32_is_none (index_of_option "alpha" "z")) + (if (imported_option_i32_is_some_value (index_of_option "alpha" "") 0) + (if (imported_option_i32_is_some_value (last_index_of_option "alpha beta alpha" "alpha") 11) + (if (imported_option_i32_is_some_value (last_index_of_option "alpha" "") 5) + (imported_option_i32_is_none (last_index_of_option "alpha" "z")) + false) + false) + false) + false) + false) + false)) + false)) + +(fn imported_string_ascii_trim_ok () -> bool + (if (= (trim_ascii_start " \t\nslovo") "slovo") + (if (= (trim_ascii_end "slovo \t\n") "slovo") + (if (= (trim_ascii " \t\nslovo \t\n") "slovo") + (if (= (trim_ascii " \t\n") "") + (= (trim_ascii "slovo") "slovo") + false) + false) + false) + false)) + (fn imported_string_helpers_ok () -> bool (if (= (imported_string_len_concat_score) 42) (if (imported_string_byte_at_ok) @@ -196,7 +240,11 @@ (if (imported_string_parse_options_ok) (if (imported_string_parse_integer_fallbacks_ok) (if (imported_string_parse_float_bool_fallbacks_ok) - (imported_string_parse_custom_fallbacks_ok) + (if (imported_string_parse_custom_fallbacks_ok) + (if (imported_string_search_ok) + (imported_string_ascii_trim_ok) + false) + false) false) false) false) @@ -238,5 +286,11 @@ (test "explicit local string parse custom fallbacks" (imported_string_parse_custom_fallbacks_ok)) +(test "explicit local string search helpers" + (imported_string_search_ok)) + +(test "explicit local string ascii 
trim helpers" + (imported_string_ascii_trim_ok)) + (test "explicit local string helpers all" (= (main) 42)) diff --git a/examples/projects/std-layout-local-string/src/string.slo b/examples/projects/std-layout-local-string/src/string.slo index d324979..e445143 100644 --- a/examples/projects/std-layout-local-string/src/string.slo +++ b/examples/projects/std-layout-local-string/src/string.slo @@ -1,4 +1,4 @@ -(module string (export len concat byte_at_result slice_result starts_with ends_with parse_i32_result parse_i32_option parse_u32_result parse_u32_option parse_i64_result parse_i64_option parse_u64_result parse_u64_option parse_f64_result parse_f64_option parse_bool_result parse_bool_option parse_i32_or_zero parse_u32_or_zero parse_i64_or_zero parse_u64_or_zero parse_f64_or_zero parse_bool_or_false parse_i32_or parse_u32_or parse_i64_or parse_u64_or parse_f64_or parse_bool_or)) +(module string (export len concat byte_at_result slice_result starts_with ends_with contains index_of_option last_index_of_option trim_ascii_start trim_ascii_end trim_ascii parse_i32_result parse_i32_option parse_u32_result parse_u32_option parse_i64_result parse_i64_option parse_u64_result parse_u64_option parse_f64_result parse_f64_option parse_bool_result parse_bool_option parse_i32_or_zero parse_u32_or_zero parse_i64_or_zero parse_u64_or_zero parse_f64_or_zero parse_bool_or_false parse_i32_or parse_u32_or parse_i64_or parse_u64_or parse_f64_or parse_bool_or)) (import result (ok_or_none_i32 ok_or_none_u32 ok_or_none_i64 ok_or_none_u64 ok_or_none_f64 ok_or_none_bool)) @@ -20,6 +20,99 @@ (fn ends_with ((value string) (suffix string)) -> bool (std.string.ends_with value suffix)) +(fn suffix_starts_with ((value string) (needle string) (position i32) (value_len i32)) -> bool + (match (slice_result value position (- value_len position)) + ((ok text) + (starts_with text needle)) + ((err code) + false))) + +(fn index_of_option ((value string) (needle string)) -> (option i32) + (let value_len i32 
(len value)) + (let needle_len i32 (len needle)) + (let max_start i32 (- value_len needle_len)) + (var position i32 0) + (var found_position i32 -1) + (while (and (> needle_len 0) (and (< found_position 0) (<= position max_start))) + (set found_position (if (suffix_starts_with value needle position value_len) + position + found_position)) + (set position (+ position 1))) + (if (= needle_len 0) + (some i32 0) + (if (< found_position 0) + (none i32) + (some i32 found_position)))) + +(fn last_index_of_option ((value string) (needle string)) -> (option i32) + (let value_len i32 (len value)) + (let needle_len i32 (len needle)) + (let max_start i32 (- value_len needle_len)) + (var position i32 0) + (var found_position i32 -1) + (while (and (> needle_len 0) (<= position max_start)) + (set found_position (if (suffix_starts_with value needle position value_len) + position + found_position)) + (set position (+ position 1))) + (if (= needle_len 0) + (some i32 value_len) + (if (< found_position 0) + (none i32) + (some i32 found_position)))) + +(fn contains ((value string) (needle string)) -> bool + (match (index_of_option value needle) + ((some position) + true) + ((none) + false))) + +(fn is_ascii_trim_byte ((value i32)) -> bool + (if (= value 9) + true + (if (= value 10) + true + (if (= value 11) + true + (if (= value 12) + true + (if (= value 13) + true + (= value 32))))))) + +(fn byte_is_ascii_trim ((value string) (position i32)) -> bool + (match (byte_at_result value position) + ((ok byte) + (is_ascii_trim_byte byte)) + ((err code) + false))) + +(fn trim_ascii_start ((value string)) -> string + (let value_len i32 (len value)) + (var start i32 0) + (while (and (< start value_len) (byte_is_ascii_trim value start)) + (set start (+ start 1))) + (match (slice_result value start (- value_len start)) + ((ok text) + text) + ((err code) + value))) + +(fn trim_ascii_end ((value string)) -> string + (let value_len i32 (len value)) + (var end i32 value_len) + (while (and (> end 0) 
(byte_is_ascii_trim value (- end 1))) + (set end (- end 1))) + (match (slice_result value 0 end) + ((ok text) + text) + ((err code) + value))) + +(fn trim_ascii ((value string)) -> string + (trim_ascii_end (trim_ascii_start value))) + (fn parse_i32_result ((value string)) -> (result i32 i32) (std.string.parse_i32_result value)) diff --git a/lib/std/README.md b/lib/std/README.md index 2733f83..eaa6d26 100644 --- a/lib/std/README.md +++ b/lib/std/README.md @@ -22,7 +22,8 @@ helpers updated through `exp-54`, `exp-55`, and `exp-72`; CLI option helpers updated through `exp-110`; CLI local-source gate aligned in `exp-78`; string fallback helpers updated through `exp-60` and `exp-68`; string option helpers updated through `exp-110`; string -byte-scanning and token-boundary helpers updated in `1.0.0-beta.16`; process fallback +byte-scanning and token-boundary helpers updated in `1.0.0-beta.16`; string +search and ASCII trim helpers updated in `1.0.0-beta.20`; process fallback helpers updated through `exp-61`; process typed helpers updated through `exp-67` and `exp-71`; process option helpers updated through `exp-110`; env fallback helpers updated through `exp-62` and `exp-69`; env typed helpers @@ -229,6 +230,12 @@ exp-125 target adds matching `parse_u32_result`, `parse_u32_option`, `1.0.0-beta.16` adds `byte_at_result`, `slice_result`, `starts_with`, and `ends_with` as byte-oriented helpers over current runtime strings. Invalid byte indexes and ranges return `err 1`; empty prefixes and suffixes match. +`1.0.0-beta.20` adds `contains`, +`index_of_option`, `last_index_of_option`, `trim_ascii_start`, +`trim_ascii_end`, and `trim_ascii` as ordinary source helpers over those same +byte primitives. Empty needles match at first index `0` and last index +`(len value)`. ASCII trim removes only bytes `9`, `10`, `11`, `12`, `13`, and +`32` from the requested edges. 
This string scanning foundation does not add Unicode scalar or grapheme semantics, full JSON parsing, tokenizer objects, language-level slice/view syntax, mutable strings, stable ABI/layout promises, or a stable stdlib/API diff --git a/lib/std/string.slo b/lib/std/string.slo index 01ce700..36e4756 100644 --- a/lib/std/string.slo +++ b/lib/std/string.slo @@ -1,4 +1,4 @@ -(module string (export len concat byte_at_result slice_result starts_with ends_with parse_i32_result parse_i32_option parse_u32_result parse_u32_option parse_i64_result parse_i64_option parse_u64_result parse_u64_option parse_f64_result parse_f64_option parse_bool_result parse_bool_option parse_i32_or_zero parse_u32_or_zero parse_i64_or_zero parse_u64_or_zero parse_f64_or_zero parse_bool_or_false parse_i32_or parse_u32_or parse_i64_or parse_u64_or parse_f64_or parse_bool_or)) +(module string (export len concat byte_at_result slice_result starts_with ends_with contains index_of_option last_index_of_option trim_ascii_start trim_ascii_end trim_ascii parse_i32_result parse_i32_option parse_u32_result parse_u32_option parse_i64_result parse_i64_option parse_u64_result parse_u64_option parse_f64_result parse_f64_option parse_bool_result parse_bool_option parse_i32_or_zero parse_u32_or_zero parse_i64_or_zero parse_u64_or_zero parse_f64_or_zero parse_bool_or_false parse_i32_or parse_u32_or parse_i64_or parse_u64_or parse_f64_or parse_bool_or)) (import std.result (ok_or_none_i32 ok_or_none_u32 ok_or_none_i64 ok_or_none_u64 ok_or_none_f64 ok_or_none_bool)) @@ -20,6 +20,99 @@ (fn ends_with ((value string) (suffix string)) -> bool (std.string.ends_with value suffix)) +(fn suffix_starts_with ((value string) (needle string) (position i32) (value_len i32)) -> bool + (match (slice_result value position (- value_len position)) + ((ok text) + (starts_with text needle)) + ((err code) + false))) + +(fn index_of_option ((value string) (needle string)) -> (option i32) + (let value_len i32 (len value)) + (let needle_len 
i32 (len needle)) + (let max_start i32 (- value_len needle_len)) + (var position i32 0) + (var found_position i32 -1) + (while (and (> needle_len 0) (and (< found_position 0) (<= position max_start))) + (set found_position (if (suffix_starts_with value needle position value_len) + position + found_position)) + (set position (+ position 1))) + (if (= needle_len 0) + (some i32 0) + (if (< found_position 0) + (none i32) + (some i32 found_position)))) + +(fn last_index_of_option ((value string) (needle string)) -> (option i32) + (let value_len i32 (len value)) + (let needle_len i32 (len needle)) + (let max_start i32 (- value_len needle_len)) + (var position i32 0) + (var found_position i32 -1) + (while (and (> needle_len 0) (<= position max_start)) + (set found_position (if (suffix_starts_with value needle position value_len) + position + found_position)) + (set position (+ position 1))) + (if (= needle_len 0) + (some i32 value_len) + (if (< found_position 0) + (none i32) + (some i32 found_position)))) + +(fn contains ((value string) (needle string)) -> bool + (match (index_of_option value needle) + ((some position) + true) + ((none) + false))) + +(fn is_ascii_trim_byte ((value i32)) -> bool + (if (= value 9) + true + (if (= value 10) + true + (if (= value 11) + true + (if (= value 12) + true + (if (= value 13) + true + (= value 32))))))) + +(fn byte_is_ascii_trim ((value string) (position i32)) -> bool + (match (byte_at_result value position) + ((ok byte) + (is_ascii_trim_byte byte)) + ((err code) + false))) + +(fn trim_ascii_start ((value string)) -> string + (let value_len i32 (len value)) + (var start i32 0) + (while (and (< start value_len) (byte_is_ascii_trim value start)) + (set start (+ start 1))) + (match (slice_result value start (- value_len start)) + ((ok text) + text) + ((err code) + value))) + +(fn trim_ascii_end ((value string)) -> string + (let value_len i32 (len value)) + (var end i32 value_len) + (while (and (> end 0) (byte_is_ascii_trim value (- end 
1))) + (set end (- end 1))) + (match (slice_result value 0 end) + ((ok text) + text) + ((err code) + value))) + +(fn trim_ascii ((value string)) -> string + (trim_ascii_end (trim_ascii_start value))) + (fn parse_i32_result ((value string)) -> (result i32 i32) (std.string.parse_i32_result value)) diff --git a/scripts/release-gate.sh b/scripts/release-gate.sh index ca21ed4..41007b0 100755 --- a/scripts/release-gate.sh +++ b/scripts/release-gate.sh @@ -70,6 +70,7 @@ cargo test --test standard_string_scanning_beta16 cargo test --test standard_json_scalar_parsing_beta17 cargo test --test standard_json_string_parsing_beta18 cargo test --test test_discovery_beta19 +cargo test --test standard_string_search_trim_beta20 # Full cargo test includes unignored integration gates such as dx_v1_7, # beta_v2_0_0_beta_1, and beta_1_0_0. cargo test