From aa236879f67704bdb34767e869d88fcd8f7dfcba Mon Sep 17 00:00:00 2001 From: Yuriy Lazaryev Date: Fri, 24 Apr 2026 18:19:53 +0200 Subject: [PATCH 1/4] Add negative golden tests for invalid-identifier parse errors (#7742) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Freeze the current (unhelpful) error output for three forms of invalid UPLC identifier: - `foo-bar` — hyphen followed by non-digits - `foo-123-456` — double `-NNN` suffix - `pubKeyHash-305478r71` — hyphen + digits + more letters (the shape Scalus 0.16.0's `toUplcOptimized` emits, from issue #7742) All three cases produce misleading diagnostics today — notably the Scalus case reports the error 8+ characters past the offending name. Capturing the status quo as goldens so that a follow-up improvement to name-parser diagnostics shows up as an explicit golden-file diff. --- .../invalid-identifier-double-unique.golden | 6 +++ .../invalid-identifier-hyphen-letters.golden | 6 +++ .../invalid-identifier-hyphen-word.golden | 6 +++ .../testlib/Generators/Spec.hs | 46 +++++++++++++++++++ 4 files changed, 64 insertions(+) create mode 100644 plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-double-unique.golden create mode 100644 plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-letters.golden create mode 100644 plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-word.golden diff --git a/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-double-unique.golden b/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-double-unique.golden new file mode 100644 index 00000000000..2c1561edb6b --- /dev/null +++ b/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-double-unique.golden @@ -0,0 +1,6 @@ +test:1:28: + | +1 | (program 1.1.0 (lam foo-123-456 foo-123-456)) + | ^ +unexpected '-' +expecting '`', digit, opening bracket '[', or opening parenthesis '(' diff --git a/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-letters.golden b/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-letters.golden new file mode 100644 index 00000000000..d0113c25b87 --- /dev/null +++ b/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-letters.golden @@ -0,0 +1,6 @@ +test:1:42: + | +1 | (program 1.1.0 (lam pubKeyHash-305478r71 (lam x x))) + | ^ +unexpected '(' +expecting closing parenthesis ')' diff --git a/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-word.golden b/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-word.golden new file mode 100644 index 00000000000..f7d50bb245d --- /dev/null +++ b/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-word.golden @@ -0,0 +1,6 @@ +test:1:24: + | +1 | (program 1.1.0 (lam foo-bar foo-bar)) + | ^ +unexpected '-' +expecting '`', identifier-unquoted, opening bracket '[', or opening parenthesis '(' diff --git a/plutus-core/untyped-plutus-core/testlib/Generators/Spec.hs b/plutus-core/untyped-plutus-core/testlib/Generators/Spec.hs index b6036a90b52..c2671111706 100644 --- a/plutus-core/untyped-plutus-core/testlib/Generators/Spec.hs +++ b/plutus-core/untyped-plutus-core/testlib/Generators/Spec.hs @@ -60,6 +60,9 @@ test_parsing = , propMissingConOperands , propInvalidKeyword , propBracketMismatch + , propInvalidIdentifierHyphenLetters + , propInvalidIdentifierHyphenWord + , propInvalidIdentifierDoubleUnique ] ] @@ -241,6 +244,49 @@ propBracketMismatch = "bracket-mismatch" "(program 1.1.0 [(var x))" +{- Note [Negative identifier-grammar tests] +The parser's name grammar treats '-NNN' purely as the numeric unique-suffix: +'foo-123' → Name "foo" (Unique 123). A '-' anywhere else in an identifier is +not allowed by the unquoted grammar (see 'isIdentifierChar' in +'PlutusCore.Name.Unique'). Several tools in the wild (e.g. Scalus 0.16.0's +'toUplcOptimized') emit names like 'pubKeyHash-305478r71' that violate this, +and today the parser mis-parses them in a way that surfaces as a confusing +error hundreds of lines away from the offending name — see issue #7742. + +The goldens below freeze the *current* (unhelpful) error output so that a +future diagnostic improvement shows up as an explicit golden-file diff. +When the parser is taught to point at the bad name itself, accept the new +goldens with 'scripts/regen-goldens.sh' (or '--accept'). -} + +{-| @pubKeyHash-305478r71@ — the exact shape Scalus 0.16.0 produces, inside a +binder. Current behaviour: the parser eats @pubKeyHash-305478@ as name+unique, +picks up @r71@ as the lam body, then fails far away on the next paren. -} +propInvalidIdentifierHyphenLetters :: TestTree +propInvalidIdentifierHyphenLetters = + testParseErrorGolden + "Invalid identifier: hyphen followed by digits then letters" + "invalid-identifier-hyphen-letters" + "(program 1.1.0 (lam pubKeyHash-305478r71 (lam x x)))" + +{-| @foo-bar@ — hyphen followed by non-digits. Current behaviour: the parser +stops at '-' (it is not in 'isIdentifierChar'), takes @foo@ as the name, and +then explodes on @-bar@ which is not a valid continuation anywhere. -} +propInvalidIdentifierHyphenWord :: TestTree +propInvalidIdentifierHyphenWord = + testParseErrorGolden + "Invalid identifier: hyphen followed by non-digits" + "invalid-identifier-hyphen-word" + "(program 1.1.0 (lam foo-bar foo-bar))" + +{-| @foo-123-456@ — ambiguous double '-NNN' run. Current behaviour: the first +@-123@ wins as the unique, @-456@ is left over and fails the next check. -} +propInvalidIdentifierDoubleUnique :: TestTree +propInvalidIdentifierDoubleUnique = + testParseErrorGolden + "Invalid identifier: double unique-suffix" + "invalid-identifier-double-unique" + "(program 1.1.0 (lam foo-123-456 foo-123-456))" + -------------------------------------------------------------------------------- -- Helper Functions ------------------------------------------------------------ From 0001e2f1c85a67463d9d99fe655822a91cc21ddf Mon Sep 17 00:00:00 2001 From: Yuriy Lazaryev Date: Fri, 24 Apr 2026 18:28:58 +0200 Subject: [PATCH 2/4] Point parser diagnostics at the offending name (#7742) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the unquoted-identifier parser finishes, require that the next char is a real word-boundary (not another identifier char and not another '-'). Otherwise the caller wrote something like `pubKeyHash-305478r71`, `foo-bar` or `foo-123-456`: the '-NNN' we just consumed as the numeric unique-suffix is not actually terminal, and the prefix interpretation would silently mis-parse. Consume the remainder of the extended identifier so the diagnostic can cite the full bad text, then raise a new `InvalidIdentifier` custom parser error with a caret on the start of the identifier and an actionable hint to quote it with backticks. For the original Scalus 0.16.0 HTLC reproducer this changes the error from `htlc.uplc:448:39: unexpected '(' expecting ')'` (on a lambda 8+ chars past the real site) to `htlc.uplc:447:41: Invalid identifier 'pubKeyHash-305478r71'` — on the offending name itself. The three negative goldens added in the previous commit are updated to the new message; all 3886 tests across plutus-core/untyped-plutus-core/ plutus-ir pass unchanged. --- ...aryev_issue_7742_uplc_parser_large_case.md | 12 +++++++++ .../plutus-core/src/PlutusCore/Error.hs | 18 +++++++++++++ .../src/PlutusCore/Parser/ParserCommon.hs | 27 ++++++++++++++++++- .../invalid-identifier-double-unique.golden | 9 ++++--- .../invalid-identifier-hyphen-letters.golden | 9 ++++--- .../invalid-identifier-hyphen-word.golden | 9 ++++--- 6 files changed, 71 insertions(+), 13 deletions(-) create mode 100644 plutus-core/changelog.d/20260424_182803_yuriy.lazaryev_issue_7742_uplc_parser_large_case.md diff --git a/plutus-core/changelog.d/20260424_182803_yuriy.lazaryev_issue_7742_uplc_parser_large_case.md b/plutus-core/changelog.d/20260424_182803_yuriy.lazaryev_issue_7742_uplc_parser_large_case.md new file mode 100644 index 00000000000..c206686cc2c --- /dev/null +++ b/plutus-core/changelog.d/20260424_182803_yuriy.lazaryev_issue_7742_uplc_parser_large_case.md @@ -0,0 +1,12 @@ +### Changed + +- The UPLC/PLC/PIR textual parser now rejects unquoted identifiers that + contain a `-` anywhere other than as the terminal numeric unique-suffix + separator (e.g. `pubKeyHash-305478r71`, `foo-bar`, `foo-123-456`) with + a dedicated `InvalidIdentifier` diagnostic that points directly at the + offending name and shows the full bad text. Previously the same inputs + silently mis-parsed — the prefix was taken as a name plus unique-suffix + and the remainder was picked up as an adjacent term — which surfaced as + a confusing "unexpected '(' expecting ')'" message far from the real + site (see #7742). To use such a string as a name verbatim, wrap it in + backticks: `` `pubKeyHash-305478r71` ``. diff --git a/plutus-core/plutus-core/src/PlutusCore/Error.hs b/plutus-core/plutus-core/src/PlutusCore/Error.hs index 6c4692c1599..e9ee8e74e35 100644 --- a/plutus-core/plutus-core/src/PlutusCore/Error.hs +++ b/plutus-core/plutus-core/src/PlutusCore/Error.hs @@ -52,6 +52,12 @@ data ParserError = BuiltinTypeNotAStar !T.Text !SourcePos | UnknownBuiltinFunction !T.Text !SourcePos ![T.Text] | InvalidBuiltinConstant !T.Text !T.Text !SourcePos + | {-| An unquoted identifier that violates the grammar: a '-' appeared + anywhere other than as the separator of a terminal numeric unique-suffix + (e.g. @pubKeyHash-305478r71@, @foo-bar@, @foo-123-456@). The 'Text' + carries the full offending text as it appeared in the source, so the + user sees their own name back in the diagnostic. -} + InvalidIdentifier !T.Text !SourcePos deriving stock (Eq, Ord, Generic) deriving anyclass (NFData) @@ -192,6 +198,18 @@ instance Pretty ParserError where <+> squotes (pretty s) <+> "at" <+> pretty loc + pretty (InvalidIdentifier txt loc) = + "Invalid identifier" + <+> squotes (pretty txt) + <+> "at" + <+> pretty loc + <> "." + <> hardline + <> "A '-' inside a name is the numeric unique-suffix separator and must be" + <+> "followed only by digits and a word boundary." + <> hardline + <> "To use this text as a name verbatim, quote it with backticks:" + <+> pretty ("`" <> txt <> "`") instance ShowErrorComponent ParserError where showErrorComponent = show . pretty diff --git a/plutus-core/plutus-core/src/PlutusCore/Parser/ParserCommon.hs b/plutus-core/plutus-core/src/PlutusCore/Parser/ParserCommon.hs index 18d3be7e2bc..eb3154059e7 100644 --- a/plutus-core/plutus-core/src/PlutusCore/Parser/ParserCommon.hs +++ b/plutus-core/plutus-core/src/PlutusCore/Parser/ParserCommon.hs @@ -13,6 +13,7 @@ import Control.Monad.Except import Control.Monad.Reader (ReaderT, ask, local, runReaderT) import Control.Monad.State (StateT, evalStateT) import Data.Map qualified as M +import Data.Set qualified as Set import Data.Text (Text) import Data.Text qualified as Text import Text.Megaparsec hiding (ParseError, State, parse, some) @@ -217,9 +218,33 @@ name = try $ parseUnquoted <|> parseQuoted where parseUnquoted :: Parser Name parseUnquoted = do + startOffset <- getOffset + startPos <- getSourcePos' _ <- lookAhead (satisfy isIdentifierStartingChar) + inputBefore <- getInput str <- takeWhileP (Just "identifier-unquoted") isIdentifierChar - Name str <$> uniqueSuffix str + u <- uniqueSuffix str + {- The parsed prefix is only a valid identifier if the next character is + a real word-boundary. If instead we see more identifier chars or another + '-', the user wrote something like `foo-bar` or `pubKeyHash-305478r71` — + the '-NNN' run we just treated as a unique-suffix was actually part of + their intended name (or they have a stray '-' at all). Fail with a + custom diagnostic that points at the whole offending identifier. -} + mBad <- optional (lookAhead (satisfy isNameExtensionChar)) + case mBad of + Nothing -> pure (Name str u) + Just _ -> do + -- Consume the remainder so the reported text covers the full name. + _ <- takeWhileP Nothing isNameExtensionChar + inputAfter <- getInput + let consumed = Text.length inputBefore - Text.length inputAfter + fullText = Text.take consumed inputBefore + parseError $ + FancyError startOffset $ + Set.singleton (ErrorCustom (InvalidIdentifier fullText startPos)) + + isNameExtensionChar :: Char -> Bool + isNameExtensionChar c = isIdentifierChar c || c == '-' parseQuoted :: Parser Name parseQuoted = do diff --git a/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-double-unique.golden b/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-double-unique.golden index 2c1561edb6b..f851fa8de87 100644 --- a/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-double-unique.golden +++ b/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-double-unique.golden @@ -1,6 +1,7 @@ -test:1:28: +test:1:21: | 1 | (program 1.1.0 (lam foo-123-456 foo-123-456)) - | ^ -unexpected '-' -expecting '`', digit, opening bracket '[', or opening parenthesis '(' + | ^ +Invalid identifier 'foo-123-456' at test:1:21. +A '-' inside a name is the numeric unique-suffix separator and must be followed only by digits and a word boundary. +To use this text as a name verbatim, quote it with backticks: `foo-123-456` diff --git a/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-letters.golden b/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-letters.golden index d0113c25b87..effdf9e6cb8 100644 --- a/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-letters.golden +++ b/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-letters.golden @@ -1,6 +1,7 @@ -test:1:42: +test:1:21: | 1 | (program 1.1.0 (lam pubKeyHash-305478r71 (lam x x))) - | ^ -unexpected '(' -expecting closing parenthesis ')' + | ^ +Invalid identifier 'pubKeyHash-305478r71' at test:1:21. +A '-' inside a name is the numeric unique-suffix separator and must be followed only by digits and a word boundary. +To use this text as a name verbatim, quote it with backticks: `pubKeyHash-305478r71` diff --git a/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-word.golden b/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-word.golden index f7d50bb245d..0ed707dbbf1 100644 --- a/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-word.golden +++ b/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-word.golden @@ -1,6 +1,7 @@ -test:1:24: +test:1:21: | 1 | (program 1.1.0 (lam foo-bar foo-bar)) - | ^ -unexpected '-' -expecting '`', identifier-unquoted, opening bracket '[', or opening parenthesis '(' + | ^ +Invalid identifier 'foo-bar' at test:1:21. +A '-' inside a name is the numeric unique-suffix separator and must be followed only by digits and a word boundary. +To use this text as a name verbatim, quote it with backticks: `foo-bar` From 1853b6b45c1606404930d7dbb98739e3373c1a94 Mon Sep 17 00:00:00 2001 From: Yuriy Lazaryev Date: Tue, 28 Apr 2026 16:24:35 +0200 Subject: [PATCH 3/4] Refactor unique-suffix parsing: MalformedUniqueSuffix + property tests (#7742) Parser commits on '-' to a unique-suffix interpretation: the region up to the next word boundary must validate as a non-empty digit string. Error variant renamed InvalidIdentifier -> MalformedUniqueSuffix, now carrying base name, malformed suffix text, and the position right after '-' so the diagnostic caret aligns with the suffix shown in the message. Three near-duplicate goldens are replaced by a Hedgehog positive/negative property pair (constructive bad-suffix generator, no rejection sampling) plus one regression golden frozen to the Scalus pubKeyHash-305478r71 shape. --- ...aryev_issue_7742_uplc_parser_large_case.md | 15 +- plutus-core/plutus-core.cabal | 1 + .../plutus-core/src/PlutusCore/Error.hs | 27 ++- .../src/PlutusCore/Parser/ParserCommon.hs | 50 +++--- .../invalid-identifier-double-unique.golden | 7 - .../invalid-identifier-hyphen-letters.golden | 7 - .../invalid-identifier-hyphen-word.golden | 7 - .../malformed-unique-suffix-scalus.golden | 7 + .../testlib/Generators/Spec.hs | 164 +++++++++++++----- 9 files changed, 166 insertions(+), 119 deletions(-) delete mode 100644 plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-double-unique.golden delete mode 100644 plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-letters.golden delete mode 100644 plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-word.golden create mode 100644 plutus-core/untyped-plutus-core/test/Parser/Golden/malformed-unique-suffix-scalus.golden diff --git a/plutus-core/changelog.d/20260424_182803_yuriy.lazaryev_issue_7742_uplc_parser_large_case.md b/plutus-core/changelog.d/20260424_182803_yuriy.lazaryev_issue_7742_uplc_parser_large_case.md index c206686cc2c..003826f4680 100644 --- a/plutus-core/changelog.d/20260424_182803_yuriy.lazaryev_issue_7742_uplc_parser_large_case.md +++ b/plutus-core/changelog.d/20260424_182803_yuriy.lazaryev_issue_7742_uplc_parser_large_case.md @@ -1,12 +1,7 @@ ### Changed -- The UPLC/PLC/PIR textual parser now rejects unquoted identifiers that - contain a `-` anywhere other than as the terminal numeric unique-suffix - separator (e.g. `pubKeyHash-305478r71`, `foo-bar`, `foo-123-456`) with - a dedicated `InvalidIdentifier` diagnostic that points directly at the - offending name and shows the full bad text. Previously the same inputs - silently mis-parsed — the prefix was taken as a name plus unique-suffix - and the remainder was picked up as an adjacent term — which surfaced as - a confusing "unexpected '(' expecting ')'" message far from the real - site (see #7742). To use such a string as a name verbatim, wrap it in - backticks: `` `pubKeyHash-305478r71` ``. +- The UPLC/PLC/PIR textual parser now rejects an unquoted name whose unique + suffix (the region after `-`) is not a non-empty sequence of digits, + raising a dedicated `MalformedUniqueSuffix` diagnostic pointing at the + `-` and showing the offending suffix text (#7742). Wrap such a string + in backticks to use it as a name verbatim: `` `pubKeyHash-305478r71` ``. diff --git a/plutus-core/plutus-core.cabal b/plutus-core/plutus-core.cabal index 606a69f515b..083486eb859 100644 --- a/plutus-core/plutus-core.cabal +++ b/plutus-core/plutus-core.cabal @@ -489,6 +489,7 @@ library untyped-plutus-core-testlib , base16-bytestring , bytestring , cardano-crypto-class + , containers , data-default-class , dlist , extra diff --git a/plutus-core/plutus-core/src/PlutusCore/Error.hs b/plutus-core/plutus-core/src/PlutusCore/Error.hs index e9ee8e74e35..df7da8f0a8b 100644 --- a/plutus-core/plutus-core/src/PlutusCore/Error.hs +++ b/plutus-core/plutus-core/src/PlutusCore/Error.hs @@ -2,14 +2,12 @@ {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE FunctionalDependencies #-} {-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeOperators #-} {-# LANGUAGE UndecidableInstances #-} +-- appears in the generated instances: {-# OPTIONS_GHC -fno-warn-orphans #-} --- appears in the generated instances - module PlutusCore.Error ( ParserError (..) , ParserErrorBundle (..) @@ -52,12 +50,7 @@ data ParserError = BuiltinTypeNotAStar !T.Text !SourcePos | UnknownBuiltinFunction !T.Text !SourcePos ![T.Text] | InvalidBuiltinConstant !T.Text !T.Text !SourcePos - | {-| An unquoted identifier that violates the grammar: a '-' appeared - anywhere other than as the separator of a terminal numeric unique-suffix - (e.g. @pubKeyHash-305478r71@, @foo-bar@, @foo-123-456@). The 'Text' - carries the full offending text as it appeared in the source, so the - user sees their own name back in the diagnostic. -} - InvalidIdentifier !T.Text !SourcePos + | MalformedUniqueSuffix !T.Text !T.Text !SourcePos deriving stock (Eq, Ord, Generic) deriving anyclass (NFData) @@ -198,18 +191,22 @@ instance Pretty ParserError where <+> squotes (pretty s) <+> "at" <+> pretty loc - pretty (InvalidIdentifier txt loc) = - "Invalid identifier" - <+> squotes (pretty txt) + pretty (MalformedUniqueSuffix base suffix loc) = + "Malformed unique suffix" + <+> squotes (pretty suffix) + <+> "for name" + <+> squotes (pretty base) <+> "at" <+> pretty loc <> "." <> hardline - <> "A '-' inside a name is the numeric unique-suffix separator and must be" - <+> "followed only by digits and a word boundary." + <> "A unique suffix must be a non-empty sequence of digits" + <+> "(e.g." + <+> squotes "-123" + <> ")." <> hardline <> "To use this text as a name verbatim, quote it with backticks:" - <+> pretty ("`" <> txt <> "`") + <+> pretty ("`" <> base <> "-" <> suffix <> "`") instance ShowErrorComponent ParserError where showErrorComponent = show . pretty diff --git a/plutus-core/plutus-core/src/PlutusCore/Parser/ParserCommon.hs b/plutus-core/plutus-core/src/PlutusCore/Parser/ParserCommon.hs index eb3154059e7..0ab8d69e928 100644 --- a/plutus-core/plutus-core/src/PlutusCore/Parser/ParserCommon.hs +++ b/plutus-core/plutus-core/src/PlutusCore/Parser/ParserCommon.hs @@ -16,6 +16,7 @@ import Data.Map qualified as M import Data.Set qualified as Set import Data.Text (Text) import Data.Text qualified as Text +import Data.Text.Read qualified as TextRead import Text.Megaparsec hiding (ParseError, State, parse, some) import Text.Megaparsec.Char (char, space1) import Text.Megaparsec.Char.Lexer qualified as Lex hiding (hexadecimal) @@ -218,30 +219,9 @@ name = try $ parseUnquoted <|> parseQuoted where parseUnquoted :: Parser Name parseUnquoted = do - startOffset <- getOffset - startPos <- getSourcePos' _ <- lookAhead (satisfy isIdentifierStartingChar) - inputBefore <- getInput - str <- takeWhileP (Just "identifier-unquoted") isIdentifierChar - u <- uniqueSuffix str - {- The parsed prefix is only a valid identifier if the next character is - a real word-boundary. If instead we see more identifier chars or another - '-', the user wrote something like `foo-bar` or `pubKeyHash-305478r71` — - the '-NNN' run we just treated as a unique-suffix was actually part of - their intended name (or they have a stray '-' at all). Fail with a - custom diagnostic that points at the whole offending identifier. -} - mBad <- optional (lookAhead (satisfy isNameExtensionChar)) - case mBad of - Nothing -> pure (Name str u) - Just _ -> do - -- Consume the remainder so the reported text covers the full name. - _ <- takeWhileP Nothing isNameExtensionChar - inputAfter <- getInput - let consumed = Text.length inputBefore - Text.length inputAfter - fullText = Text.take consumed inputBefore - parseError $ - FancyError startOffset $ - Set.singleton (ErrorCustom (InvalidIdentifier fullText startPos)) + base <- takeWhileP (Just "identifier-unquoted") isIdentifierChar + Name base <$> uniqueSuffix base isNameExtensionChar :: Char -> Bool isNameExtensionChar c = isIdentifierChar c || c == '-' @@ -254,11 +234,27 @@ name = try $ parseUnquoted <|> parseQuoted _ <- char '`' Name str <$> uniqueSuffix str - -- Tries to parse a `Unique` value. - -- If it fails then looks up the `Unique` value for the given name. - -- If lookup fails too then generates a fresh `Unique` value. + {- Parses an optional unique-suffix, committing on '-': if a '-' is seen, + the entire region up to the next word boundary must validate as a + non-empty digit-string, otherwise we raise 'MalformedUniqueSuffix'. If no + '-' is seen, the name has no explicit unique and we look one up (or + generate a fresh one). -} uniqueSuffix :: Text -> Parser Unique - uniqueSuffix nameStr = try (Unique <$> (char '-' *> Lex.decimal)) <|> uniqueForName nameStr + uniqueSuffix nameStr = do + mDash <- optional (char '-') + case mDash of + Nothing -> uniqueForName nameStr + Just _ -> do + suffixOff <- getOffset + suffixPos <- getSourcePos' + suffixText <- takeWhileP (Just "unique-suffix") isNameExtensionChar + case TextRead.decimal suffixText of + Right (n, rest) | Text.null rest -> pure (Unique n) + _ -> + parseError $ + FancyError suffixOff $ + Set.singleton + (ErrorCustom (MalformedUniqueSuffix nameStr suffixText suffixPos)) -- Return the unique identifier of a name. -- If it's not in the current parser state, map the name to a fresh id and add it to the state. diff --git a/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-double-unique.golden b/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-double-unique.golden deleted file mode 100644 index f851fa8de87..00000000000 --- a/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-double-unique.golden +++ /dev/null @@ -1,7 +0,0 @@ -test:1:21: - | -1 | (program 1.1.0 (lam foo-123-456 foo-123-456)) - | ^ -Invalid identifier 'foo-123-456' at test:1:21. -A '-' inside a name is the numeric unique-suffix separator and must be followed only by digits and a word boundary. -To use this text as a name verbatim, quote it with backticks: `foo-123-456` diff --git a/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-letters.golden b/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-letters.golden deleted file mode 100644 index effdf9e6cb8..00000000000 --- a/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-letters.golden +++ /dev/null @@ -1,7 +0,0 @@ -test:1:21: - | -1 | (program 1.1.0 (lam pubKeyHash-305478r71 (lam x x))) - | ^ -Invalid identifier 'pubKeyHash-305478r71' at test:1:21. -A '-' inside a name is the numeric unique-suffix separator and must be followed only by digits and a word boundary. -To use this text as a name verbatim, quote it with backticks: `pubKeyHash-305478r71` diff --git a/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-word.golden b/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-word.golden deleted file mode 100644 index 0ed707dbbf1..00000000000 --- a/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-word.golden +++ /dev/null @@ -1,7 +0,0 @@ -test:1:21: - | -1 | (program 1.1.0 (lam foo-bar foo-bar)) - | ^ -Invalid identifier 'foo-bar' at test:1:21. -A '-' inside a name is the numeric unique-suffix separator and must be followed only by digits and a word boundary. -To use this text as a name verbatim, quote it with backticks: `foo-bar` diff --git a/plutus-core/untyped-plutus-core/test/Parser/Golden/malformed-unique-suffix-scalus.golden b/plutus-core/untyped-plutus-core/test/Parser/Golden/malformed-unique-suffix-scalus.golden new file mode 100644 index 00000000000..3fe1b0b9a2a --- /dev/null +++ b/plutus-core/untyped-plutus-core/test/Parser/Golden/malformed-unique-suffix-scalus.golden @@ -0,0 +1,7 @@ +test:1:32: + | +1 | (program 1.1.0 (lam pubKeyHash-305478r71 (lam x x))) + | ^ +Malformed unique suffix '305478r71' for name 'pubKeyHash' at test:1:32. +A unique suffix must be a non-empty sequence of digits (e.g. '-123'). +To use this text as a name verbatim, quote it with backticks: `pubKeyHash-305478r71` diff --git a/plutus-core/untyped-plutus-core/testlib/Generators/Spec.hs b/plutus-core/untyped-plutus-core/testlib/Generators/Spec.hs index c2671111706..2357846f9a7 100644 --- a/plutus-core/untyped-plutus-core/testlib/Generators/Spec.hs +++ b/plutus-core/untyped-plutus-core/testlib/Generators/Spec.hs @@ -8,15 +8,17 @@ module Generators.Spec where import PlutusPrelude (display, fold, void, (&&&)) import Control.Lens (view) +import Data.Foldable qualified as F +import Data.Set qualified as Set import Data.Text (Text) import Data.Text qualified as T -import Hedgehog (annotate, annotateShow, failure, property, tripping, (===)) +import Hedgehog (Gen, annotate, annotateShow, failure, forAll, property, tripping, (===)) import Hedgehog.Gen qualified as Gen import Hedgehog.Range qualified as Range -import PlutusCore (Name) +import PlutusCore (Name (..), Unique (..)) import PlutusCore.Annotation (SrcSpan (..)) import PlutusCore.Default (DefaultFun, DefaultUni) -import PlutusCore.Error (ParserErrorBundle (ParseErrorB)) +import PlutusCore.Error (ParserError (..), ParserErrorBundle (ParseErrorB)) import PlutusCore.Flat (flat, unflat) import PlutusCore.Generators.Hedgehog (forAllPretty) import PlutusCore.Generators.Hedgehog.AST (runAstGen) @@ -28,7 +30,7 @@ import Test.Tasty (TestTree, testGroup) import Test.Tasty.Golden (goldenVsString) import Test.Tasty.HUnit (testCase, (@?=)) import Test.Tasty.Hedgehog (testPropertyNamed) -import Text.Megaparsec (errorBundlePretty) +import Text.Megaparsec (ErrorFancy (..), ParseError (..), bundleErrors, errorBundlePretty) import Data.ByteString.Lazy qualified as BSL import Data.Text.Encoding (encodeUtf8) @@ -60,9 +62,9 @@ test_parsing = , propMissingConOperands , propInvalidKeyword , propBracketMismatch - , propInvalidIdentifierHyphenLetters - , propInvalidIdentifierHyphenWord - , propInvalidIdentifierDoubleUnique + , propValidUniqueSuffix + , propInvalidUniqueSuffix + , propInvalidUniqueSuffixScalusRegression ] ] @@ -244,48 +246,118 @@ propBracketMismatch = "bracket-mismatch" "(program 1.1.0 [(var x))" -{- Note [Negative identifier-grammar tests] -The parser's name grammar treats '-NNN' purely as the numeric unique-suffix: -'foo-123' → Name "foo" (Unique 123). A '-' anywhere else in an identifier is -not allowed by the unquoted grammar (see 'isIdentifierChar' in -'PlutusCore.Name.Unique'). Several tools in the wild (e.g. Scalus 0.16.0's -'toUplcOptimized') emit names like 'pubKeyHash-305478r71' that violate this, -and today the parser mis-parses them in a way that surfaces as a confusing -error hundreds of lines away from the offending name — see issue #7742. - -The goldens below freeze the *current* (unhelpful) error output so that a -future diagnostic improvement shows up as an explicit golden-file diff. -When the parser is taught to point at the bad name itself, accept the new -goldens with 'scripts/regen-goldens.sh' (or '--accept'). -} - -{-| @pubKeyHash-305478r71@ — the exact shape Scalus 0.16.0 produces, inside a -binder. Current behaviour: the parser eats @pubKeyHash-305478@ as name+unique, -picks up @r71@ as the lam body, then fails far away on the next paren. -} -propInvalidIdentifierHyphenLetters :: TestTree -propInvalidIdentifierHyphenLetters = +propInvalidUniqueSuffixScalusRegression :: TestTree +propInvalidUniqueSuffixScalusRegression = testParseErrorGolden - "Invalid identifier: hyphen followed by digits then letters" - "invalid-identifier-hyphen-letters" + "MalformedUniqueSuffix: Scalus pubKeyHash-305478r71 regression (#7742)" + "malformed-unique-suffix-scalus" "(program 1.1.0 (lam pubKeyHash-305478r71 (lam x x)))" -{-| @foo-bar@ — hyphen followed by non-digits. Current behaviour: the parser -stops at '-' (it is not in 'isIdentifierChar'), takes @foo@ as the name, and -then explodes on @-bar@ which is not a valid continuation anywhere. -} -propInvalidIdentifierHyphenWord :: TestTree -propInvalidIdentifierHyphenWord = - testParseErrorGolden - "Invalid identifier: hyphen followed by non-digits" - "invalid-identifier-hyphen-word" - "(program 1.1.0 (lam foo-bar foo-bar))" - -{-| @foo-123-456@ — ambiguous double '-NNN' run. Current behaviour: the first -@-123@ wins as the unique, @-456@ is left over and fails the next check. -} -propInvalidIdentifierDoubleUnique :: TestTree -propInvalidIdentifierDoubleUnique = - testParseErrorGolden - "Invalid identifier: double unique-suffix" - "invalid-identifier-double-unique" - "(program 1.1.0 (lam foo-123-456 foo-123-456))" +{-| A '-' unquoted name parses to a 'Name' carrying the base +text and a 'Unique' equal to the digits. -} +propValidUniqueSuffix :: TestTree +propValidUniqueSuffix = + testPropertyNamed + "Valid unique suffix: - parses to Name (Unique )" + "valid-unique-suffix" + $ property + $ do + base <- forAll genBaseName + n <- forAll (Gen.integral (Range.linear 0 9999999)) + let nText = T.pack (show (n :: Int)) + input = "(lam " <> base <> "-" <> nText <> " (con bool True))" + case runQuoteT (parseTerm input) of + Right (UPLC.LamAbs _ binder _) -> do + _nameText binder === base + _nameUnique binder === Unique n + Right other -> do + annotate ("Expected LamAbs, got: " <> show other) + failure + Left bundle -> do + annotateShow bundle + failure + +{-| A '-' unquoted name (where '' is empty, contains a +non-digit, or contains another '-') raises 'MalformedUniqueSuffix' carrying +'' and '' verbatim. -} +propInvalidUniqueSuffix :: TestTree +propInvalidUniqueSuffix = + testPropertyNamed + "Invalid unique suffix: - raises MalformedUniqueSuffix " + "invalid-unique-suffix" + $ property + $ do + base <- forAll genBaseName + bad <- forAll genBadSuffix + let input = "(lam " <> base <> "-" <> bad <> " (con bool True))" + case runQuoteT (parseTerm input) of + Right ok -> do + annotate ("Expected MalformedUniqueSuffix, got success: " <> show ok) + failure + Left bundle -> + case extractMalformedUniqueSuffix bundle of + Just (b, s) -> do + b === base + s === bad + Nothing -> do + annotateShow bundle + failure + where + extractMalformedUniqueSuffix :: ParserErrorBundle -> Maybe (Text, Text) + extractMalformedUniqueSuffix (ParseErrorB bundle) = + case [ (b, s) + | err <- F.toList (bundleErrors bundle) + , (b, s) <- fanciesOf err + ] of + (x : _) -> Just x + [] -> Nothing + fanciesOf (FancyError _ es) = + [(b, s) | ErrorCustom (MalformedUniqueSuffix b s _) <- Set.toList es] + fanciesOf _ = [] + +-- Generators for unquoted-name property tests. + +genIdStartChar :: Gen Char +genIdStartChar = + Gen.choice [Gen.element ['a' .. 'z'], Gen.element ['A' .. 'Z'], pure '_'] + +genIdRestChar :: Gen Char +genIdRestChar = + Gen.choice [genIdStartChar, Gen.element ['0' .. '9'], pure '\''] + +genBaseName :: Gen Text +genBaseName = do + hd <- genIdStartChar + tl <- Gen.list (Range.linear 0 8) genIdRestChar + pure (T.pack (hd : tl)) + +{-| Generate a guaranteed-malformed suffix by either returning the empty string, +or starting from a valid digit-string base (possibly empty) and inserting one +or more invalidating characters at random positions. The invalidating set is +'isNameExtensionChar' minus digits, so any single insertion turns the result +into a non-digit-only string. -} +genBadSuffix :: Gen Text +genBadSuffix = + Gen.choice + [ pure T.empty + , do + base <- T.pack <$> Gen.list (Range.linear 0 8) (Gen.element ['0' .. '9']) + n <- Gen.integral (Range.linear 1 3 :: Range.Range Int) + applyMutations n base + ] + where + applyMutations :: Int -> Text -> Gen Text + applyMutations 0 t = pure t + applyMutations k t = insertInvalidatingChar t >>= applyMutations (k - 1) + + insertInvalidatingChar :: Text -> Gen Text + insertInvalidatingChar t = do + pos <- Gen.integral (Range.linear 0 (T.length t)) + c <- Gen.element invalidatingChars + pure (T.take pos t <> T.singleton c <> T.drop pos t) + + invalidatingChars :: String + invalidatingChars = ['a' .. 'z'] <> ['A' .. 'Z'] <> "_'-" -------------------------------------------------------------------------------- -- Helper Functions ------------------------------------------------------------ From f70f713308af66c00ff9a417663a03d1c08ca7e8 Mon Sep 17 00:00:00 2001 From: Yuriy Lazaryev Date: Wed, 29 Apr 2026 09:15:38 +0200 Subject: [PATCH 4/4] ci: retrigger after Hydra x86_64 timeout (#7742)