diff --git a/plutus-core/changelog.d/20260424_182803_yuriy.lazaryev_issue_7742_uplc_parser_large_case.md b/plutus-core/changelog.d/20260424_182803_yuriy.lazaryev_issue_7742_uplc_parser_large_case.md new file mode 100644 index 00000000000..003826f4680 --- /dev/null +++ b/plutus-core/changelog.d/20260424_182803_yuriy.lazaryev_issue_7742_uplc_parser_large_case.md @@ -0,0 +1,7 @@ +### Changed + +- The UPLC/PLC/PIR textual parser now rejects an unquoted name whose unique + suffix (the region after `-`) is not a non-empty sequence of digits, + raising a dedicated `MalformedUniqueSuffix` diagnostic pointing at the + `-` and showing the offending suffix text (#7742). Wrap such a string + in backticks to use it as a name verbatim: `` `pubKeyHash-305478r71` ``. diff --git a/plutus-core/plutus-core.cabal b/plutus-core/plutus-core.cabal index 606a69f515b..083486eb859 100644 --- a/plutus-core/plutus-core.cabal +++ b/plutus-core/plutus-core.cabal @@ -489,6 +489,7 @@ library untyped-plutus-core-testlib , base16-bytestring , bytestring , cardano-crypto-class + , containers , data-default-class , dlist , extra diff --git a/plutus-core/plutus-core/src/PlutusCore/Error.hs b/plutus-core/plutus-core/src/PlutusCore/Error.hs index 6c4692c1599..df7da8f0a8b 100644 --- a/plutus-core/plutus-core/src/PlutusCore/Error.hs +++ b/plutus-core/plutus-core/src/PlutusCore/Error.hs @@ -2,14 +2,12 @@ {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE FunctionalDependencies #-} {-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeOperators #-} {-# LANGUAGE UndecidableInstances #-} +-- appears in the generated instances: {-# OPTIONS_GHC -fno-warn-orphans #-} --- appears in the generated instances - module PlutusCore.Error ( ParserError (..) , ParserErrorBundle (..) @@ -52,6 +50,7 @@ data ParserError = BuiltinTypeNotAStar !T.Text !SourcePos | UnknownBuiltinFunction !T.Text !SourcePos ![T.Text] | InvalidBuiltinConstant !T.Text !T.Text !SourcePos + | MalformedUniqueSuffix !T.Text !T.Text !SourcePos deriving stock (Eq, Ord, Generic) deriving anyclass (NFData) @@ -192,6 +191,22 @@ instance Pretty ParserError where <+> squotes (pretty s) <+> "at" <+> pretty loc + pretty (MalformedUniqueSuffix base suffix loc) = + "Malformed unique suffix" + <+> squotes (pretty suffix) + <+> "for name" + <+> squotes (pretty base) + <+> "at" + <+> pretty loc + <> "." + <> hardline + <> "A unique suffix must be a non-empty sequence of digits" + <+> "(e.g." + <+> squotes "-123" + <> ")." + <> hardline + <> "To use this text as a name verbatim, quote it with backticks:" + <+> pretty ("`" <> base <> "-" <> suffix <> "`") instance ShowErrorComponent ParserError where showErrorComponent = show . pretty diff --git a/plutus-core/plutus-core/src/PlutusCore/Parser/ParserCommon.hs b/plutus-core/plutus-core/src/PlutusCore/Parser/ParserCommon.hs index 18d3be7e2bc..0ab8d69e928 100644 --- a/plutus-core/plutus-core/src/PlutusCore/Parser/ParserCommon.hs +++ b/plutus-core/plutus-core/src/PlutusCore/Parser/ParserCommon.hs @@ -13,8 +13,10 @@ import Control.Monad.Except import Control.Monad.Reader (ReaderT, ask, local, runReaderT) import Control.Monad.State (StateT, evalStateT) import Data.Map qualified as M +import Data.Set qualified as Set import Data.Text (Text) import Data.Text qualified as Text +import Data.Text.Read qualified as TextRead import Text.Megaparsec hiding (ParseError, State, parse, some) import Text.Megaparsec.Char (char, space1) import Text.Megaparsec.Char.Lexer qualified as Lex hiding (hexadecimal) @@ -218,8 +220,11 @@ name = try $ parseUnquoted <|> parseQuoted parseUnquoted :: Parser Name parseUnquoted = do _ <- lookAhead (satisfy isIdentifierStartingChar) - str <- takeWhileP (Just "identifier-unquoted") isIdentifierChar - Name str <$> uniqueSuffix str + base <- takeWhileP (Just "identifier-unquoted") isIdentifierChar + Name base <$> uniqueSuffix base + + isNameExtensionChar :: Char -> Bool + isNameExtensionChar c = isIdentifierChar c || c == '-' parseQuoted :: Parser Name parseQuoted = do @@ -229,11 +234,27 @@ name = try $ parseUnquoted <|> parseQuoted _ <- char '`' Name str <$> uniqueSuffix str - -- Tries to parse a `Unique` value. - -- If it fails then looks up the `Unique` value for the given name. - -- If lookup fails too then generates a fresh `Unique` value. + {- Parses an optional unique-suffix, committing on '-': if a '-' is seen, + the entire region up to the next word boundary must validate as a + non-empty digit-string, otherwise we raise 'MalformedUniqueSuffix'. If no + '-' is seen, the name has no explicit unique and we look one up (or + generate a fresh one). -} uniqueSuffix :: Text -> Parser Unique - uniqueSuffix nameStr = try (Unique <$> (char '-' *> Lex.decimal)) <|> uniqueForName nameStr + uniqueSuffix nameStr = do + mDash <- optional (char '-') + case mDash of + Nothing -> uniqueForName nameStr + Just _ -> do + suffixOff <- getOffset + suffixPos <- getSourcePos' + suffixText <- takeWhileP (Just "unique-suffix") isNameExtensionChar + case TextRead.decimal suffixText of + Right (n, rest) | Text.null rest -> pure (Unique n) + _ -> + parseError $ + FancyError suffixOff $ + Set.singleton + (ErrorCustom (MalformedUniqueSuffix nameStr suffixText suffixPos)) -- Return the unique identifier of a name. -- If it's not in the current parser state, map the name to a fresh id and add it to the state. diff --git a/plutus-core/untyped-plutus-core/test/Parser/Golden/malformed-unique-suffix-scalus.golden b/plutus-core/untyped-plutus-core/test/Parser/Golden/malformed-unique-suffix-scalus.golden new file mode 100644 index 00000000000..3fe1b0b9a2a --- /dev/null +++ b/plutus-core/untyped-plutus-core/test/Parser/Golden/malformed-unique-suffix-scalus.golden @@ -0,0 +1,7 @@ +test:1:32: + | +1 | (program 1.1.0 (lam pubKeyHash-305478r71 (lam x x))) + | ^ +Malformed unique suffix '305478r71' for name 'pubKeyHash' at test:1:32. +A unique suffix must be a non-empty sequence of digits (e.g. '-123'). +To use this text as a name verbatim, quote it with backticks: `pubKeyHash-305478r71` diff --git a/plutus-core/untyped-plutus-core/testlib/Generators/Spec.hs b/plutus-core/untyped-plutus-core/testlib/Generators/Spec.hs index b6036a90b52..2357846f9a7 100644 --- a/plutus-core/untyped-plutus-core/testlib/Generators/Spec.hs +++ b/plutus-core/untyped-plutus-core/testlib/Generators/Spec.hs @@ -8,15 +8,17 @@ module Generators.Spec where import PlutusPrelude (display, fold, void, (&&&)) import Control.Lens (view) +import Data.Foldable qualified as F +import Data.Set qualified as Set import Data.Text (Text) import Data.Text qualified as T -import Hedgehog (annotate, annotateShow, failure, property, tripping, (===)) +import Hedgehog (Gen, annotate, annotateShow, failure, forAll, property, tripping, (===)) import Hedgehog.Gen qualified as Gen import Hedgehog.Range qualified as Range -import PlutusCore (Name) +import PlutusCore (Name (..), Unique (..)) import PlutusCore.Annotation (SrcSpan (..)) import PlutusCore.Default (DefaultFun, DefaultUni) -import PlutusCore.Error (ParserErrorBundle (ParseErrorB)) +import PlutusCore.Error (ParserError (..), ParserErrorBundle (ParseErrorB)) import PlutusCore.Flat (flat, unflat) import PlutusCore.Generators.Hedgehog (forAllPretty) import PlutusCore.Generators.Hedgehog.AST (runAstGen) @@ -28,7 +30,7 @@ import Test.Tasty (TestTree, testGroup) import Test.Tasty.Golden (goldenVsString) import Test.Tasty.HUnit (testCase, (@?=)) import Test.Tasty.Hedgehog (testPropertyNamed) -import Text.Megaparsec (errorBundlePretty) +import Text.Megaparsec (ErrorFancy (..), ParseError (..), bundleErrors, errorBundlePretty) import Data.ByteString.Lazy qualified as BSL import Data.Text.Encoding (encodeUtf8) @@ -60,6 +62,9 @@ test_parsing = , propMissingConOperands , propInvalidKeyword , propBracketMismatch + , propValidUniqueSuffix + , propInvalidUniqueSuffix + , propInvalidUniqueSuffixScalusRegression ] ] @@ -241,6 +246,119 @@ propBracketMismatch = "bracket-mismatch" "(program 1.1.0 [(var x))" +propInvalidUniqueSuffixScalusRegression :: TestTree +propInvalidUniqueSuffixScalusRegression = + testParseErrorGolden + "MalformedUniqueSuffix: Scalus pubKeyHash-305478r71 regression (#7742)" + "malformed-unique-suffix-scalus" + "(program 1.1.0 (lam pubKeyHash-305478r71 (lam x x)))" + +{-| A '-' unquoted name parses to a 'Name' carrying the base +text and a 'Unique' equal to the digits. -} +propValidUniqueSuffix :: TestTree +propValidUniqueSuffix = + testPropertyNamed + "Valid unique suffix: - parses to Name (Unique )" + "valid-unique-suffix" + $ property + $ do + base <- forAll genBaseName + n <- forAll (Gen.integral (Range.linear 0 9999999)) + let nText = T.pack (show (n :: Int)) + input = "(lam " <> base <> "-" <> nText <> " (con bool True))" + case runQuoteT (parseTerm input) of + Right (UPLC.LamAbs _ binder _) -> do + _nameText binder === base + _nameUnique binder === Unique n + Right other -> do + annotate ("Expected LamAbs, got: " <> show other) + failure + Left bundle -> do + annotateShow bundle + failure + +{-| A '-' unquoted name (where '' is empty, contains a +non-digit, or contains another '-') raises 'MalformedUniqueSuffix' carrying +'' and '' verbatim. -} +propInvalidUniqueSuffix :: TestTree +propInvalidUniqueSuffix = + testPropertyNamed + "Invalid unique suffix: - raises MalformedUniqueSuffix " + "invalid-unique-suffix" + $ property + $ do + base <- forAll genBaseName + bad <- forAll genBadSuffix + let input = "(lam " <> base <> "-" <> bad <> " (con bool True))" + case runQuoteT (parseTerm input) of + Right ok -> do + annotate ("Expected MalformedUniqueSuffix, got success: " <> show ok) + failure + Left bundle -> + case extractMalformedUniqueSuffix bundle of + Just (b, s) -> do + b === base + s === bad + Nothing -> do + annotateShow bundle + failure + where + extractMalformedUniqueSuffix :: ParserErrorBundle -> Maybe (Text, Text) + extractMalformedUniqueSuffix (ParseErrorB bundle) = + case [ (b, s) + | err <- F.toList (bundleErrors bundle) + , (b, s) <- fanciesOf err + ] of + (x : _) -> Just x + [] -> Nothing + fanciesOf (FancyError _ es) = + [(b, s) | ErrorCustom (MalformedUniqueSuffix b s _) <- Set.toList es] + fanciesOf _ = [] + +-- Generators for unquoted-name property tests. + +genIdStartChar :: Gen Char +genIdStartChar = + Gen.choice [Gen.element ['a' .. 'z'], Gen.element ['A' .. 'Z'], pure '_'] + +genIdRestChar :: Gen Char +genIdRestChar = + Gen.choice [genIdStartChar, Gen.element ['0' .. '9'], pure '\''] + +genBaseName :: Gen Text +genBaseName = do + hd <- genIdStartChar + tl <- Gen.list (Range.linear 0 8) genIdRestChar + pure (T.pack (hd : tl)) + +{-| Generate a guaranteed-malformed suffix by either returning the empty string, +or starting from a valid digit-string base (possibly empty) and inserting one +or more invalidating characters at random positions. The invalidating set is +'isNameExtensionChar' minus digits, so any single insertion turns the result +into a non-digit-only string. -} +genBadSuffix :: Gen Text +genBadSuffix = + Gen.choice + [ pure T.empty + , do + base <- T.pack <$> Gen.list (Range.linear 0 8) (Gen.element ['0' .. '9']) + n <- Gen.integral (Range.linear 1 3 :: Range.Range Int) + applyMutations n base + ] + where + applyMutations :: Int -> Text -> Gen Text + applyMutations 0 t = pure t + applyMutations k t = insertInvalidatingChar t >>= applyMutations (k - 1) + + insertInvalidatingChar :: Text -> Gen Text + insertInvalidatingChar t = do + pos <- Gen.integral (Range.linear 0 (T.length t)) + c <- Gen.element invalidatingChars + pure (T.take pos t <> T.singleton c <> T.drop pos t) + + invalidatingChars :: String + invalidatingChars = ['a' .. 'z'] <> ['A' .. 'Z'] <> "_'-" + -------------------------------------------------------------------------------- -- Helper Functions ------------------------------------------------------------