Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
218 changes: 185 additions & 33 deletions src/coreclr/jit/assertionprop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2531,69 +2531,214 @@ bool Compiler::optAssertionVNIsSubtype(ValueNum objVN, ValueNum castToVN, ASSERT
}

//------------------------------------------------------------------------------
// optVNBasedFoldExpr_Call_Memcmp: Folds NI_System_SpanHelpers_SequenceEqual for immutable data.
// optVNBasedFoldExpr_Call_Memcmp: Folds NI_System_SpanHelpers_SequenceEqual via VN.
//
// * When both buffers are immutable data and length is constant, fold to a constant.
// * When length is constant and exactly one buffer is immutable data of suitable
// size, unroll the comparison into a chain of "(load ^ cns) | ..." == 0
//
// Arguments:
// call - NI_System_SpanHelpers_SequenceEqual call to fold
// call - the special-intrinsic call to fold
//
// Return Value:
// Returns a new tree or nullptr if nothing is changed.
//
GenTree* Compiler::optVNBasedFoldExpr_Call_Memcmp(GenTreeCall* call)
{
// NOTE(review): this listing was captured from a diff view whose +/- markers were lost, so
// pre-change and post-change lines appear interleaved below (e.g. two opening JITDUMPs, two
// "both buffers are constant" sequences). Confirm statement order against the real file.
JITDUMP("See if we can optimize NI_System_SpanHelpers_SequenceEqual with help of VN...\n");
assert(call->IsSpecialIntrinsic(this, NI_System_SpanHelpers_SequenceEqual));

JITDUMP("See if we can optimize SpanHelpers.SequenceEqual via VN...\n");

CallArg* arg1 = call->gtArgs.GetUserArgByIndex(0);
CallArg* arg2 = call->gtArgs.GetUserArgByIndex(1);
CallArg* lenArg = call->gtArgs.GetUserArgByIndex(2);

// Length is in bytes.
ValueNum lenVN = optConservativeNormalVN(lenArg->GetNode());
size_t len;
if (!vnStore->IsVNIntegralConstant(lenVN, &len))
{
// See if arguments are the same - in that case we can optimize to constant true
ValueNum arg1VN = optConservativeNormalVN(arg1->GetNode());
ValueNum arg2VN = optConservativeNormalVN(arg2->GetNode());
if ((arg1VN != ValueNumStore::NoVN) && (arg1VN == arg2VN))
// Assertion propagation can substitute a constant into the length expression without
// re-folding its parents (e.g. leaving "(long)(5 << 1)"), so its value number is no
// longer recognized as a constant. Fold a throwaway clone to recover the value.
GenTree* lenClone = gtCloneExpr(lenArg->GetNode());
GenTree* foldedLen = (lenClone != nullptr) ? gtFoldExprConstChain(lenClone) : nullptr;
if ((foldedLen == nullptr) || !foldedLen->IsIntegralConst())
{
JITDUMP("...both arguments have the same VN -> optimize to constant true.\n");
return gtWrapWithSideEffects(gtNewIconNode(1), call, GTF_ALL_EFFECT, true);
// Identical VN on both sides => true regardless of length.
ValueNum a1VN = optConservativeNormalVN(arg1->GetNode());
ValueNum a2VN = optConservativeNormalVN(arg2->GetNode());
if ((a1VN != ValueNumStore::NoVN) && (a1VN == a2VN))
{
JITDUMP("...both arguments have the same VN -> fold to true.\n");
return gtWrapWithSideEffects(gtNewIconNode(1), call, GTF_ALL_EFFECT, true);
}

JITDUMP("...length is not a constant - bail out.\n");
return nullptr;
}

JITDUMP("...length is not a constant - bail out.\n");
return nullptr;
// Only the constant value is read from the folded clone; the call's own length
// argument node is not replaced here.
len = (size_t)foldedLen->AsIntConCommon()->IntegralValue();
JITDUMP("...recovered an almost-constant length of %u.\n", (unsigned)len);
}

// SequenceEqual(..., len == 0) => true, and does not dereference pointers
// Length 0 => true; neither buffer is dereferenced.
if (len == 0)
{
JITDUMP("...length is 0 -> optimize to constant true.\n");
JITDUMP("...length is 0 -> fold to true.\n");
return gtWrapWithSideEffects(gtNewIconNode(1), call, GTF_ALL_EFFECT, true);
}

constexpr size_t maxLen = 65536; // Arbitrary threshold to avoid large buffer allocations
if (len > maxLen)
// Try to fold to a constant when both buffers are known immutable data.
constexpr size_t maxFoldLen = 65536; // arbitrary cap to avoid large allocations
if (len <= maxFoldLen)
{
CompAllocator alloc = getAllocator(CMK_AssertionProp);
uint8_t* buf1 = nullptr;
uint8_t* buf2 = nullptr;
// The (int)len narrowing below is lossless in this scope because len <= maxFoldLen.
if (GetImmutableDataFromAddress(arg1->GetNode(), (int)len, alloc, &buf1) &&
GetImmutableDataFromAddress(arg2->GetNode(), (int)len, alloc, &buf2))
{
const bool equal = (memcmp(buf1, buf2, len) == 0);
JITDUMP("...both buffers known at compile time -> fold to %s.\n", equal ? "true" : "false");
return gtWrapWithSideEffects(gtNewIconNode(equal ? 1 : 0), call, GTF_ALL_EFFECT, true);
}
}

// Unroll path: requires exactly one buffer to be immutable data within the unroll threshold.
if (len > getUnrollThreshold(MemcmpU16))
{
JITDUMP("...length is too big (%u bytes) - bail out.\n", (unsigned)len);
JITDUMP("...length is too big to unroll (%u bytes) - bail out.\n", (unsigned)len);
return nullptr;
}

uint8_t* buffer1 = nullptr;
uint8_t* buffer2 = nullptr;
if (GetImmutableDataFromAddress(arg1->GetNode(), (int)len, getAllocator(CMK_AssertionProp), &buffer1) &&
GetImmutableDataFromAddress(arg2->GetNode(), (int)len, getAllocator(CMK_AssertionProp), &buffer2))
if (compCurBB->isRunRarely())
{
assert(buffer1 != nullptr && buffer2 != nullptr);
// If both memory regions are known at compile time, we can fold to a constant.
bool areEqual = (memcmp(buffer1, buffer2, len) == 0);
JITDUMP("...both memory regions are known at compile time -> optimize to constant %s.\n",
areEqual ? "true" : "false");
return gtWrapWithSideEffects(gtNewIconNode(areEqual ? 1 : 0), call, GTF_ALL_EFFECT, true);
JITDUMP("...block is cold - not profitable to expand.\n");
return nullptr;
}

JITDUMP("...data is not known at compile time - bail out.\n");
return nullptr;
// Find which side is the compile-time-known buffer; 'dataArg' ends up naming the OTHER
// argument, i.e. the one that still has to be loaded at run time.
CompAllocator alloc = getAllocator(CMK_AssertionProp);
uint8_t* cnsData = nullptr;
CallArg* dataArg = nullptr;
if (GetImmutableDataFromAddress(arg1->GetNode(), (int)len, alloc, &cnsData))
{
dataArg = arg2;
}
else if (GetImmutableDataFromAddress(arg2->GetNode(), (int)len, alloc, &cnsData))
{
dataArg = arg1;
}
else
{
JITDUMP("...neither buffer is constant - bail out.\n");
return nullptr;
}

// Peel any leading constant offset off the non-constant side (e.g. the "+12" that addresses a
// string's first char) so it can be folded into each chunk's displacement below instead of
// being computed once and buried inside the multi-use temp. When the remaining base is a local
// this also lets fgMakeMultiUse clone it directly, avoiding a temp altogether.
target_ssize_t baseOffset = 0;
gtPeelOffsets(&dataArg->NodeRef(), &baseOffset);

// We're going to read the unknown side multiple times - spill to a temp if needed.
GenTree* data = fgMakeMultiUse(&dataArg->NodeRef());

// Extract side effects from the original call (e.g., evaluation of address args).
GenTree* sideEffects = nullptr;
gtExtractSideEffList(call, &sideEffects, GTF_ALL_EFFECT, true);

// SIMD-aware bitwise/compare helper.
// Dispatch: a SIMD result type builds a SIMD binary op; a scalar result type with SIMD
// operands builds an "all lanes" SIMD compare; everything else is a plain scalar operator.
auto binOp = [this](genTreeOps op, var_types type, GenTree* op1, GenTree* op2) -> GenTree* {
#ifdef FEATURE_HW_INTRINSICS
if (varTypeIsSIMD(type))
{
return gtNewSimdBinOpNode(op, type, op1, op2, TYP_U_IMPL, genTypeSize(type));
}
if (varTypeIsSIMD(op1))
{
assert(varTypeIsSIMD(op2));
return gtNewSimdCmpOpAllNode(op, type, op1, op2, TYP_U_IMPL, genTypeSize(op1));
}
#endif
return gtNewOperNode(op, type, op1, op2);
};

// Pick the widest type we can use and walk the buffer chunk-by-chunk. When the trailing
// chunk is smaller than the current type, switch to a narrower scalar (or, for SIMD,
// overlap with previously processed data).
var_types readType = roundDownMaxType((unsigned)len, /* conservative */ true);
unsigned remaining = (unsigned)len;
GenTree* result = nullptr;

while (remaining > 0)
{
if (remaining < genTypeSize(readType))
{
if (varTypeIsIntegral(readType))
{
readType = roundUpGPRType(remaining);
}
// For SIMD, keep the same type and let the load overlap with previous data.
remaining = genTypeSize(readType);
}

// 'offset' is measured from the start of the buffer. If 'remaining' was raised to the chunk
// size just above, the offset moves back so that this final read ends exactly at 'len'.
const unsigned chunkSize = genTypeSize(readType);
const ssize_t offset = (ssize_t)len - (ssize_t)remaining;

// Loaded chunk: IND<readType>(data + baseOffset + offset). The peeled-off baseOffset is
// folded into the displacement here rather than materialized as a separate add.
GenTree* loadOffset = gtNewIconNode((ssize_t)baseOffset + offset, TYP_I_IMPL);
GenTree* loadAddr = gtNewOperNode(GT_ADD, TYP_BYREF, gtCloneExpr(data), loadOffset);
GenTree* loaded = gtNewIndir(readType, loadAddr, GTF_IND_UNALIGNED | GTF_IND_ALLOW_NON_ATOMIC);

// Stage the matching slice of the constant buffer in a zero-initialized scratch array, so
// any bytes past 'chunkSize' are zero when the constant node is built from it.
constexpr unsigned maxChunkBytes = 64; // largest possible chunk = TYP_SIMD64
assert(chunkSize <= maxChunkBytes);
uint8_t cnsChunk[maxChunkBytes] = {};
memcpy(cnsChunk, cnsData + offset, chunkSize);

GenTree* cnsTree = gtNewGenericCon(readType, cnsChunk);

// A single-chunk integral compare can skip the XOR/OR scaffolding.
if ((chunkSize == len) && varTypeIsIntegral(readType))
{
assert(result == nullptr);
result = binOp(GT_EQ, TYP_INT, loaded, cnsTree);
break;
}

GenTree* xorNode = binOp(GT_XOR, genActualType(readType), loaded, cnsTree);

if (result == nullptr)
{
result = xorNode;
}
else
{
// Merge into the running OR. When the new chunk is a narrower integral type
// (sliding to a smaller scalar at the tail), zero-extend it to the running type.
if (!result->TypeIs(readType))
{
assert(varTypeIsIntegral(result) && varTypeIsIntegral(readType));
xorNode = gtNewCastNode(result->TypeGet(), xorNode, true, result->TypeGet());
}
result = binOp(GT_OR, genActualType(result->TypeGet()), result, xorNode);
}

remaining -= chunkSize;
}

// For the multi-chunk case we still need a final compare-with-zero.
// (The single-chunk integral path above already produced a GT_EQ and left the loop early.)
if (!result->OperIs(GT_EQ))
{
result = binOp(GT_EQ, TYP_INT, result, gtNewZeroConNode(result->TypeGet()));
}

// Keep any side effects extracted from the call ahead of the comparison result.
GenTree* fold = (sideEffects == nullptr) ? result : gtNewOperNode(GT_COMMA, TYP_INT, sideEffects, result);
JITDUMP("...unrolled to:\n");
DISPTREE(fold);
return fold;
}

//------------------------------------------------------------------------------
Expand Down Expand Up @@ -2774,14 +2919,12 @@ GenTree* Compiler::optVNBasedFoldExpr_Call_Memmove(GenTreeCall* call)
// optVNBasedFoldExpr_Call: Folds given call using VN to a simpler tree.
//
// Arguments:
// block - The block containing the tree.
// parent - The parent node of the tree.
// call - The call to fold
// call - The call to fold
//
// Return Value:
// Returns a new tree or nullptr if nothing is changed.
//
GenTree* Compiler::optVNBasedFoldExpr_Call(BasicBlock* block, GenTree* parent, GenTreeCall* call)
GenTree* Compiler::optVNBasedFoldExpr_Call(GenTreeCall* call)
{
switch (call->GetHelperNum())
{
Expand Down Expand Up @@ -2878,7 +3021,7 @@ GenTree* Compiler::optVNBasedFoldExpr(BasicBlock* block, GenTree* parent, GenTre
switch (tree->OperGet())
{
case GT_CALL:
return optVNBasedFoldExpr_Call(block, parent, tree->AsCall());
return optVNBasedFoldExpr_Call(tree->AsCall());

// We can add more VN-based foldings here.

Expand Down Expand Up @@ -5444,6 +5587,15 @@ bool Compiler::optWriteBarrierAssertionProp_StoreBlk(ASSERT_VALARG_TP assertions

GenTree* Compiler::optAssertionProp_Call(ASSERT_VALARG_TP assertions, GenTreeCall* call, Statement* stmt)
{
if (!optLocalAssertionProp)
{
GenTree* folded = optVNBasedFoldExpr_Call(call);
if (folded != nullptr)
{
return optAssertionProp_Update(folded, call, stmt);
}
}

if (optNonNullAssertionProp_Call(assertions, call))
{
return optAssertionProp_Update(call, call, stmt);
Expand Down
3 changes: 2 additions & 1 deletion src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -3960,6 +3960,7 @@ class Compiler
//-------------------------------------------------------------------------

GenTree* gtFoldExpr(GenTree* tree);
GenTree* gtFoldExprConstChain(GenTree* tree, int maxDepth = 10);
GenTree* gtFoldExprConst(GenTree* tree);

GenTree* gtFoldExprUnary(GenTreeUnOp* tree);
Expand Down Expand Up @@ -9055,7 +9056,7 @@ class Compiler
GenTree* optVNConstantPropOnJTrue(BasicBlock* block, GenTree* test);
GenTree* optVNBasedFoldConstExpr(BasicBlock* block, GenTree* parent, GenTree* tree);
GenTree* optVNBasedFoldExpr(BasicBlock* block, GenTree* parent, GenTree* tree);
GenTree* optVNBasedFoldExpr_Call(BasicBlock* block, GenTree* parent, GenTreeCall* call);
GenTree* optVNBasedFoldExpr_Call(GenTreeCall* call);
GenTree* optVNBasedFoldExpr_Call_Memmove(GenTreeCall* call);
GenTree* optVNBasedFoldExpr_Call_Memset(GenTreeCall* call);
GenTree* optVNBasedFoldExpr_Call_Memcmp(GenTreeCall* call);
Expand Down
40 changes: 40 additions & 0 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14999,6 +14999,46 @@ GenTree* Compiler::gtFoldExpr(GenTree* tree)
}
}

//------------------------------------------------------------------------
// gtFoldExprConstChain: bottom-up folding helper - processes the operands of
//    a unary/binary node first and then hands the node itself to gtFoldExpr.
//
// Arguments:
//    tree     - root of the expression to fold; its operand links are
//               rewritten in place with the results of the recursive calls
//    maxDepth - number of levels that may still be visited; once it reaches
//               zero (or below) the subtree is returned without any folding
//
// Returns:
//    The result of gtFoldExpr on the root after its operands were processed,
//    or 'tree' itself when the depth budget is already used up.
//
// Notes:
//    Only nodes reporting OperIsUnary() or OperIsBinary() have their operands
//    visited; every other node goes straight to gtFoldExpr.
//
GenTree* Compiler::gtFoldExprConstChain(GenTree* tree, int maxDepth)
{
    if (maxDepth <= 0)
    {
        // Out of budget - leave this subtree exactly as it is.
        return tree;
    }

    const bool hasOneOperand  = tree->OperIsUnary();
    const bool hasTwoOperands = !hasOneOperand && tree->OperIsBinary();

    if (hasOneOperand || hasTwoOperands)
    {
        auto* const node       = tree->AsOp();
        const int   childDepth = maxDepth - 1;

        // First operand: present on both unary and binary nodes, but may be null.
        if (node->gtOp1 != nullptr)
        {
            node->gtOp1 = gtFoldExprConstChain(node->gtOp1, childDepth);
        }

        // Second operand: only looked at for binary nodes.
        if (hasTwoOperands && (node->gtOp2 != nullptr))
        {
            node->gtOp2 = gtFoldExprConstChain(node->gtOp2, childDepth);
        }
    }

    // Operands are done (or there were none to visit) - now fold the node itself.
    return gtFoldExpr(tree);
}

//------------------------------------------------------------------------
// gtFoldExprUnary: see if a unary operation is foldable
//
Expand Down
43 changes: 43 additions & 0 deletions src/coreclr/jit/valuenum.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12870,6 +12870,49 @@ bool Compiler::GetImmutableDataFromAddress(GenTree* address, int size, CompAlloc
return info.compCompHnd->getStaticFieldContent(fld, *ppValue, size, (int)byteOffset);
}

// See if 'address' reads through a static readonly reference-typed field, e.g. the characters of a
// "static readonly string": [cns +] InvariantNonNullLoad(fieldSeq). The referenced object may be
// movable, so we read its handle with ignoreMovableObjects=false and then read the (immutable)
// content it points to. Only that content is baked in - the movable handle is never embedded.
ValueNum addrVN = vnStore->VNLiberalNormalValue(address->gtVNPair);
const var_types addrVNType = vnStore->TypeOfVN(addrVN);
if ((addrVNType == TYP_BYREF) || (addrVNType == TYP_I_IMPL) || (addrVNType == TYP_REF))
{
target_ssize_t offset = 0;
vnStore->PeelOffsets(&addrVN, &offset);

VNFuncApp funcApp;
if ((offset >= 0) && ((size_t)offset <= INT32_MAX) && (vnStore->TypeOfVN(addrVN) == TYP_REF) &&
vnStore->GetVNFunc(addrVN, &funcApp) && funcApp.FuncIs(VNF_InvariantNonNullLoad))
{
ValueNum fieldSeqVN = vnStore->VNNormalValue(funcApp.GetArg(0));
if (vnStore->IsVNHandle(fieldSeqVN, GTF_ICON_FIELD_SEQ))
{
FieldSeq* fseq = vnStore->FieldSeqVNToFieldSeq(fieldSeqVN);
if ((fseq != nullptr) && fseq->IsStaticField())
{
CORINFO_FIELD_HANDLE fld = fseq->GetFieldHandle();
if (fld != nullptr)
{
// Read the (possibly movable) object reference stored in the static field.
uint8_t handleBuf[TARGET_POINTER_SIZE] = {0};
if (info.compCompHnd->getStaticFieldContent(fld, handleBuf, TARGET_POINTER_SIZE, 0,
/* ignoreMovableObjects */ false))
{
CORINFO_OBJECT_HANDLE objHnd = NO_OBJECT_HANDLE;
memcpy(&objHnd, handleBuf, TARGET_POINTER_SIZE);
if ((objHnd != NO_OBJECT_HANDLE) && info.compCompHnd->isObjectImmutable(objHnd))
{
*ppValue = new (alloc) uint8_t[(size_t)size];
return info.compCompHnd->getObjectContent(objHnd, *ppValue, size, (int)offset);
}
}
}
}
}
}
}

return false;
}

Expand Down
Loading