Skip to content

Commit df6c9d4

Browse files
committed
Add additional STL rules (both experimental and recommended.)
1 parent 838880f commit df6c9d4

17 files changed

Lines changed: 1694 additions & 0 deletions
Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
// Copyright (c) Microsoft Corporation.
2+
// Licensed under the MIT license.
3+
4+
/**
5+
* @name Non-constant format string
6+
* @description Passing a value that is not a string literal 'format' string to a printf-like function can lead
7+
* to a mismatch between the number of arguments defined by the 'format' and the number
8+
* of arguments actually passed to the function. If the format string ultimately stems
9+
* from an untrusted source, this can be used for exploits.
10+
* This query finds format strings coming from non-literal sources. Note that format strings of
11+
* type `const char*` it is still considered non-constant if the value is not coming from a string
12+
* literal. For example, for a parameter with type `const char*` of an exported function that is
13+
* used as a format string, there is no way to ensure the originating value was a string literal.
14+
* @kind path-problem
15+
* @problem.severity recommendation
16+
* @security-severity 9.3
17+
* @precision high
18+
* @id cpp/microsoft/public/likely-bugs/format/non-constant-format
19+
* @tags maintainability
20+
* correctness
21+
* security
22+
* external/cwe/cwe-134
23+
*/
24+
25+
import semmle.code.cpp.ir.dataflow.TaintTracking
26+
import semmle.code.cpp.commons.Printf
27+
import semmle.code.cpp.security.FlowSources
28+
import semmle.code.cpp.ir.dataflow.internal.ModelUtil
29+
import semmle.code.cpp.models.interfaces.DataFlow
30+
import semmle.code.cpp.models.interfaces.Taint
31+
import semmle.code.cpp.ir.IR
32+
import NonConstFlow::PathGraph
33+
34+
class UncalledFunction extends Function {
35+
UncalledFunction() {
36+
not exists(Call c | c.getTarget() = this) and
37+
// Ignore functions that appear to be function pointers
38+
// function pointers may be seen as uncalled statically
39+
not exists(FunctionAccess fa | fa.getTarget() = this)
40+
}
41+
}
42+
43+
/** The `unsigned short` type. */
44+
class UnsignedShort extends ShortType {
45+
UnsignedShort() { this.isUnsigned() }
46+
}
47+
48+
/**
49+
* Holds if `t` cannot refer to a string. That is, it's a built-in
50+
* or arithmetic type that is not a "`char` like" type.
51+
*/
52+
predicate cannotContainString(Type t) {
53+
exists(Type unspecified |
54+
unspecified = t.getUnspecifiedType() and
55+
not unspecified instanceof UnknownType and
56+
not unspecified instanceof CharType and
57+
not unspecified instanceof WideCharType and
58+
not unspecified instanceof Char8Type and
59+
not unspecified instanceof Char16Type and
60+
not unspecified instanceof Char32Type and
61+
// C often defines `wchar_t` as `unsigned short`
62+
not unspecified instanceof UnsignedShort
63+
|
64+
unspecified instanceof ArithmeticType or
65+
unspecified instanceof BuiltInType
66+
)
67+
}
68+
69+
predicate dataFlowOrTaintFlowFunction(Function func, FunctionOutput output) {
70+
func.(DataFlowFunction).hasDataFlow(_, output) or
71+
func.(TaintFunction).hasTaintFlow(_, output)
72+
}
73+
74+
/**
75+
* Holds if `node` is a non-constant source of data flow for non-const format string detection.
76+
* This is defined as either:
77+
* 1) a `FlowSource`
78+
* 2) a parameter of an 'uncalled' function
79+
* 3) an argument to a function with no definition that is not known to define the output through its input
80+
* 4) an out arg of a function with no definition that is not known to define the output through its input
81+
*
82+
* The latter two cases address identifying standard string manipulation libraries as input sources
83+
* e.g., strcpy. More simply, functions without definitions that are known to manipulate the
84+
* input to produce an output are not sources. Instead the ultimate source of input to these functions
85+
* should be considered as the source.
86+
*
87+
* False Negative Implication: This approach has false negatives (fails to identify non-const sources)
88+
* when the source is a field of a struct or object and the initialization is not observed statically.
89+
* There are 3 general cases where this can occur:
90+
* 1) Parameters of uncalled functions that are structs/objects and a field is accessed for a format string.
91+
* 2) A local variable that is a struct/object and initialization of the field occurs in code that is unseen statically.
92+
* e.g., an object constructor isn't known statically, or a function sets fields
93+
* of a struct, but the function is not known statically.
94+
* 3) A function meeting cases (3) and (4) above returns (through an out argument or return value)
95+
* a struct or object where a field containing a format string has been initialized.
96+
*
97+
* Note, uninitialized variables used as format strings are never detected by design.
98+
* Uninitialized variables are a separate vulnerability concern and should be addressed by a separate query.
99+
*/
100+
predicate isNonConst(DataFlow::Node node) {
101+
node instanceof FlowSource
102+
or
103+
// Parameters of uncalled functions that aren't const
104+
exists(UncalledFunction f, Parameter p |
105+
f.getAParameter() = p and
106+
// We pick the indirection of the parameter since this query is focused
107+
// on strings.
108+
p = node.asParameter(1) and
109+
// Ignore main's argv parameter as it is already considered a `FlowSource`
110+
// not ignoring it will result in path redundancies
111+
(f.getName() = "main" implies p != f.getParameter(1))
112+
)
113+
or
114+
// Consider as an input any out arg of a function or a function's return where the function is not:
115+
// 1. a function with a known dataflow or taintflow from input to output and the `node` is the output
116+
// 2. a function where there is a known definition
117+
// i.e., functions that with unknown bodies and are not known to define the output through its input
118+
// are considered as possible non-const sources
119+
// The function's output must also not be const to be considered a non-const source
120+
exists(Function func, CallInstruction call |
121+
not func.hasDefinition() and
122+
func = call.getStaticCallTarget()
123+
|
124+
// Case 1: It's a known dataflow or taintflow function with flow to the return value
125+
call.getUnconvertedResultExpression() = node.asIndirectExpr() and
126+
not exists(FunctionOutput output |
127+
dataFlowOrTaintFlowFunction(func, output) and
128+
output.isReturnValueDeref(_) and
129+
node = callOutput(call, output)
130+
)
131+
or
132+
// Case 2: It's a known dataflow or taintflow function with flow to an output parameter
133+
exists(int i |
134+
call.getPositionalArgumentOperand(i).getDef().getUnconvertedResultExpression() =
135+
node.asDefiningArgument() and
136+
not exists(FunctionOutput output |
137+
dataFlowOrTaintFlowFunction(func, output) and
138+
output.isParameterDeref(i, _) and
139+
node = callOutput(call, output)
140+
)
141+
)
142+
)
143+
}
144+
145+
/**
146+
* Holds if `sink` is a sink is a format string of any
147+
* `FormattingFunctionCall`.
148+
*/
149+
predicate isSinkImpl(DataFlow::Node sink, Expr formatString) {
150+
sink.asIndirectExpr() = formatString and
151+
exists(FormattingFunctionCall fc | formatString = fc.getArgument(fc.getFormatParameterIndex()))
152+
}
153+
154+
module NonConstFlowConfig implements DataFlow::ConfigSig {
155+
predicate isSource(DataFlow::Node source) {
156+
exists(Type t |
157+
isNonConst(source) and
158+
t = source.getType() and
159+
not cannotContainString(t)
160+
)
161+
}
162+
163+
predicate isSink(DataFlow::Node sink) { isSinkImpl(sink, _) }
164+
165+
predicate isBarrier(DataFlow::Node node) {
166+
// Ignore tracing non-const through array indices
167+
exists(ArrayExpr a | a.getArrayOffset() = node.asIndirectExpr())
168+
or
169+
exists(Type t |
170+
t = node.getType() and
171+
cannotContainString(t)
172+
)
173+
}
174+
175+
predicate observeDiffInformedIncrementalMode() { any() }
176+
177+
Location getASelectedSinkLocation(DataFlow::Node sink) {
178+
exists(FormattingFunctionCall call, Expr formatString |
179+
result = [call.getLocation(), sink.getLocation()]
180+
|
181+
isSinkImpl(sink, formatString) and
182+
call.getArgument(call.getFormatParameterIndex()) = formatString
183+
)
184+
}
185+
}
186+
187+
module NonConstFlow = TaintTracking::Global<NonConstFlowConfig>;
188+
189+
from
190+
FormattingFunctionCall call, Expr formatString, NonConstFlow::PathNode sink,
191+
NonConstFlow::PathNode source
192+
where
193+
isSinkImpl(sink.getNode(), formatString) and
194+
call.getArgument(call.getFormatParameterIndex()) = formatString and
195+
NonConstFlow::flowPath(source, sink)
196+
select sink.getNode(), source, sink,
197+
"The format string argument to $@ has a source which cannot be " +
198+
"verified to originate from a string literal.", call, call.getTarget().getName()
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// Copyright (c) Microsoft Corporation.
2+
// Licensed under the MIT license.
3+
4+
import cpp
5+
6+
// an expression of the form sizeof(e) or strlen(e)
7+
class BufferSizeExpr extends Expr {
8+
BufferSizeExpr() {
9+
this instanceof SizeofExprOperator or
10+
this instanceof StrlenCall
11+
}
12+
13+
Expr getArg() {
14+
result = this.(SizeofExprOperator).getExprOperand() or
15+
result = this.(StrlenCall).getStringExpr()
16+
}
17+
}
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
// Copyright (c) Microsoft Corporation.
2+
// Licensed under the MIT license.
3+
4+
/**
5+
* @name Potential improper null termination
6+
* @description Using a string that may not be null terminated as an argument
7+
* to a string function can result in buffer overflow or buffer over-read.
8+
* @kind problem
9+
* @id cpp/microsoft/public/likely-bugs/memory-management/improper-null-termination
10+
* @problem.severity warning
11+
* @security-severity 7.8
12+
* @tags security
13+
* external/cwe/cwe-170
14+
* external/cwe/cwe-665
15+
*/
16+
17+
import cpp
18+
import semmle.code.cpp.controlflow.StackVariableReachability
19+
import semmle.code.cpp.commons.NullTermination
20+
21+
/**
22+
* A declaration of a local variable that leaves the variable uninitialized.
23+
*/
24+
DeclStmt declWithNoInit(LocalVariable v) {
25+
result.getADeclaration() = v and
26+
not exists(v.getInitializer())
27+
}
28+
29+
/**
30+
* Control flow reachability from a buffer that is not not null terminated to a
31+
* sink that requires null termination.
32+
*/
33+
class ImproperNullTerminationReachability extends StackVariableReachabilityWithReassignment {
34+
ImproperNullTerminationReachability() { this = "ImproperNullTerminationReachability" }
35+
36+
override predicate isSourceActual(ControlFlowNode node, StackVariable v) {
37+
node = declWithNoInit(v)
38+
or
39+
exists(Call c, int bufferArg, int sizeArg |
40+
c = node and
41+
(
42+
c.getTarget().hasName("readlink") and bufferArg = 1 and sizeArg = 2
43+
or
44+
c.getTarget().hasName("readlinkat") and bufferArg = 2 and sizeArg = 3
45+
) and
46+
c.getArgument(bufferArg).(VariableAccess).getTarget() = v and
47+
(
48+
// buffer size parameter likely matches the full buffer size
49+
c.getArgument(sizeArg) instanceof SizeofOperator or
50+
c.getArgument(sizeArg).getValue().toInt() = v.getType().getSize()
51+
)
52+
)
53+
}
54+
55+
override predicate isSinkActual(ControlFlowNode node, StackVariable v) {
56+
node.(VariableAccess).getTarget() = v and
57+
variableMustBeNullTerminated(node)
58+
}
59+
60+
override predicate isBarrier(ControlFlowNode node, StackVariable v) {
61+
exprDefinition(v, node, _) or
62+
this.isSinkActual(node, v) // only report first use
63+
}
64+
}
65+
66+
/**
67+
* Flow from a place where null termination is added, to a sink of
68+
* `ImproperNullTerminationReachability`. This was previously implemented as a
69+
* simple barrier in `ImproperNullTerminationReachability`, but there were
70+
* false positive results involving multiple paths from source to sink. We'd
71+
* prefer to report only the results we are sure of.
72+
*/
73+
class NullTerminationReachability extends StackVariableReachabilityWithReassignment {
74+
NullTerminationReachability() { this = "NullTerminationReachability" }
75+
76+
override predicate isSourceActual(ControlFlowNode node, StackVariable v) {
77+
mayAddNullTerminator(node, v.getAnAccess()) or // null termination
78+
node.(AddressOfExpr).getOperand() = v.getAnAccess() // address taken (possible null termination)
79+
}
80+
81+
override predicate isSinkActual(ControlFlowNode node, StackVariable v) {
82+
// have the same sinks as `ImproperNullTerminationReachability`.
83+
exists(ImproperNullTerminationReachability r | r.isSinkActual(node, v))
84+
}
85+
86+
override predicate isBarrier(ControlFlowNode node, StackVariable v) {
87+
// don't look further back than the source, or further forward than the sink
88+
exists(ImproperNullTerminationReachability r | r.isSourceActual(node, v)) or
89+
exists(ImproperNullTerminationReachability r | r.isSinkActual(node, v))
90+
}
91+
}
92+
93+
from
94+
ImproperNullTerminationReachability reaches, NullTerminationReachability nullTermReaches,
95+
ControlFlowNode source, LocalVariable v, VariableAccess sink
96+
where
97+
reaches.reaches(source, v, sink) and
98+
not exists(ControlFlowNode termination |
99+
nullTermReaches.reaches(termination, _, sink) and
100+
termination != source
101+
)
102+
select sink, "Variable $@ may not be null terminated.", v, v.getName()

0 commit comments

Comments
 (0)