|
| 1 | +// Copyright (c) Microsoft Corporation. |
| 2 | +// Licensed under the MIT license. |
| 3 | + |
| 4 | +/** |
| 5 | + * @name Non-constant format string |
| 6 | + * @description Passing a value that is not a string literal 'format' string to a printf-like function can lead |
| 7 | + * to a mismatch between the number of arguments defined by the 'format' and the number |
| 8 | + * of arguments actually passed to the function. If the format string ultimately stems |
| 9 | + * from an untrusted source, this can be used for exploits. |
| 10 | + * This query finds format strings coming from non-literal sources. Note that format strings of |
| 11 | + * type `const char*` it is still considered non-constant if the value is not coming from a string |
| 12 | + * literal. For example, for a parameter with type `const char*` of an exported function that is |
| 13 | + * used as a format string, there is no way to ensure the originating value was a string literal. |
| 14 | + * @kind path-problem |
| 15 | + * @problem.severity recommendation |
| 16 | + * @security-severity 9.3 |
| 17 | + * @precision high |
| 18 | + * @id cpp/microsoft/public/likely-bugs/format/non-constant-format |
| 19 | + * @tags maintainability |
| 20 | + * correctness |
| 21 | + * security |
| 22 | + * external/cwe/cwe-134 |
| 23 | + */ |
| 24 | + |
| 25 | +import semmle.code.cpp.ir.dataflow.TaintTracking |
| 26 | +import semmle.code.cpp.commons.Printf |
| 27 | +import semmle.code.cpp.security.FlowSources |
| 28 | +import semmle.code.cpp.ir.dataflow.internal.ModelUtil |
| 29 | +import semmle.code.cpp.models.interfaces.DataFlow |
| 30 | +import semmle.code.cpp.models.interfaces.Taint |
| 31 | +import semmle.code.cpp.ir.IR |
| 32 | +import NonConstFlow::PathGraph |
| 33 | + |
| 34 | +class UncalledFunction extends Function { |
| 35 | + UncalledFunction() { |
| 36 | + not exists(Call c | c.getTarget() = this) and |
| 37 | + // Ignore functions that appear to be function pointers |
| 38 | + // function pointers may be seen as uncalled statically |
| 39 | + not exists(FunctionAccess fa | fa.getTarget() = this) |
| 40 | + } |
| 41 | +} |
| 42 | + |
| 43 | +/** The `unsigned short` type. */ |
| 44 | +class UnsignedShort extends ShortType { |
| 45 | + UnsignedShort() { this.isUnsigned() } |
| 46 | +} |
| 47 | + |
| 48 | +/** |
| 49 | + * Holds if `t` cannot refer to a string. That is, it's a built-in |
| 50 | + * or arithmetic type that is not a "`char` like" type. |
| 51 | + */ |
| 52 | +predicate cannotContainString(Type t) { |
| 53 | + exists(Type unspecified | |
| 54 | + unspecified = t.getUnspecifiedType() and |
| 55 | + not unspecified instanceof UnknownType and |
| 56 | + not unspecified instanceof CharType and |
| 57 | + not unspecified instanceof WideCharType and |
| 58 | + not unspecified instanceof Char8Type and |
| 59 | + not unspecified instanceof Char16Type and |
| 60 | + not unspecified instanceof Char32Type and |
| 61 | + // C often defines `wchar_t` as `unsigned short` |
| 62 | + not unspecified instanceof UnsignedShort |
| 63 | + | |
| 64 | + unspecified instanceof ArithmeticType or |
| 65 | + unspecified instanceof BuiltInType |
| 66 | + ) |
| 67 | +} |
| 68 | + |
| 69 | +predicate dataFlowOrTaintFlowFunction(Function func, FunctionOutput output) { |
| 70 | + func.(DataFlowFunction).hasDataFlow(_, output) or |
| 71 | + func.(TaintFunction).hasTaintFlow(_, output) |
| 72 | +} |
| 73 | + |
| 74 | +/** |
| 75 | + * Holds if `node` is a non-constant source of data flow for non-const format string detection. |
| 76 | + * This is defined as either: |
| 77 | + * 1) a `FlowSource` |
| 78 | + * 2) a parameter of an 'uncalled' function |
| 79 | + * 3) an argument to a function with no definition that is not known to define the output through its input |
| 80 | + * 4) an out arg of a function with no definition that is not known to define the output through its input |
| 81 | + * |
| 82 | + * The latter two cases address identifying standard string manipulation libraries as input sources |
| 83 | + * e.g., strcpy. More simply, functions without definitions that are known to manipulate the |
| 84 | + * input to produce an output are not sources. Instead the ultimate source of input to these functions |
| 85 | + * should be considered as the source. |
| 86 | + * |
| 87 | + * False Negative Implication: This approach has false negatives (fails to identify non-const sources) |
| 88 | + * when the source is a field of a struct or object and the initialization is not observed statically. |
| 89 | + * There are 3 general cases where this can occur: |
| 90 | + * 1) Parameters of uncalled functions that are structs/objects and a field is accessed for a format string. |
| 91 | + * 2) A local variable that is a struct/object and initialization of the field occurs in code that is unseen statically. |
| 92 | + * e.g., an object constructor isn't known statically, or a function sets fields |
| 93 | + * of a struct, but the function is not known statically. |
| 94 | + * 3) A function meeting cases (3) and (4) above returns (through an out argument or return value) |
| 95 | + * a struct or object where a field containing a format string has been initialized. |
| 96 | + * |
| 97 | + * Note, uninitialized variables used as format strings are never detected by design. |
| 98 | + * Uninitialized variables are a separate vulnerability concern and should be addressed by a separate query. |
| 99 | + */ |
| 100 | +predicate isNonConst(DataFlow::Node node) { |
| 101 | + node instanceof FlowSource |
| 102 | + or |
| 103 | + // Parameters of uncalled functions that aren't const |
| 104 | + exists(UncalledFunction f, Parameter p | |
| 105 | + f.getAParameter() = p and |
| 106 | + // We pick the indirection of the parameter since this query is focused |
| 107 | + // on strings. |
| 108 | + p = node.asParameter(1) and |
| 109 | + // Ignore main's argv parameter as it is already considered a `FlowSource` |
| 110 | + // not ignoring it will result in path redundancies |
| 111 | + (f.getName() = "main" implies p != f.getParameter(1)) |
| 112 | + ) |
| 113 | + or |
| 114 | + // Consider as an input any out arg of a function or a function's return where the function is not: |
| 115 | + // 1. a function with a known dataflow or taintflow from input to output and the `node` is the output |
| 116 | + // 2. a function where there is a known definition |
| 117 | + // i.e., functions that with unknown bodies and are not known to define the output through its input |
| 118 | + // are considered as possible non-const sources |
| 119 | + // The function's output must also not be const to be considered a non-const source |
| 120 | + exists(Function func, CallInstruction call | |
| 121 | + not func.hasDefinition() and |
| 122 | + func = call.getStaticCallTarget() |
| 123 | + | |
| 124 | + // Case 1: It's a known dataflow or taintflow function with flow to the return value |
| 125 | + call.getUnconvertedResultExpression() = node.asIndirectExpr() and |
| 126 | + not exists(FunctionOutput output | |
| 127 | + dataFlowOrTaintFlowFunction(func, output) and |
| 128 | + output.isReturnValueDeref(_) and |
| 129 | + node = callOutput(call, output) |
| 130 | + ) |
| 131 | + or |
| 132 | + // Case 2: It's a known dataflow or taintflow function with flow to an output parameter |
| 133 | + exists(int i | |
| 134 | + call.getPositionalArgumentOperand(i).getDef().getUnconvertedResultExpression() = |
| 135 | + node.asDefiningArgument() and |
| 136 | + not exists(FunctionOutput output | |
| 137 | + dataFlowOrTaintFlowFunction(func, output) and |
| 138 | + output.isParameterDeref(i, _) and |
| 139 | + node = callOutput(call, output) |
| 140 | + ) |
| 141 | + ) |
| 142 | + ) |
| 143 | +} |
| 144 | + |
| 145 | +/** |
| 146 | + * Holds if `sink` is a sink is a format string of any |
| 147 | + * `FormattingFunctionCall`. |
| 148 | + */ |
| 149 | +predicate isSinkImpl(DataFlow::Node sink, Expr formatString) { |
| 150 | + sink.asIndirectExpr() = formatString and |
| 151 | + exists(FormattingFunctionCall fc | formatString = fc.getArgument(fc.getFormatParameterIndex())) |
| 152 | +} |
| 153 | + |
| 154 | +module NonConstFlowConfig implements DataFlow::ConfigSig { |
| 155 | + predicate isSource(DataFlow::Node source) { |
| 156 | + exists(Type t | |
| 157 | + isNonConst(source) and |
| 158 | + t = source.getType() and |
| 159 | + not cannotContainString(t) |
| 160 | + ) |
| 161 | + } |
| 162 | + |
| 163 | + predicate isSink(DataFlow::Node sink) { isSinkImpl(sink, _) } |
| 164 | + |
| 165 | + predicate isBarrier(DataFlow::Node node) { |
| 166 | + // Ignore tracing non-const through array indices |
| 167 | + exists(ArrayExpr a | a.getArrayOffset() = node.asIndirectExpr()) |
| 168 | + or |
| 169 | + exists(Type t | |
| 170 | + t = node.getType() and |
| 171 | + cannotContainString(t) |
| 172 | + ) |
| 173 | + } |
| 174 | + |
| 175 | + predicate observeDiffInformedIncrementalMode() { any() } |
| 176 | + |
| 177 | + Location getASelectedSinkLocation(DataFlow::Node sink) { |
| 178 | + exists(FormattingFunctionCall call, Expr formatString | |
| 179 | + result = [call.getLocation(), sink.getLocation()] |
| 180 | + | |
| 181 | + isSinkImpl(sink, formatString) and |
| 182 | + call.getArgument(call.getFormatParameterIndex()) = formatString |
| 183 | + ) |
| 184 | + } |
| 185 | +} |
| 186 | + |
| 187 | +module NonConstFlow = TaintTracking::Global<NonConstFlowConfig>; |
| 188 | + |
| 189 | +from |
| 190 | + FormattingFunctionCall call, Expr formatString, NonConstFlow::PathNode sink, |
| 191 | + NonConstFlow::PathNode source |
| 192 | +where |
| 193 | + isSinkImpl(sink.getNode(), formatString) and |
| 194 | + call.getArgument(call.getFormatParameterIndex()) = formatString and |
| 195 | + NonConstFlow::flowPath(source, sink) |
| 196 | +select sink.getNode(), source, sink, |
| 197 | + "The format string argument to $@ has a source which cannot be " + |
| 198 | + "verified to originate from a string literal.", call, call.getTarget().getName() |
0 commit comments