From 63334764d7b35237006284c09c9c5fbde1c3ef75 Mon Sep 17 00:00:00 2001 From: Jeroen Ketema Date: Fri, 25 Nov 2022 16:51:58 +0100 Subject: [PATCH 1/8] C++: Rewrite `cpp/path-injection` to not use `DefaultTaintTracking` --- .../src/Security/CWE/CWE-022/TaintedPath.ql | 41 +++++++++++++++---- .../SAMATE/TaintedPath/TaintedPath.expected | 14 ++----- .../CWE-022/semmle/tests/TaintedPath.expected | 12 +----- 3 files changed, 38 insertions(+), 29 deletions(-) diff --git a/cpp/ql/src/Security/CWE/CWE-022/TaintedPath.ql b/cpp/ql/src/Security/CWE/CWE-022/TaintedPath.ql index 928c99421773..c56d3191b6d6 100644 --- a/cpp/ql/src/Security/CWE/CWE-022/TaintedPath.ql +++ b/cpp/ql/src/Security/CWE/CWE-022/TaintedPath.ql @@ -17,8 +17,9 @@ import cpp import semmle.code.cpp.security.FunctionWithWrappers import semmle.code.cpp.security.Security -import semmle.code.cpp.security.TaintTracking -import TaintedWithPath +import semmle.code.cpp.ir.IR +import semmle.code.cpp.ir.dataflow.TaintTracking +import DataFlow::PathGraph /** * A function for opening a file. @@ -46,18 +47,42 @@ class FileFunction extends FunctionWithWrappers { override predicate interestingArg(int arg) { arg = 0 } } -class TaintedPathConfiguration extends TaintTrackingConfiguration { - override predicate isSink(Element tainted) { - exists(FileFunction fileFunction | fileFunction.outermostWrapperFunctionCall(tainted, _)) +Expr asSourceExpr(DataFlow::Node node) { + result in [node.asConvertedExpr(), node.asDefiningArgument()] +} + +Expr asSinkExpr(DataFlow::Node node) { + result = node.asConvertedExpr() + or + result = + node.asOperand() + .(SideEffectOperand) + .getUse() + .(ReadSideEffectInstruction) + .getArgumentDef() + .getUnconvertedResultExpression() +} + +class TaintedPathConfiguration extends TaintTracking::Configuration { + TaintedPathConfiguration() { this = "TaintedPathConfiguration" } + + override predicate isSource(DataFlow::Node node) { isUserInput(asSourceExpr(node), _) } + + override predicate isSink(DataFlow::Node node) { + exists(FileFunction fileFunction | + fileFunction.outermostWrapperFunctionCall(asSinkExpr(node), _) + ) } } from - FileFunction fileFunction, Expr taintedArg, Expr taintSource, PathNode sourceNode, - PathNode sinkNode, string taintCause, string callChain + FileFunction fileFunction, Expr taintedArg, Expr taintSource, TaintedPathConfiguration cfg, + DataFlow::PathNode sourceNode, DataFlow::PathNode sinkNode, string taintCause, string callChain where + taintedArg = asSinkExpr(sinkNode.getNode()) and fileFunction.outermostWrapperFunctionCall(taintedArg, callChain) and - taintedWithPath(taintSource, taintedArg, sourceNode, sinkNode) and + cfg.hasFlowPath(sourceNode, sinkNode) and + taintSource = asSourceExpr(sourceNode.getNode()) and isUserInput(taintSource, taintCause) select taintedArg, sourceNode, sinkNode, "This argument to a file access function is derived from $@ and then passed to " + callChain + ".", diff --git a/cpp/ql/test/query-tests/Security/CWE/CWE-022/SAMATE/TaintedPath/TaintedPath.expected b/cpp/ql/test/query-tests/Security/CWE/CWE-022/SAMATE/TaintedPath/TaintedPath.expected index 28d9a9c82e8c..5286a25014f0 100644 --- a/cpp/ql/test/query-tests/Security/CWE/CWE-022/SAMATE/TaintedPath/TaintedPath.expected +++ b/cpp/ql/test/query-tests/Security/CWE/CWE-022/SAMATE/TaintedPath/TaintedPath.expected @@ -1,19 +1,11 @@ edges -| CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:55:27:55:38 | ... + ... | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:77:23:77:26 | (const char *)... | -| CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:55:27:55:38 | ... + ... | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:77:23:77:26 | data | -| CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:55:27:55:38 | ... + ... | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:77:23:77:26 | data | -| CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:55:27:55:38 | ... + ... | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:77:23:77:26 | data indirection | -| CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:55:27:55:38 | fgets output argument | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:77:23:77:26 | (const char *)... | -| CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:55:27:55:38 | fgets output argument | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:77:23:77:26 | data | | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:55:27:55:38 | fgets output argument | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:77:23:77:26 | data | | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:55:27:55:38 | fgets output argument | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:77:23:77:26 | data indirection | -subpaths nodes -| CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:55:27:55:38 | ... + ... | semmle.label | ... + ... | | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:55:27:55:38 | fgets output argument | semmle.label | fgets output argument | -| CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:77:23:77:26 | (const char *)... | semmle.label | (const char *)... | -| CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:77:23:77:26 | data | semmle.label | data | | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:77:23:77:26 | data | semmle.label | data | | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:77:23:77:26 | data indirection | semmle.label | data indirection | +subpaths #select -| CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:77:23:77:26 | data | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:55:27:55:38 | ... + ... | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:77:23:77:26 | data | This argument to a file access function is derived from $@ and then passed to fopen(filename). | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:55:27:55:38 | ... + ... | user input (fgets) | +| CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:77:23:77:26 | data | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:55:27:55:38 | fgets output argument | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:77:23:77:26 | data | This argument to a file access function is derived from $@ and then passed to fopen(filename). | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:55:27:55:38 | ... + ... | user input (fgets) | +| CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:77:23:77:26 | data | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:55:27:55:38 | fgets output argument | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:77:23:77:26 | data indirection | This argument to a file access function is derived from $@ and then passed to fopen(filename). | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:55:27:55:38 | ... + ... | user input (fgets) | diff --git a/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/TaintedPath.expected b/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/TaintedPath.expected index 51cfc800db68..51037632d079 100644 --- a/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/TaintedPath.expected +++ b/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/TaintedPath.expected @@ -1,19 +1,11 @@ edges -| test.c:9:23:9:26 | argv | test.c:17:11:17:18 | (const char *)... | -| test.c:9:23:9:26 | argv | test.c:17:11:17:18 | (const char *)... | | test.c:9:23:9:26 | argv | test.c:17:11:17:18 | fileName | -| test.c:9:23:9:26 | argv | test.c:17:11:17:18 | fileName | -| test.c:9:23:9:26 | argv | test.c:17:11:17:18 | fileName | -| test.c:9:23:9:26 | argv | test.c:17:11:17:18 | fileName | -| test.c:9:23:9:26 | argv | test.c:17:11:17:18 | fileName indirection | | test.c:9:23:9:26 | argv | test.c:17:11:17:18 | fileName indirection | -subpaths nodes | test.c:9:23:9:26 | argv | semmle.label | argv | -| test.c:9:23:9:26 | argv | semmle.label | argv | -| test.c:17:11:17:18 | (const char *)... | semmle.label | (const char *)... | -| test.c:17:11:17:18 | fileName | semmle.label | fileName | | test.c:17:11:17:18 | fileName | semmle.label | fileName | | test.c:17:11:17:18 | fileName indirection | semmle.label | fileName indirection | +subpaths #select | test.c:17:11:17:18 | fileName | test.c:9:23:9:26 | argv | test.c:17:11:17:18 | fileName | This argument to a file access function is derived from $@ and then passed to fopen(filename). | test.c:9:23:9:26 | argv | user input (argv) | +| test.c:17:11:17:18 | fileName | test.c:9:23:9:26 | argv | test.c:17:11:17:18 | fileName indirection | This argument to a file access function is derived from $@ and then passed to fopen(filename). | test.c:9:23:9:26 | argv | user input (argv) | From 718663415b34a20fa68c447ae4a02825f02b2a32 Mon Sep 17 00:00:00 2001 From: Jeroen Ketema Date: Sat, 26 Nov 2022 09:23:07 +0100 Subject: [PATCH 2/8] C++: Stop flow from going through another source Without this we get confusing results: ``` char *userAndFile = argv[2]; char *fileName = argv[1]; fopen(fileName, "wb+"); // Both argv[1] and argv[2] marked as source without // this change. ``` While here add some more test cases. --- .../src/Security/CWE/CWE-022/TaintedPath.ql | 2 ++ .../CWE-022/semmle/tests/TaintedPath.expected | 26 +++++++++++++++++++ .../CWE/CWE-022/semmle/tests/stdlib.h | 2 ++ .../Security/CWE/CWE-022/semmle/tests/test.c | 18 ++++++++++++- 4 files changed, 47 insertions(+), 1 deletion(-) diff --git a/cpp/ql/src/Security/CWE/CWE-022/TaintedPath.ql b/cpp/ql/src/Security/CWE/CWE-022/TaintedPath.ql index c56d3191b6d6..f7c3b5584509 100644 --- a/cpp/ql/src/Security/CWE/CWE-022/TaintedPath.ql +++ b/cpp/ql/src/Security/CWE/CWE-022/TaintedPath.ql @@ -73,6 +73,8 @@ class TaintedPathConfiguration extends TaintTracking::Configuration { fileFunction.outermostWrapperFunctionCall(asSinkExpr(node), _) ) } + + override predicate isSanitizerIn(DataFlow::Node node) { this.isSource(node) } } from diff --git a/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/TaintedPath.expected b/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/TaintedPath.expected index 51037632d079..1f82c83ea6e2 100644 --- a/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/TaintedPath.expected +++ b/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/TaintedPath.expected @@ -1,11 +1,37 @@ edges | test.c:9:23:9:26 | argv | test.c:17:11:17:18 | fileName | | test.c:9:23:9:26 | argv | test.c:17:11:17:18 | fileName indirection | +| test.c:31:22:31:25 | argv | test.c:32:11:32:18 | fileName | +| test.c:31:22:31:25 | argv | test.c:32:11:32:18 | fileName indirection | +| test.c:37:17:37:24 | fileName | test.c:38:11:38:18 | fileName indirection | +| test.c:37:17:37:24 | scanf output argument | test.c:38:11:38:18 | fileName indirection | +| test.c:43:17:43:24 | fileName | test.c:44:11:44:18 | fileName | +| test.c:43:17:43:24 | fileName | test.c:44:11:44:18 | fileName indirection | +| test.c:43:17:43:24 | scanf output argument | test.c:44:11:44:18 | fileName | +| test.c:43:17:43:24 | scanf output argument | test.c:44:11:44:18 | fileName indirection | nodes | test.c:9:23:9:26 | argv | semmle.label | argv | | test.c:17:11:17:18 | fileName | semmle.label | fileName | | test.c:17:11:17:18 | fileName indirection | semmle.label | fileName indirection | +| test.c:31:22:31:25 | argv | semmle.label | argv | +| test.c:32:11:32:18 | fileName | semmle.label | fileName | +| test.c:32:11:32:18 | fileName indirection | semmle.label | fileName indirection | +| test.c:37:17:37:24 | fileName | semmle.label | fileName | +| test.c:37:17:37:24 | scanf output argument | semmle.label | scanf output argument | +| test.c:38:11:38:18 | fileName indirection | semmle.label | fileName indirection | +| test.c:43:17:43:24 | fileName | semmle.label | fileName | +| test.c:43:17:43:24 | scanf output argument | semmle.label | scanf output argument | +| test.c:44:11:44:18 | fileName | semmle.label | fileName | +| test.c:44:11:44:18 | fileName indirection | semmle.label | fileName indirection | subpaths #select | test.c:17:11:17:18 | fileName | test.c:9:23:9:26 | argv | test.c:17:11:17:18 | fileName | This argument to a file access function is derived from $@ and then passed to fopen(filename). | test.c:9:23:9:26 | argv | user input (argv) | | test.c:17:11:17:18 | fileName | test.c:9:23:9:26 | argv | test.c:17:11:17:18 | fileName indirection | This argument to a file access function is derived from $@ and then passed to fopen(filename). | test.c:9:23:9:26 | argv | user input (argv) | +| test.c:32:11:32:18 | fileName | test.c:31:22:31:25 | argv | test.c:32:11:32:18 | fileName | This argument to a file access function is derived from $@ and then passed to fopen(filename). | test.c:31:22:31:25 | argv | user input (argv) | +| test.c:32:11:32:18 | fileName | test.c:31:22:31:25 | argv | test.c:32:11:32:18 | fileName indirection | This argument to a file access function is derived from $@ and then passed to fopen(filename). | test.c:31:22:31:25 | argv | user input (argv) | +| test.c:38:11:38:18 | fileName | test.c:37:17:37:24 | fileName | test.c:38:11:38:18 | fileName indirection | This argument to a file access function is derived from $@ and then passed to fopen(filename). | test.c:37:17:37:24 | fileName | user input (scanf) | +| test.c:38:11:38:18 | fileName | test.c:37:17:37:24 | scanf output argument | test.c:38:11:38:18 | fileName indirection | This argument to a file access function is derived from $@ and then passed to fopen(filename). | test.c:37:17:37:24 | fileName | user input (scanf) | +| test.c:44:11:44:18 | fileName | test.c:43:17:43:24 | fileName | test.c:44:11:44:18 | fileName | This argument to a file access function is derived from $@ and then passed to fopen(filename). | test.c:43:17:43:24 | fileName | user input (scanf) | +| test.c:44:11:44:18 | fileName | test.c:43:17:43:24 | fileName | test.c:44:11:44:18 | fileName indirection | This argument to a file access function is derived from $@ and then passed to fopen(filename). | test.c:43:17:43:24 | fileName | user input (scanf) | +| test.c:44:11:44:18 | fileName | test.c:43:17:43:24 | scanf output argument | test.c:44:11:44:18 | fileName | This argument to a file access function is derived from $@ and then passed to fopen(filename). | test.c:43:17:43:24 | fileName | user input (scanf) | +| test.c:44:11:44:18 | fileName | test.c:43:17:43:24 | scanf output argument | test.c:44:11:44:18 | fileName indirection | This argument to a file access function is derived from $@ and then passed to fopen(filename). | test.c:43:17:43:24 | fileName | user input (scanf) | diff --git a/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/stdlib.h b/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/stdlib.h index fe518804cc89..18e165ba5f99 100644 --- a/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/stdlib.h +++ b/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/stdlib.h @@ -11,3 +11,5 @@ FILE *fopen(const char *filename, const char *mode); int sprintf(char *s, const char *format, ...); size_t strlen(const char *s); char *strncat(char *s1, const char *s2, size_t n); +int scanf(const char *format, ...); +void *malloc(size_t size); diff --git a/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/test.c b/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/test.c index 6c2e2316b24e..63ad5516b44e 100644 --- a/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/test.c +++ b/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/test.c @@ -26,5 +26,21 @@ int main(int argc, char** argv) { strncat(fileName+len, fixed, FILENAME_MAX-len-1); fopen(fileName, "wb+"); } -} + { + char *fileName = argv[1]; + fopen(fileName, "wb+"); // BAD + } + + { + char fileName[20]; + scanf("%s", fileName); + fopen(fileName, "wb+"); // BAD + } + + { + char *fileName = malloc(20 * sizeof(char)); + scanf("%s", fileName); + fopen(fileName, "wb+"); // BAD + } +} From 378206ae7dac4a21e7e1f18465410ac08857fe2b Mon Sep 17 00:00:00 2001 From: Jeroen Ketema Date: Mon, 28 Nov 2022 09:33:02 +0100 Subject: [PATCH 3/8] C++: Stop taint from flowing to arithmetic types These are not likely to give the user much control over what can be accessed. --- cpp/ql/src/Security/CWE/CWE-022/TaintedPath.ql | 4 ++++ .../Security/CWE/CWE-022/semmle/tests/stdlib.h | 2 ++ .../query-tests/Security/CWE/CWE-022/semmle/tests/test.c | 8 ++++++++ 3 files changed, 14 insertions(+) diff --git a/cpp/ql/src/Security/CWE/CWE-022/TaintedPath.ql b/cpp/ql/src/Security/CWE/CWE-022/TaintedPath.ql index f7c3b5584509..61c984d71eee 100644 --- a/cpp/ql/src/Security/CWE/CWE-022/TaintedPath.ql +++ b/cpp/ql/src/Security/CWE/CWE-022/TaintedPath.ql @@ -75,6 +75,10 @@ class TaintedPathConfiguration extends TaintTracking::Configuration { } override predicate isSanitizerIn(DataFlow::Node node) { this.isSource(node) } + + override predicate isSanitizer(DataFlow::Node node) { + node.asExpr().(Call).getTarget().getUnspecifiedType() instanceof ArithmeticType + } } from diff --git a/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/stdlib.h b/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/stdlib.h index 18e165ba5f99..8ef60116657c 100644 --- a/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/stdlib.h +++ b/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/stdlib.h @@ -6,6 +6,7 @@ typedef struct {} FILE; #define FILENAME_MAX 1000 typedef unsigned long size_t; +#define NULL ((void*)0) FILE *fopen(const char *filename, const char *mode); int sprintf(char *s, const char *format, ...); @@ -13,3 +14,4 @@ size_t strlen(const char *s); char *strncat(char *s1, const char *s2, size_t n); int scanf(const char *format, ...); void *malloc(size_t size); +double strtod(const char *ptr, char **endptr); diff --git a/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/test.c b/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/test.c index 63ad5516b44e..198037d6b52d 100644 --- a/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/test.c +++ b/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/test.c @@ -43,4 +43,12 @@ int main(int argc, char** argv) { scanf("%s", fileName); fopen(fileName, "wb+"); // BAD } + + { + char *aNumber = getenv("A_NUMBER"); + double number = strtod(aNumber, 0); + char fileName[20]; + sprintf(fileName, "/foo/%f", number); + fopen(fileName, "wb+"); // GOOD + } } From d3cccca7f16e03d75280cb220ecaedd78505a3c0 Mon Sep 17 00:00:00 2001 From: Jeroen Ketema Date: Tue, 29 Nov 2022 11:16:24 +0100 Subject: [PATCH 4/8] C++: Filter duplicate (source, sink)-pairs --- .../src/Security/CWE/CWE-022/TaintedPath.ql | 19 +++++++++++++++++-- .../SAMATE/TaintedPath/TaintedPath.expected | 1 - .../CWE-022/semmle/tests/TaintedPath.expected | 6 ------ 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/cpp/ql/src/Security/CWE/CWE-022/TaintedPath.ql b/cpp/ql/src/Security/CWE/CWE-022/TaintedPath.ql index 61c984d71eee..4d383055c298 100644 --- a/cpp/ql/src/Security/CWE/CWE-022/TaintedPath.ql +++ b/cpp/ql/src/Security/CWE/CWE-022/TaintedPath.ql @@ -48,7 +48,9 @@ class FileFunction extends FunctionWithWrappers { } Expr asSourceExpr(DataFlow::Node node) { - result in [node.asConvertedExpr(), node.asDefiningArgument()] + result = node.asConvertedExpr() + or + result = node.asDefiningArgument() } Expr asSinkExpr(DataFlow::Node node) { @@ -79,6 +81,19 @@ class TaintedPathConfiguration extends TaintTracking::Configuration { override predicate isSanitizer(DataFlow::Node node) { node.asExpr().(Call).getTarget().getUnspecifiedType() instanceof ArithmeticType } + + predicate hasFilteredFlowPath(DataFlow::PathNode source, DataFlow::PathNode sink) { + this.hasFlowPath(source, sink) and + not exists(DataFlow::PathNode source2, DataFlow::PathNode sink2 | + this.hasFlowPath(source2, sink2) and + asSourceExpr(source.getNode()) = asSourceExpr(source2.getNode()) and + asSinkExpr(sink.getNode()) = asSinkExpr(sink2.getNode()) + | + not exists(source.getNode().asConvertedExpr()) and exists(source2.getNode().asConvertedExpr()) + or + not exists(sink.getNode().asConvertedExpr()) and exists(sink2.getNode().asConvertedExpr()) + ) + } } from @@ -87,7 +102,7 @@ from where taintedArg = asSinkExpr(sinkNode.getNode()) and fileFunction.outermostWrapperFunctionCall(taintedArg, callChain) and - cfg.hasFlowPath(sourceNode, sinkNode) and + cfg.hasFilteredFlowPath(sourceNode, sinkNode) and taintSource = asSourceExpr(sourceNode.getNode()) and isUserInput(taintSource, taintCause) select taintedArg, sourceNode, sinkNode, diff --git a/cpp/ql/test/query-tests/Security/CWE/CWE-022/SAMATE/TaintedPath/TaintedPath.expected b/cpp/ql/test/query-tests/Security/CWE/CWE-022/SAMATE/TaintedPath/TaintedPath.expected index 5286a25014f0..d9ed395e45aa 100644 --- a/cpp/ql/test/query-tests/Security/CWE/CWE-022/SAMATE/TaintedPath/TaintedPath.expected +++ b/cpp/ql/test/query-tests/Security/CWE/CWE-022/SAMATE/TaintedPath/TaintedPath.expected @@ -8,4 +8,3 @@ nodes subpaths #select | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:77:23:77:26 | data | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:55:27:55:38 | fgets output argument | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:77:23:77:26 | data | This argument to a file access function is derived from $@ and then passed to fopen(filename). | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:55:27:55:38 | ... + ... | user input (fgets) | -| CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:77:23:77:26 | data | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:55:27:55:38 | fgets output argument | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:77:23:77:26 | data indirection | This argument to a file access function is derived from $@ and then passed to fopen(filename). | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:55:27:55:38 | ... + ... | user input (fgets) | diff --git a/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/TaintedPath.expected b/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/TaintedPath.expected index 1f82c83ea6e2..33f4a4b9d75e 100644 --- a/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/TaintedPath.expected +++ b/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/TaintedPath.expected @@ -26,12 +26,6 @@ nodes subpaths #select | test.c:17:11:17:18 | fileName | test.c:9:23:9:26 | argv | test.c:17:11:17:18 | fileName | This argument to a file access function is derived from $@ and then passed to fopen(filename). | test.c:9:23:9:26 | argv | user input (argv) | -| test.c:17:11:17:18 | fileName | test.c:9:23:9:26 | argv | test.c:17:11:17:18 | fileName indirection | This argument to a file access function is derived from $@ and then passed to fopen(filename). | test.c:9:23:9:26 | argv | user input (argv) | | test.c:32:11:32:18 | fileName | test.c:31:22:31:25 | argv | test.c:32:11:32:18 | fileName | This argument to a file access function is derived from $@ and then passed to fopen(filename). | test.c:31:22:31:25 | argv | user input (argv) | -| test.c:32:11:32:18 | fileName | test.c:31:22:31:25 | argv | test.c:32:11:32:18 | fileName indirection | This argument to a file access function is derived from $@ and then passed to fopen(filename). | test.c:31:22:31:25 | argv | user input (argv) | | test.c:38:11:38:18 | fileName | test.c:37:17:37:24 | fileName | test.c:38:11:38:18 | fileName indirection | This argument to a file access function is derived from $@ and then passed to fopen(filename). | test.c:37:17:37:24 | fileName | user input (scanf) | -| test.c:38:11:38:18 | fileName | test.c:37:17:37:24 | scanf output argument | test.c:38:11:38:18 | fileName indirection | This argument to a file access function is derived from $@ and then passed to fopen(filename). | test.c:37:17:37:24 | fileName | user input (scanf) | | test.c:44:11:44:18 | fileName | test.c:43:17:43:24 | fileName | test.c:44:11:44:18 | fileName | This argument to a file access function is derived from $@ and then passed to fopen(filename). | test.c:43:17:43:24 | fileName | user input (scanf) | -| test.c:44:11:44:18 | fileName | test.c:43:17:43:24 | fileName | test.c:44:11:44:18 | fileName indirection | This argument to a file access function is derived from $@ and then passed to fopen(filename). | test.c:43:17:43:24 | fileName | user input (scanf) | -| test.c:44:11:44:18 | fileName | test.c:43:17:43:24 | scanf output argument | test.c:44:11:44:18 | fileName | This argument to a file access function is derived from $@ and then passed to fopen(filename). | test.c:43:17:43:24 | fileName | user input (scanf) | -| test.c:44:11:44:18 | fileName | test.c:43:17:43:24 | scanf output argument | test.c:44:11:44:18 | fileName indirection | This argument to a file access function is derived from $@ and then passed to fopen(filename). | test.c:43:17:43:24 | fileName | user input (scanf) | From 3dfe18b565fc10861b4702297ff2cfc78b959824 Mon Sep 17 00:00:00 2001 From: Jeroen Ketema Date: Wed, 30 Nov 2022 21:08:36 +0100 Subject: [PATCH 5/8] C++: Introduce the coarse upper bound check from default taint tracking --- .../src/Security/CWE/CWE-022/TaintedPath.ql | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/cpp/ql/src/Security/CWE/CWE-022/TaintedPath.ql b/cpp/ql/src/Security/CWE/CWE-022/TaintedPath.ql index 4d383055c298..f15a9bb1ad21 100644 --- a/cpp/ql/src/Security/CWE/CWE-022/TaintedPath.ql +++ b/cpp/ql/src/Security/CWE/CWE-022/TaintedPath.ql @@ -65,6 +65,29 @@ Expr asSinkExpr(DataFlow::Node node) { .getUnconvertedResultExpression() } +/** + * Holds for a variable that has any kind of upper-bound check anywhere in the program. + * This is biased towards being inclusive and being a coarse overapproximation because + * there are a lot of valid ways of doing an upper bounds checks if we don't consider + * where it occurs, for example: + * ```cpp + * if (x < 10) { sink(x); } + * + * if (10 > y) { sink(y); } + * + * if (z > 10) { z = 10; } + * sink(z); + * ``` + */ +predicate hasUpperBoundsCheck(Variable var) { + exists(RelationalOperation oper, VariableAccess access | + oper.getAnOperand() = access and + access.getTarget() = var and + // Comparing to 0 is not an upper bound check + not oper.getAnOperand().getValue() = "0" + ) +} + class TaintedPathConfiguration extends TaintTracking::Configuration { TaintedPathConfiguration() { this = "TaintedPathConfiguration" } @@ -80,6 +103,12 @@ class TaintedPathConfiguration extends TaintTracking::Configuration { override predicate isSanitizer(DataFlow::Node node) { node.asExpr().(Call).getTarget().getUnspecifiedType() instanceof ArithmeticType + or + exists(LoadInstruction load, Variable checkedVar | + load = node.asInstruction() and + checkedVar = load.getSourceAddress().(VariableAddressInstruction).getAstVariable() and + hasUpperBoundsCheck(checkedVar) + ) } predicate hasFilteredFlowPath(DataFlow::PathNode source, DataFlow::PathNode sink) { From 6dbc59d5b5ce7ba58350128a24709ce767c81285 Mon Sep 17 00:00:00 2001 From: Jeroen Ketema Date: Mon, 5 Dec 2022 23:23:08 +0100 Subject: [PATCH 6/8] C++: Simplify `isSink` based on reviewer comments --- cpp/ql/src/Security/CWE/CWE-022/TaintedPath.ql | 11 +++-------- .../CWE-022/SAMATE/TaintedPath/TaintedPath.expected | 4 +--- .../CWE/CWE-022/semmle/tests/TaintedPath.expected | 13 +++---------- 3 files changed, 7 insertions(+), 21 deletions(-) diff --git a/cpp/ql/src/Security/CWE/CWE-022/TaintedPath.ql b/cpp/ql/src/Security/CWE/CWE-022/TaintedPath.ql index f15a9bb1ad21..3003a8c281a6 100644 --- a/cpp/ql/src/Security/CWE/CWE-022/TaintedPath.ql +++ b/cpp/ql/src/Security/CWE/CWE-022/TaintedPath.ql @@ -54,8 +54,6 @@ Expr asSourceExpr(DataFlow::Node node) { } Expr asSinkExpr(DataFlow::Node node) { - result = node.asConvertedExpr() - or result = node.asOperand() .(SideEffectOperand) @@ -113,14 +111,11 @@ class TaintedPathConfiguration extends TaintTracking::Configuration { predicate hasFilteredFlowPath(DataFlow::PathNode source, DataFlow::PathNode sink) { this.hasFlowPath(source, sink) and - not exists(DataFlow::PathNode source2, DataFlow::PathNode sink2 | - this.hasFlowPath(source2, sink2) and - asSourceExpr(source.getNode()) = asSourceExpr(source2.getNode()) and - asSinkExpr(sink.getNode()) = asSinkExpr(sink2.getNode()) + not exists(DataFlow::PathNode source2 | + this.hasFlowPath(source2, sink) and + asSourceExpr(source.getNode()) = asSourceExpr(source2.getNode()) | not exists(source.getNode().asConvertedExpr()) and exists(source2.getNode().asConvertedExpr()) - or - not exists(sink.getNode().asConvertedExpr()) and exists(sink2.getNode().asConvertedExpr()) ) } } diff --git a/cpp/ql/test/query-tests/Security/CWE/CWE-022/SAMATE/TaintedPath/TaintedPath.expected b/cpp/ql/test/query-tests/Security/CWE/CWE-022/SAMATE/TaintedPath/TaintedPath.expected index d9ed395e45aa..a4f42c917961 100644 --- a/cpp/ql/test/query-tests/Security/CWE/CWE-022/SAMATE/TaintedPath/TaintedPath.expected +++ b/cpp/ql/test/query-tests/Security/CWE/CWE-022/SAMATE/TaintedPath/TaintedPath.expected @@ -1,10 +1,8 @@ edges -| CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:55:27:55:38 | fgets output argument | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:77:23:77:26 | data | | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:55:27:55:38 | fgets output argument | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:77:23:77:26 | data indirection | nodes | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:55:27:55:38 | fgets output argument | semmle.label | fgets output argument | -| CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:77:23:77:26 | data | semmle.label | data | | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:77:23:77:26 | data indirection | semmle.label | data indirection | subpaths #select -| CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:77:23:77:26 | data | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:55:27:55:38 | fgets output argument | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:77:23:77:26 | data | This argument to a file access function is derived from $@ and then passed to fopen(filename). | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:55:27:55:38 | ... + ... | user input (fgets) | +| CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:77:23:77:26 | data | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:55:27:55:38 | fgets output argument | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:77:23:77:26 | data indirection | This argument to a file access function is derived from $@ and then passed to fopen(filename). | CWE23_Relative_Path_Traversal__char_console_fopen_11.cpp:55:27:55:38 | ... + ... | user input (fgets) | diff --git a/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/TaintedPath.expected b/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/TaintedPath.expected index 33f4a4b9d75e..2306a3dc3843 100644 --- a/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/TaintedPath.expected +++ b/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/TaintedPath.expected @@ -1,31 +1,24 @@ edges -| test.c:9:23:9:26 | argv | test.c:17:11:17:18 | fileName | | test.c:9:23:9:26 | argv | test.c:17:11:17:18 | fileName indirection | -| test.c:31:22:31:25 | argv | test.c:32:11:32:18 | fileName | | test.c:31:22:31:25 | argv | test.c:32:11:32:18 | fileName indirection | | test.c:37:17:37:24 | fileName | test.c:38:11:38:18 | fileName indirection | | test.c:37:17:37:24 | scanf output argument | test.c:38:11:38:18 | fileName indirection | -| test.c:43:17:43:24 | fileName | test.c:44:11:44:18 | fileName | | test.c:43:17:43:24 | fileName | test.c:44:11:44:18 | fileName indirection | -| test.c:43:17:43:24 | scanf output argument | test.c:44:11:44:18 | fileName | | test.c:43:17:43:24 | scanf output argument | test.c:44:11:44:18 | fileName indirection | nodes | test.c:9:23:9:26 | argv | semmle.label | argv | -| test.c:17:11:17:18 | fileName | semmle.label | fileName | | test.c:17:11:17:18 | fileName indirection | semmle.label | fileName indirection | | test.c:31:22:31:25 | argv | semmle.label | argv | -| test.c:32:11:32:18 | fileName | semmle.label | fileName | | test.c:32:11:32:18 | fileName indirection | semmle.label | fileName indirection | | test.c:37:17:37:24 | fileName | semmle.label | fileName | | test.c:37:17:37:24 | scanf output argument | semmle.label | scanf output argument | | test.c:38:11:38:18 | fileName indirection | semmle.label | fileName indirection | | test.c:43:17:43:24 | fileName | semmle.label | fileName | | test.c:43:17:43:24 | scanf output argument | semmle.label | scanf output argument | -| test.c:44:11:44:18 | fileName | semmle.label | fileName | | test.c:44:11:44:18 | fileName indirection | semmle.label | fileName indirection | subpaths #select -| test.c:17:11:17:18 | fileName | test.c:9:23:9:26 | argv | test.c:17:11:17:18 | fileName | This argument to a file access function is derived from $@ and then passed to fopen(filename). | test.c:9:23:9:26 | argv | user input (argv) | -| test.c:32:11:32:18 | fileName | test.c:31:22:31:25 | argv | test.c:32:11:32:18 | fileName | This argument to a file access function is derived from $@ and then passed to fopen(filename). | test.c:31:22:31:25 | argv | user input (argv) | +| test.c:17:11:17:18 | fileName | test.c:9:23:9:26 | argv | test.c:17:11:17:18 | fileName indirection | This argument to a file access function is derived from $@ and then passed to fopen(filename). | test.c:9:23:9:26 | argv | user input (argv) | +| test.c:32:11:32:18 | fileName | test.c:31:22:31:25 | argv | test.c:32:11:32:18 | fileName indirection | This argument to a file access function is derived from $@ and then passed to fopen(filename). | test.c:31:22:31:25 | argv | user input (argv) | | test.c:38:11:38:18 | fileName | test.c:37:17:37:24 | fileName | test.c:38:11:38:18 | fileName indirection | This argument to a file access function is derived from $@ and then passed to fopen(filename). | test.c:37:17:37:24 | fileName | user input (scanf) | -| test.c:44:11:44:18 | fileName | test.c:43:17:43:24 | fileName | test.c:44:11:44:18 | fileName | This argument to a file access function is derived from $@ and then passed to fopen(filename). | test.c:43:17:43:24 | fileName | user input (scanf) | +| test.c:44:11:44:18 | fileName | test.c:43:17:43:24 | fileName | test.c:44:11:44:18 | fileName indirection | This argument to a file access function is derived from $@ and then passed to fopen(filename). | test.c:43:17:43:24 | fileName | user input (scanf) | From 5637d573c1f8acead4f770ee32ae5a7e62025aa8 Mon Sep 17 00:00:00 2001 From: Jeroen Ketema Date: Tue, 6 Dec 2022 08:29:27 +0100 Subject: [PATCH 7/8] C++: Add test case that is no longer detected after latest changes --- .../Security/CWE/CWE-022/semmle/tests/stdlib.h | 2 +- .../Security/CWE/CWE-022/semmle/tests/test.c | 11 ++++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/stdlib.h b/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/stdlib.h index 8ef60116657c..5b6483480f73 100644 --- a/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/stdlib.h +++ b/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/stdlib.h @@ -6,7 +6,6 @@ typedef struct {} FILE; #define FILENAME_MAX 1000 typedef unsigned long size_t; -#define NULL ((void*)0) FILE *fopen(const char *filename, const char *mode); int sprintf(char *s, const char *format, ...); @@ -15,3 +14,4 @@ char *strncat(char *s1, const char *s2, size_t n); int scanf(const char *format, ...); void *malloc(size_t size); double strtod(const char *ptr, char **endptr); +char *getenv(const char *name); diff --git a/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/test.c b/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/test.c index 198037d6b52d..a11fd73edd7e 100644 --- a/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/test.c +++ b/cpp/ql/test/query-tests/Security/CWE/CWE-022/semmle/tests/test.c @@ -39,7 +39,7 @@ int main(int argc, char** argv) { } { - char *fileName = malloc(20 * sizeof(char)); + char *fileName = (char*)malloc(20 * sizeof(char)); scanf("%s", fileName); fopen(fileName, "wb+"); // BAD } @@ -51,4 +51,13 @@ int main(int argc, char** argv) { sprintf(fileName, "/foo/%f", number); fopen(fileName, "wb+"); // GOOD } + + { + void read(const char *fileName); + read(argv[1]); // BAD [NOT DETECTED] + } +} + +void read(char *fileName) { + fopen(fileName, "wb+"); } From 995efef5dab179d08a43670d45a3e0dcd1b6d671 Mon Sep 17 00:00:00 2001 From: Jeroen Ketema Date: Tue, 6 Dec 2022 09:03:21 +0100 Subject: [PATCH 8/8] C++: Add explanatory comment to `hasFilteredFlowPath` --- cpp/ql/src/Security/CWE/CWE-022/TaintedPath.ql | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cpp/ql/src/Security/CWE/CWE-022/TaintedPath.ql b/cpp/ql/src/Security/CWE/CWE-022/TaintedPath.ql index 3003a8c281a6..be4f57454aaf 100644 --- a/cpp/ql/src/Security/CWE/CWE-022/TaintedPath.ql +++ b/cpp/ql/src/Security/CWE/CWE-022/TaintedPath.ql @@ -111,6 +111,10 @@ class TaintedPathConfiguration extends TaintTracking::Configuration { predicate hasFilteredFlowPath(DataFlow::PathNode source, DataFlow::PathNode sink) { this.hasFlowPath(source, sink) and + // The use of `isUserInput` in `isSink` in combination with `asSourceExpr` causes + // duplicate results. Filter these duplicates. The proper solution is to switch to + // using `LocalFlowSource` and `RemoteFlowSource`, but this currently only supports + // a subset of the cases supported by `isUserInput`. not exists(DataFlow::PathNode source2 | this.hasFlowPath(source2, sink) and asSourceExpr(source.getNode()) = asSourceExpr(source2.getNode())