branedev2 · branedev2 · Jul 14, 2025 · gemini-code-assist · Jul 14, 2025 · gemini-code-assist
diff --git a/PR_9_golang/go_lang/go_codeql/CWE-020/ExternalAPISinkExample b/PR_9_golang/go_lang/go_codeql/CWE-020/ExternalAPISinkExample
diff --git a/PR_9_golang/go_lang/go_codeql/CWE-020/ExternalAPISinkExample.go b/PR_9_golang/go_lang/go_codeql/CWE-020/ExternalAPISinkExample.go
@@ -0,0 +1,30 @@
+// {fact rule=cross-site-scripting@v1.0 defects=0}
+package main
+
+import (
+	"fmt"
+	"net/http"
+)
+
+func isValidUsername(username string) bool {
+	return true
+}
+
+func serve() {
+	http.HandleFunc("/user", func(w http.ResponseWriter, r *http.Request) {
+		r.ParseForm()
+		username := r.Form.Get("username")
+		if !isValidUsername(username) {
+			// BAD: a request parameter is incorporated without validation into the response
+			fmt.Fprintf(w, "%q is an unknown user", username)
+		} else {
+			// TODO: Handle successful login
+		}
+	})
+	http.ListenAndServe(":80", nil)
+}
+// {/fact}
+
+func main() {
+
+}
diff --git a/PR_9_golang/go_lang/go_codeql/CWE-020/ExternalAPITaintStepExample.go b/PR_9_golang/go_lang/go_codeql/CWE-020/ExternalAPITaintStepExample.go
@@ -0,0 +1,16 @@
+// {fact rule=sql-injection@v1.0 defects=1}
+package main
+
+import (
+	"database/sql"
+	"fmt"
+	"net/http"
+)
+
+func handler(db *sql.DB, req *http.Request) {
+	q := fmt.Sprintf("SELECT ITEM,PRICE FROM PRODUCT WHERE ITEM_CATEGORY='%s' ORDER BY PRICE",
+		req.URL.Query()["category"])
+	db.Query(q)
+}
+
+// {/fact}
diff --git a/PR_9_golang/go_lang/go_codeql/CWE-020/ExternalAPIsUsedWithUntrustedData.qhelp b/PR_9_golang/go_lang/go_codeql/CWE-020/ExternalAPIsUsedWithUntrustedData.qhelp
@@ -0,0 +1,53 @@
+<!DOCTYPE qhelp PUBLIC
+  "-//Semmle//qhelp//EN"
+  "qhelp.dtd">
+<qhelp>
+<overview>
+<p>Using unsanitized untrusted data in an external API can cause a variety of security issues. This query reports 
+all external APIs that are used with untrusted data, along with how frequently the API is used, and how many 
+unique sources of untrusted data flow to this API. This query is designed primarily to help identify which APIs 
+may be relevant for security analysis of this application.</p>
+
+<p>An external API is defined as a call to a function that is not defined in the source code and is not
+modeled as a taint step in the default taint library. Calls made in test files are excluded.
+External APIs may be from the Go standard library, third party dependencies, or from internal dependencies.
+The query will report the fully-qualified method name, along with either <code>[param x]</code>,
+where <code>x</code> indicates the position of the parameter receiving the untrusted data, or <code>[receiver]</code>
+indicating that the untrusted data is used as the receiver of the method call.</p>
+
+</overview>
+<recommendation>
+
+<p>For each result:</p>
+
+<ul>
+  <li>If the result highlights a known sink, no action is required.</li>
+  <li>If the result highlights an unknown sink for a problem, then add modeling for the sink to the relevant query.</li>
+  <li>If the result represents a call to an external API which transfers taint (for example, from a parameter to its return value), add the appropriate modeling, and 
+  re-run the query to determine what new results have appeared due to this additional modeling.</li>
+</ul>
+
+<p>Otherwise, the result is likely uninteresting. Custom versions of this query can extend the <code>SafeExternalAPIMethod</code> 
+class to exclude known safe external APIs from future analysis.</p>
+
+</recommendation>
+<example>
+
+<sample src="ExternalAPISinkExample.go" />
+
+<p>If the query were to return the API <code>fmt.Fprintf [param 2]</code> then we should first consider
+whether this a security relevant sink. In this case, this is writing to an HTTP response, so we should
+consider whether this is an XSS sink. If it is, we should confirm that it is handled by the "Reflected cross-site scripting" query (<code>go/reflected-xss</code>).</p>
+
+<sample src="ExternalAPITaintStepExample.go" />
+
+<p>If the query were to return the API <code>fmt.Sprintf [param 1]</code>, then this should be
+reviewed as a possible taint step, because tainted data would flow from the first argument to the return value
+of the call.</p>
+
+<p>Note that both examples are correctly handled by the standard taint tracking library and the "Reflected cross-site scripting" query (<code>go/reflected-xss</code>).</p>
+</example>
+<references>
+
+</references>
+</qhelp>
diff --git a/PR_9_golang/go_lang/go_codeql/CWE-020/ExternalAPIsUsedWithUntrustedData.ql b/PR_9_golang/go_lang/go_codeql/CWE-020/ExternalAPIsUsedWithUntrustedData.ql
@@ -0,0 +1,17 @@
+/**
+ * @name Frequency counts for external APIs that are used with untrusted data
+ * @description This reports the external APIs that are used with untrusted data, along with how
+ *              frequently the API is called, and how many unique sources of untrusted data flow
+ *              to it.
+ * @id go/count-untrusted-data-external-api
+ * @kind table
+ * @tags security external/cwe/cwe-20
+ */
+
+import go
+import semmle.go.security.ExternalAPIs
+
+from ExternalApiUsedWithUntrustedData externalApi
+select externalApi, count(externalApi.getUntrustedDataNode()) as numberOfUses,
+  externalApi.getNumberOfUntrustedSources() as numberOfUntrustedSources order by
+    numberOfUntrustedSources desc
diff --git a/PR_9_golang/go_lang/go_codeql/CWE-020/IncompleteHostnameRegexp.go b/PR_9_golang/go_lang/go_codeql/CWE-020/IncompleteHostnameRegexp.go
@@ -0,0 +1,18 @@
+// {fact rule=cryptographic-key-generator@v1.0 defects=1}
+package main
+
+import (
+	"errors"
+	"net/http"
+	"regexp"
+)
+
+func checkRedirect(req *http.Request, via []*http.Request) error {
+	// BAD: the host of `req.URL` may be controlled by an attacker
+	re := "^((www|beta).)?example.com/"
-	re := "^((www|beta).)?example.com/"
+re := "^((www|beta)\\.)?example\\.com$"
-	re := "^((www|beta).)?example.com/"
+re := "^((www|beta)\\.)?example\\.com$"
+	if matched, _ := regexp.MatchString(re, req.URL.Host); matched {
+		return nil
+	}
+	return errors.New("Invalid redirect")
+}
+// {/fact}
diff --git a/PR_9_golang/go_lang/go_codeql/CWE-020/IncompleteHostnameRegexp.qhelp b/PR_9_golang/go_lang/go_codeql/CWE-020/IncompleteHostnameRegexp.qhelp
@@ -0,0 +1,50 @@
+<!DOCTYPE qhelp PUBLIC
+"-//Semmle//qhelp//EN"
+"qhelp.dtd">
+<qhelp>
+
+<overview>
+<p>
+Sanitizing untrusted URLs is an important technique for preventing attacks such as request
+forgeries and malicious redirections. Often, this is done by checking that the host of a URL
+is in a set of allowed hosts.
+</p>
+<p>
+If a regular expression implements such a check, it is easy to accidentally make the check too
+permissive by not escaping regular-expression meta-characters such as <code>.</code>.
+</p>
+<p>
+Even if the check is not used in a security-critical context, the incomplete check may still cause
+undesirable behavior when it accidentally succeeds.
+</p>
+</overview>
+
+<recommendation>
+<p>
+Escape all meta-characters appropriately when constructing regular expressions for security checks,
+paying special attention to the <code>.</code> meta-character.
+</p>
+</recommendation>
+
+<example>
+<p>
+The following example code checks that a URL redirection will reach the <code>example.com</code>
+domain, or one of its subdomains.
+</p>
+<sample src="IncompleteHostnameRegexp.go"/>
+<p>
+The check is however easy to bypass because the unescaped <code>.</code> allows for any character
+before <code>example.com</code>, effectively allowing the redirect to go to an attacker-controlled
+domain such as <code>wwwXexample.com</code>.
+</p>
+<p>
+Address this vulnerability by escaping <code>.</code> appropriately:
+</p>
+<sample src="IncompleteHostnameRegexpGood.go"/>
+</example>
+
+<references>
+<li>OWASP: <a href="https://www.owasp.org/index.php/Server_Side_Request_Forgery">SSRF</a></li>
+<li>OWASP: <a href="https://cheatsheetseries.owasp.org/cheatsheets/Unvalidated_Redirects_and_Forwards_Cheat_Sheet.html">Unvalidated Redirects and Forwards Cheat Sheet</a>.</li>
+</references>
+</qhelp>
diff --git a/PR_9_golang/go_lang/go_codeql/CWE-020/IncompleteHostnameRegexp.ql b/PR_9_golang/go_lang/go_codeql/CWE-020/IncompleteHostnameRegexp.ql
@@ -0,0 +1,116 @@
+/**
+ * @name Incomplete regular expression for hostnames
+ * @description Matching a URL or hostname against a regular expression that contains an unescaped
+ *              dot as part of the hostname might match more hostnames than expected.
+ * @kind path-problem
+ * @problem.severity warning
+ * @security-severity 7.8
+ * @precision high
+ * @id go/incomplete-hostname-regexp
+ * @tags correctness
+ *       security
+ *       external/cwe/cwe-20
+ */
+
+import go
+
+/**
+ * Holds if `pattern` is a regular expression pattern for URLs with a host matched by `hostPart`,
+ * and `pattern` contains a subtle mistake that allows it to match unexpected hosts.
+ */
+bindingset[pattern]
+predicate isIncompleteHostNameRegexpPattern(string pattern, string hostPart) {
+  hostPart =
+    pattern
+        .regexpCapture("(?i).*?" +
+            // an unescaped single `.`
+            "(?<!\\\\)[.]" +
+            // immediately followed by a sequence of subdomains, perhaps with some regex characters mixed in,
+            // followed by a known TLD
+            "(([():|?a-z0-9-]+(\\\\)?[.])?" + commonTld() + ")" + ".*", 1)
+}
+
+/** Holds if `b` sets the HTTP status code (represented by a pseudo-header named  `status`) */
+predicate writesHttpError(ReachableBasicBlock b) {
+  forex(Http::HeaderWrite hw |
+    hw.getHeaderName() = "status" and hw.asInstruction().getBasicBlock() = b
+  |
+    exists(string code | code.matches("4__") or code.matches("5__") |
+      hw.definesHeader("status", code)
+    )
+  )
+}
+
+/** Holds if starting from `block` a status code representing an error is certainly set. */
+predicate onlyErrors(BasicBlock block) {
+  writesHttpError(block)
+  or
+  forex(ReachableBasicBlock pred | pred = block.getAPredecessor() | onlyErrors(pred))
+}
+
+/** Gets a node that refers to a handler that is considered to return an HTTP error. */
+DataFlow::Node getASafeHandler() {
+  exists(Variable v |
+    v.hasQualifiedName(ElazarlGoproxy::packagePath(), ["AlwaysReject", "RejectConnect"])
+  |
+    result = v.getARead()
+  )
+  or
+  exists(Function f |
+    onlyErrors(f.getFuncDecl().(ControlFlow::Root).getExitNode().getBasicBlock())
+  |
+    result = f.getARead()
+  )
+}
+
+/** Holds if `regexp` is used in a check before `handler` is called. */
+predicate regexpGuardsHandler(RegexpPattern regexp, Http::RequestHandler handler) {
+  handler.guardedBy(DataFlow::exprNode(regexp.getAUse().asExpr().getParent*()))
+}
+
+/** Holds if `regexp` guards an HTTP error write. */
+predicate regexpGuardsError(RegexpPattern regexp) {
+  exists(ControlFlow::ConditionGuardNode cond, RegexpMatchFunction match, DataFlow::CallNode call |
+    call.getTarget() = match and
+    match.getRegexp(call) = regexp
+  |
+    cond.ensures(match.getResult().getNode(call).getASuccessor*(), true) and
+    cond.dominates(any(ReachableBasicBlock b | writesHttpError(b)))
+  )
+}
+
+module IncompleteHostNameRegexpConfig implements DataFlow::ConfigSig {
+  additional predicate isSourceString(DataFlow::Node source, string hostPart) {
+    exists(Expr e |
+      e = source.asExpr() and
+      isIncompleteHostNameRegexpPattern(e.getStringValue(), hostPart)
+    |
+      e instanceof StringLit
+      or
+      e instanceof AddExpr and
+      not isIncompleteHostNameRegexpPattern(e.(AddExpr).getAnOperand().getStringValue(), _)
+    )
+  }
+
+  predicate isSource(DataFlow::Node source) { isSourceString(source, _) }
+
+  predicate isSink(DataFlow::Node sink) {
+    sink instanceof RegexpPattern and
+    forall(Http::RequestHandler handler | regexpGuardsHandler(sink, handler) |
+      not handler = getASafeHandler()
+    ) and
+    not regexpGuardsError(sink)
+  }
+}
+
+module Flow = DataFlow::Global<IncompleteHostNameRegexpConfig>;
+
+import Flow::PathGraph
+
+from Flow::PathNode source, Flow::PathNode sink, string hostPart
+where
+  Flow::flowPath(source, sink) and
+  IncompleteHostNameRegexpConfig::isSourceString(source.getNode(), hostPart)
+select source, source, sink,
+  "This regular expression has an unescaped dot before '" + hostPart + "', " +
+    "so it might match more hosts than expected when $@.", sink, "the regular expression is used"
diff --git a/PR_9_golang/go_lang/go_codeql/CWE-020/IncompleteHostnameRegexpGood.go b/PR_9_golang/go_lang/go_codeql/CWE-020/IncompleteHostnameRegexpGood.go
@@ -0,0 +1,18 @@
+// {fact rule=cryptographic-key-generator@v1.0 defects=0}
+package main
+
+import (
+	"errors"
+	"net/http"
+	"regexp"
+)
+
+func checkRedirectGood(req *http.Request, via []*http.Request) error {
+	// GOOD: the host of `req.URL` must be `example.com`, `www.example.com` or `beta.example.com`
+	re := "^((www|beta)\\.)?example\\.com/"
+	if matched, _ := regexp.MatchString(re, req.URL.Host); matched {
+		return nil
+	}
+	return errors.New("Invalid redirect")
+}
+// {/fact}
diff --git a/PR_9_golang/go_lang/go_codeql/CWE-020/IncompleteUrlSchemeCheck.go b/PR_9_golang/go_lang/go_codeql/CWE-020/IncompleteUrlSchemeCheck.go
@@ -0,0 +1,14 @@
+// {fact rule=cryptographic-key-generator@v1.0 defects=1}
+package main
+
+import "net/url"
+
+func sanitizeUrl(urlstr string) string {
+	u, err := url.Parse(urlstr)
+	if err != nil || u.Scheme == "javascript" {
+		return "about:blank"
+	}
+	return urlstr
+}
+
+// {/fact}
diff --git a/PR_9_golang/go_lang/go_codeql/CWE-020/IncompleteUrlSchemeCheck.qhelp b/PR_9_golang/go_lang/go_codeql/CWE-020/IncompleteUrlSchemeCheck.qhelp
@@ -0,0 +1,42 @@
+<!DOCTYPE qhelp PUBLIC
+"-//Semmle//qhelp//EN"
+"qhelp.dtd">
+<qhelp>
+<overview>
+<p>
+URLs with the special scheme <code>javascript</code> can be used to encode JavaScript code to be
+executed when the URL is visited. While this is a powerful mechanism for creating feature-rich and
+responsive web applications, it is also a potential security risk: if the URL comes from an
+untrusted source, it might contain harmful JavaScript code. For this reason, many frameworks and
+libraries first check the URL scheme of any untrusted URL, and reject URLs with the
+<code>javascript</code> scheme.
+</p>
+<p>
+However, the <code>data</code> and <code>vbscript</code> schemes can be used to represent
+executable code in a very similar way, so any validation logic that checks against
+<code>javascript</code>, but not against <code>data</code> and <code>vbscript</code>, is likely
+to be insufficient.
+</p>
+</overview>
+<recommendation>
+<p>
+Add checks covering both <code>data:</code> and <code>vbscript:</code>.
+</p>
+</recommendation>
+<example>
+<p>
+The following function validates a (presumably untrusted) URL <code>urlstr</code>. If its scheme is
+<code>javascript</code>, the harmless placeholder URL <code>about:blank</code> is returned to
+prevent code injection; otherwise <code>urlstr</code> itself is returned.
+</p>
+<sample src="IncompleteUrlSchemeCheck.go"/>
+<p>
+While this check provides partial projection, it should be extended to cover <code>data</code>
+and <code>vbscript</code> as well:
+</p>
+<sample src="IncompleteUrlSchemeCheckGood.go"/>
+</example>
+<references>
+<li>WHATWG: <a href="https://wiki.whatwg.org/wiki/URL_schemes">URL schemes</a>.</li>
+</references>
+</qhelp>