mirror of
https://github.com/gocsaf/csaf.git
synced 2025-12-22 05:40:11 +01:00
Improve joining of url paths in some situations
* Use url.JoinPath to join URLs from a few places. * Add util/joinpath.go from go 1.19, add the license in REUSE 3.0 compatible manner. resolve #223 Co-authored-by: Bernhard Reiter <bernhard@intevation.de>
This commit is contained in:
parent
324de3abca
commit
9cba4eec30
4 changed files with 388 additions and 3 deletions
51
LICENSES/LicenseRef-Go119-BSD-Patentgrant.txt
Normal file
51
LICENSES/LicenseRef-Go119-BSD-Patentgrant.txt
Normal file
|
|
@ -0,0 +1,51 @@
|
||||||
|
Copyright (c) 2009 The Go Authors. All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
|
||||||
|
Additional IP Rights Grant (Patents)
|
||||||
|
|
||||||
|
"This implementation" means the copyrightable works distributed by
|
||||||
|
Google as part of the Go project.
|
||||||
|
|
||||||
|
Google hereby grants to You a perpetual, worldwide, non-exclusive,
|
||||||
|
no-charge, royalty-free, irrevocable (except as stated in this section)
|
||||||
|
patent license to make, have made, use, offer to sell, sell, import,
|
||||||
|
transfer and otherwise run, modify and propagate the contents of this
|
||||||
|
implementation of Go, where such license applies only to those patent
|
||||||
|
claims, both currently owned or controlled by Google and acquired in
|
||||||
|
the future, licensable by Google that are necessarily infringed by this
|
||||||
|
implementation of Go. This grant does not include claims that would be
|
||||||
|
infringed only as a consequence of further modification of this
|
||||||
|
implementation. If you or your agent or exclusive licensee institute or
|
||||||
|
order or agree to the institution of patent litigation against any
|
||||||
|
entity (including a cross-claim or counterclaim in a lawsuit) alleging
|
||||||
|
that this implementation of Go or any code incorporated within this
|
||||||
|
implementation of Go constitutes direct or contributory patent
|
||||||
|
infringement, or inducement of patent infringement, then any patent
|
||||||
|
rights granted to you under this License for this implementation of Go
|
||||||
|
shall terminate as of the date such litigation is filed.
|
||||||
|
|
@ -639,7 +639,13 @@ func (p *processor) processROLIEFeed(feed string) error {
|
||||||
func (p *processor) checkIndex(base string, mask whereType) error {
|
func (p *processor) checkIndex(base string, mask whereType) error {
|
||||||
client := p.httpClient()
|
client := p.httpClient()
|
||||||
|
|
||||||
index := base + "/index.txt"
|
bu, err := url.Parse(base)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
index := util.JoinURLPath(bu, "index.txt").String()
|
||||||
|
|
||||||
p.checkTLS(index)
|
p.checkTLS(index)
|
||||||
|
|
||||||
p.badIndices.use()
|
p.badIndices.use()
|
||||||
|
|
@ -680,9 +686,16 @@ func (p *processor) checkIndex(base string, mask whereType) error {
|
||||||
// of the fields' values and if they are sorted properly. Then it passes the files to the
|
// of the fields' values and if they are sorted properly. Then it passes the files to the
|
||||||
// "integrity" functions. It returns error if some test fails, otherwise nil.
|
// "integrity" functions. It returns error if some test fails, otherwise nil.
|
||||||
func (p *processor) checkChanges(base string, mask whereType) error {
|
func (p *processor) checkChanges(base string, mask whereType) error {
|
||||||
client := p.httpClient()
|
|
||||||
changes := base + "/changes.csv"
|
bu, err := url.Parse(base)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
changes := util.JoinURLPath(bu, "changes.csv").String()
|
||||||
|
|
||||||
p.checkTLS(changes)
|
p.checkTLS(changes)
|
||||||
|
|
||||||
|
client := p.httpClient()
|
||||||
res, err := client.Get(changes)
|
res, err := client.Get(changes)
|
||||||
|
|
||||||
p.badChanges.use()
|
p.badChanges.use()
|
||||||
|
|
|
||||||
301
util/joinpath.go
Normal file
301
util/joinpath.go
Normal file
|
|
@ -0,0 +1,301 @@
|
||||||
|
// SPDX-License-Identifier: LicenseRef-Go119-BSD-Patentgrant
|
||||||
|
// SPDX-FileCopyrightText: 2009 The Go Authors, Google Inc.
|
||||||
|
|
||||||
|
// The code of this file was extracted and adjusted from
|
||||||
|
// https://cs.opensource.google/go/go/+/refs/tags/go1.19rc2:src/net/url/url.go
|
||||||
|
// by Intevation 2022
|
||||||
|
|
||||||
|
//go:build !go1.19
|
||||||
|
|
||||||
|
package util
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net/url"
|
||||||
|
"path"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// encoding names the URL component a string belongs to. The escaping
// helpers in this file take it as their mode argument.
type encoding int

// URL components distinguished by the escaping helpers. Numbering
// starts at 1, so the zero value of encoding names no component.
const (
	encodePath encoding = iota + 1
	encodePathSegment
	encodeHost
	encodeZone
	encodeUserPassword
	encodeQueryComponent
	encodeFragment
)

// upperhex lists the upper-case hexadecimal digits, indexed by nibble value.
const upperhex = "0123456789ABCDEF"
|
||||||
|
|
||||||
|
// ishex reports whether c is an ASCII hexadecimal digit
// (0-9, a-f or A-F).
func ishex(c byte) bool {
	if c >= '0' && c <= '9' {
		return true
	}
	if c >= 'a' && c <= 'f' {
		return true
	}
	return c >= 'A' && c <= 'F'
}
|
||||||
|
|
||||||
|
// unhex returns the numeric value (0-15) of the hexadecimal digit c.
// Any byte that is not a hexadecimal digit yields 0.
func unhex(c byte) byte {
	if '0' <= c && c <= '9' {
		return c - '0'
	}
	if 'a' <= c && c <= 'f' {
		return c - 'a' + 10
	}
	if 'A' <= c && c <= 'F' {
		return c - 'A' + 10
	}
	return 0
}
|
||||||
|
|
||||||
|
// Return true if the specified character should be escaped when
|
||||||
|
// appearing in a URL string, according to RFC 3986.
|
||||||
|
//
|
||||||
|
// Please be informed that for now shouldEscape does not check all
|
||||||
|
// reserved characters correctly. See golang.org/issue/5684.
|
||||||
|
func shouldEscape(c byte, mode encoding) bool {
|
||||||
|
// §2.3 Unreserved characters (alphanum)
|
||||||
|
if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
if mode == encodeHost || mode == encodeZone {
|
||||||
|
// §3.2.2 Host allows
|
||||||
|
// sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
|
||||||
|
// as part of reg-name.
|
||||||
|
// We add : because we include :port as part of host.
|
||||||
|
// We add [ ] because we include [ipv6]:port as part of host.
|
||||||
|
// We add < > because they're the only characters left that
|
||||||
|
// we could possibly allow, and Parse will reject them if we
|
||||||
|
// escape them (because hosts can't use %-encoding for
|
||||||
|
// ASCII bytes).
|
||||||
|
switch c {
|
||||||
|
case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '[', ']', '<', '>', '"':
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
switch c {
|
||||||
|
case '-', '_', '.', '~': // §2.3 Unreserved characters (mark)
|
||||||
|
return false
|
||||||
|
|
||||||
|
case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved)
|
||||||
|
// Different sections of the URL allow a few of
|
||||||
|
// the reserved characters to appear unescaped.
|
||||||
|
switch mode {
|
||||||
|
case encodePath: // §3.3
|
||||||
|
// The RFC allows : @ & = + $ but saves / ; , for assigning
|
||||||
|
// meaning to individual path segments. This package
|
||||||
|
// only manipulates the path as a whole, so we allow those
|
||||||
|
// last three as well. That leaves only ? to escape.
|
||||||
|
return c == '?'
|
||||||
|
|
||||||
|
case encodePathSegment: // §3.3
|
||||||
|
// The RFC allows : @ & = + $ but saves / ; , for assigning
|
||||||
|
// meaning to individual path segments.
|
||||||
|
return c == '/' || c == ';' || c == ',' || c == '?'
|
||||||
|
|
||||||
|
case encodeUserPassword: // §3.2.1
|
||||||
|
// The RFC allows ';', ':', '&', '=', '+', '$', and ',' in
|
||||||
|
// userinfo, so we must escape only '@', '/', and '?'.
|
||||||
|
// The parsing of userinfo treats ':' as special so we must escape
|
||||||
|
// that too.
|
||||||
|
return c == '@' || c == '/' || c == '?' || c == ':'
|
||||||
|
|
||||||
|
case encodeQueryComponent: // §3.4
|
||||||
|
// The RFC reserves (so we must escape) everything.
|
||||||
|
return true
|
||||||
|
|
||||||
|
case encodeFragment: // §4.1
|
||||||
|
// The RFC text is silent but the grammar allows
|
||||||
|
// everything, so escape nothing.
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if mode == encodeFragment {
|
||||||
|
// RFC 3986 §2.2 allows not escaping sub-delims. A subset of sub-delims are
|
||||||
|
// included in reserved from RFC 2396 §2.2. The remaining sub-delims do not
|
||||||
|
// need to be escaped. To minimize potential breakage, we apply two restrictions:
|
||||||
|
// (1) we always escape sub-delims outside of the fragment, and (2) we always
|
||||||
|
// escape single quote to avoid breaking callers that had previously assumed that
|
||||||
|
// single quotes would be escaped. See issue #19917.
|
||||||
|
switch c {
|
||||||
|
case '!', '(', ')', '*':
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Everything else must be escaped.
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// unescape unescapes a string; the mode specifies
|
||||||
|
// which section of the URL string is being unescaped.
|
||||||
|
func unescape(s string, mode encoding) (string, error) {
|
||||||
|
// Count %, check that they're well-formed.
|
||||||
|
n := 0
|
||||||
|
hasPlus := false
|
||||||
|
for i := 0; i < len(s); {
|
||||||
|
switch s[i] {
|
||||||
|
case '%':
|
||||||
|
n++
|
||||||
|
if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
|
||||||
|
s = s[i:]
|
||||||
|
if len(s) > 3 {
|
||||||
|
s = s[:3]
|
||||||
|
}
|
||||||
|
return "", url.EscapeError(s)
|
||||||
|
}
|
||||||
|
// Per https://tools.ietf.org/html/rfc3986#page-21
|
||||||
|
// in the host component %-encoding can only be used
|
||||||
|
// for non-ASCII bytes.
|
||||||
|
// But https://tools.ietf.org/html/rfc6874#section-2
|
||||||
|
// introduces %25 being allowed to escape a percent sign
|
||||||
|
// in IPv6 scoped-address literals. Yay.
|
||||||
|
if mode == encodeHost && unhex(s[i+1]) < 8 && s[i:i+3] != "%25" {
|
||||||
|
return "", url.EscapeError(s[i : i+3])
|
||||||
|
}
|
||||||
|
if mode == encodeZone {
|
||||||
|
// RFC 6874 says basically "anything goes" for zone identifiers
|
||||||
|
// and that even non-ASCII can be redundantly escaped,
|
||||||
|
// but it seems prudent to restrict %-escaped bytes here to those
|
||||||
|
// that are valid host name bytes in their unescaped form.
|
||||||
|
// That is, you can use escaping in the zone identifier but not
|
||||||
|
// to introduce bytes you couldn't just write directly.
|
||||||
|
// But Windows puts spaces here! Yay.
|
||||||
|
v := unhex(s[i+1])<<4 | unhex(s[i+2])
|
||||||
|
if s[i:i+3] != "%25" && v != ' ' && shouldEscape(v, encodeHost) {
|
||||||
|
return "", url.EscapeError(s[i : i+3])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
i += 3
|
||||||
|
case '+':
|
||||||
|
hasPlus = mode == encodeQueryComponent
|
||||||
|
i++
|
||||||
|
default:
|
||||||
|
if (mode == encodeHost || mode == encodeZone) && s[i] < 0x80 && shouldEscape(s[i], mode) {
|
||||||
|
return "", url.InvalidHostError(s[i : i+1])
|
||||||
|
}
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if n == 0 && !hasPlus {
|
||||||
|
return s, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var t strings.Builder
|
||||||
|
t.Grow(len(s) - 2*n)
|
||||||
|
for i := 0; i < len(s); i++ {
|
||||||
|
switch s[i] {
|
||||||
|
case '%':
|
||||||
|
t.WriteByte(unhex(s[i+1])<<4 | unhex(s[i+2]))
|
||||||
|
i += 2
|
||||||
|
case '+':
|
||||||
|
if mode == encodeQueryComponent {
|
||||||
|
t.WriteByte(' ')
|
||||||
|
} else {
|
||||||
|
t.WriteByte('+')
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
t.WriteByte(s[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return t.String(), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func escape(s string, mode encoding) string {
|
||||||
|
spaceCount, hexCount := 0, 0
|
||||||
|
for i := 0; i < len(s); i++ {
|
||||||
|
c := s[i]
|
||||||
|
if shouldEscape(c, mode) {
|
||||||
|
if c == ' ' && mode == encodeQueryComponent {
|
||||||
|
spaceCount++
|
||||||
|
} else {
|
||||||
|
hexCount++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if spaceCount == 0 && hexCount == 0 {
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
var buf [64]byte
|
||||||
|
var t []byte
|
||||||
|
|
||||||
|
required := len(s) + 2*hexCount
|
||||||
|
if required <= len(buf) {
|
||||||
|
t = buf[:required]
|
||||||
|
} else {
|
||||||
|
t = make([]byte, required)
|
||||||
|
}
|
||||||
|
|
||||||
|
if hexCount == 0 {
|
||||||
|
copy(t, s)
|
||||||
|
for i := 0; i < len(s); i++ {
|
||||||
|
if s[i] == ' ' {
|
||||||
|
t[i] = '+'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return string(t)
|
||||||
|
}
|
||||||
|
|
||||||
|
j := 0
|
||||||
|
for i := 0; i < len(s); i++ {
|
||||||
|
switch c := s[i]; {
|
||||||
|
case c == ' ' && mode == encodeQueryComponent:
|
||||||
|
t[j] = '+'
|
||||||
|
j++
|
||||||
|
case shouldEscape(c, mode):
|
||||||
|
t[j] = '%'
|
||||||
|
t[j+1] = upperhex[c>>4]
|
||||||
|
t[j+2] = upperhex[c&15]
|
||||||
|
j += 3
|
||||||
|
default:
|
||||||
|
t[j] = s[i]
|
||||||
|
j++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return string(t)
|
||||||
|
}
|
||||||
|
|
||||||
|
func setPath(u *url.URL, p string) error {
|
||||||
|
path, err := unescape(p, encodePath)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
u.Path = path
|
||||||
|
if escp := escape(path, encodePath); p == escp {
|
||||||
|
// Default encoding is fine.
|
||||||
|
u.RawPath = ""
|
||||||
|
} else {
|
||||||
|
u.RawPath = p
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// JoinURLPath returns a new URL with the provided path elements joined to
|
||||||
|
// any existing path and the resulting path cleaned of any ./ or ../ elements.
|
||||||
|
// Any sequences of multiple / characters will be reduced to a single /.
|
||||||
|
func JoinURLPath(u *url.URL, elem ...string) *url.URL {
|
||||||
|
|
||||||
|
url := *u
|
||||||
|
if len(elem) > 0 {
|
||||||
|
elem = append([]string{u.EscapedPath()}, elem...)
|
||||||
|
p := path.Join(elem...)
|
||||||
|
// path.Join will remove any trailing slashes.
|
||||||
|
// Preserve at least one.
|
||||||
|
if strings.HasSuffix(elem[len(elem)-1], "/") && !strings.HasSuffix(p, "/") {
|
||||||
|
p += "/"
|
||||||
|
}
|
||||||
|
setPath(&url, p)
|
||||||
|
}
|
||||||
|
return &url
|
||||||
|
}
|
||||||
20
util/joinpath_go119.go
Normal file
20
util/joinpath_go119.go
Normal file
|
|
@ -0,0 +1,20 @@
|
||||||
|
// This file is Free Software under the MIT License
|
||||||
|
// without warranty, see README.md and LICENSES/MIT.txt for details.
|
||||||
|
//
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
//
|
||||||
|
// SPDX-FileCopyrightText: 2022 German Federal Office for Information Security (BSI) <https://www.bsi.bund.de>
|
||||||
|
// Software-Engineering: 2022 Intevation GmbH <https://intevation.de>
|
||||||
|
|
||||||
|
//go:build go1.19
|
||||||
|
|
||||||
|
package util
|
||||||
|
|
||||||
|
import "net/url"
|
||||||
|
|
||||||
|
// JoinURLPath returns a new URL with the provided path elements joined to
// any existing path and the resulting path cleaned of any ./ or ../ elements.
// Any sequences of multiple / characters will be reduced to a single /.
//
// This variant delegates to the JoinPath method of url.URL.
func JoinURLPath(u *url.URL, elem ...string) *url.URL {
	return u.JoinPath(elem...)
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue