1
0
Fork 0
mirror of https://github.com/gocsaf/csaf.git synced 2025-12-22 11:55:40 +01:00

Merge pull request #419 from csaf-poc/download-ignore-pattern

Downloader: ignore advisories by given patterns
This commit is contained in:
JanHoefelmeyer 2023-08-16 09:20:25 +02:00 committed by GitHub
commit 2d1dc180c8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 87 additions and 10 deletions

View file

@ -11,6 +11,7 @@ package main
import (
"net/http"
"github.com/csaf-poc/csaf_distribution/v2/internal/filter"
"github.com/csaf-poc/csaf_distribution/v2/internal/models"
"github.com/csaf-poc/csaf_distribution/v2/internal/options"
)
@ -30,14 +31,16 @@ type config struct {
Worker int `long:"worker" short:"w" description:"NUMber of concurrent downloads" value-name:"NUM" toml:"worker"`
Range *models.TimeRange `long:"timerange" short:"t" description:"RANGE of time from which advisories to download" value-name:"RANGE" toml:"timerange"`
Folder string `long:"folder" short:"f" description:"Download into a given subFOLDER" value-name:"FOLDER" toml:"folder"`
ExtraHeader http.Header `long:"header" short:"H" description:"One or more extra HTTP header fields" toml:"header"`
IgnorePattern []string `long:"ignorepattern" short:"i" description:"Do not download files if their URLs match any of the given PATTERNs" value-name:"PATTERN" toml:"ignorepattern"`
ExtraHeader http.Header `long:"header" short:"H" description:"One or more extra HTTP header fields" toml:"header"`
RemoteValidator string `long:"validator" description:"URL to validate documents remotely" value-name:"URL" toml:"validator"`
RemoteValidatorCache string `long:"validatorcache" description:"FILE to cache remote validations" value-name:"FILE" toml:"validatorcache"`
RemoteValidatorPresets []string `long:"validatorpreset" description:"One or more PRESETS to validate remotely" value-name:"PRESETS" toml:"validatorpreset"`
Config string `short:"c" long:"config" description:"Path to config TOML file" value-name:"TOML-FILE" toml:"-"`
ignorePattern filter.PatternMatcher
}
// configPaths are the potential file locations of the config file.
@ -51,11 +54,9 @@ var configPaths = []string{
func parseArgsConfig() ([]string, *config, error) {
p := options.Parser[config]{
DefaultConfigLocations: configPaths,
ConfigLocation: func(cfg *config) string {
return cfg.Config
},
Usage: "[OPTIONS] domain...",
HasVersion: func(cfg *config) bool { return cfg.Version },
ConfigLocation: func(cfg *config) string { return cfg.Config },
Usage: "[OPTIONS] domain...",
HasVersion: func(cfg *config) bool { return cfg.Version },
SetDefaults: func(cfg *config) {
cfg.Worker = defaultWorker
cfg.RemoteValidatorPresets = []string{defaultPreset}
@ -73,8 +74,22 @@ func parseArgsConfig() ([]string, *config, error) {
return p.Parse()
}
// prepare prepares internal state of a loaded configuration.
func (cfg *config) prepare() error {
// TODO: Implement me!
// ignoreURL reports whether the given URL matches one of the compiled
// ignore patterns, i.e. whether the file behind it should not be downloaded.
func (cfg *config) ignoreURL(u string) bool {
	return cfg.ignorePattern.Matches(u)
}
// compileIgnorePatterns turns the configured IgnorePattern strings into a
// pattern matcher and stores it in the configuration. If a pattern cannot
// be compiled the error is returned and the stored matcher is left untouched.
func (cfg *config) compileIgnorePatterns() error {
	matcher, err := filter.NewPatternMatcher(cfg.IgnorePattern)
	if err != nil {
		return err
	}
	// Only replace the matcher once every pattern compiled successfully.
	cfg.ignorePattern = matcher
	return nil
}
// prepare derives the internal state of a loaded configuration.
// At present this consists of compiling the ignore patterns.
func (cfg *config) prepare() error {
	err := cfg.compileIgnorePatterns()
	return err
}

View file

@ -347,6 +347,13 @@ nextAdvisory:
continue
}
if d.cfg.ignoreURL(file.URL()) {
if d.cfg.Verbose {
log.Printf("Ignoring %q.\n", file.URL())
}
continue
}
resp, err := client.Get(file.URL())
if err != nil {
log.Printf("WARN: cannot get '%s': %v\n", file.URL(), err)

View file

@ -16,6 +16,7 @@ Application Options:
-w, --worker=NUM NUMber of concurrent downloads (default: 2)
-t, --timerange=RANGE RANGE of time from which advisories to download
-f, --folder=FOLDER Download all into a given subFOLDER
-i, --ignorepattern=PATTERN Do not download files if their URLs match any of the given PATTERNs
-H, --header= One or more extra HTTP header fields
--validator=URL URL to validate documents remotely
--validatorcache=FILE FILE to cache remote validations
@ -54,6 +55,7 @@ verbose = false
worker = 2
# timerange # not set by default
# folder # not set by default
# ignorepattern # not set by default
# header # not set by default
# validator # not set by default
# validatorcache # not set by default
@ -92,3 +94,14 @@ All interval boundaries are inclusive.
If the `folder` option is given all the advisories are stored in a subfolder
of this name. Otherwise the advisories are each stored in a folder named
by the year they are from.
You can ignore certain advisories while downloading by specifying a list
of regular expressions to match their URLs by using the `ignorepattern`
option.
E.g. `-i='.*white.*' -i='.*red.*'` will ignore files whose URLs contain
the substrings **white** or **red**.
In the config file this has to be noted as:
```
ignorepattern = [".*white.*", ".*red.*"]
```

42
internal/filter/filter.go Normal file
View file

@ -0,0 +1,42 @@
// This file is Free Software under the MIT License
// without warranty, see README.md and LICENSES/MIT.txt for details.
//
// SPDX-License-Identifier: MIT
//
// SPDX-FileCopyrightText: 2023 German Federal Office for Information Security (BSI) <https://www.bsi.bund.de>
// Software-Engineering: 2023 Intevation GmbH <https://intevation.de>
// Package filter implements helpers to filter advisories.
package filter
import (
"fmt"
"regexp"
)
// PatternMatcher holds a set of compiled regular expressions that are
// tried one after another.
type PatternMatcher []*regexp.Regexp

// NewPatternMatcher builds a PatternMatcher by compiling every string in
// patterns as a regular expression. The first pattern that fails to compile
// aborts the construction; in that case a nil matcher and a wrapped error
// are returned.
func NewPatternMatcher(patterns []string) (PatternMatcher, error) {
	compiled := make(PatternMatcher, len(patterns))
	for i := range patterns {
		re, err := regexp.Compile(patterns[i])
		if err != nil {
			return nil, fmt.Errorf("invalid ignore pattern: %w", err)
		}
		compiled[i] = re
	}
	return compiled, nil
}

// Matches reports whether at least one of the expressions matches s.
// A matcher without any expressions never matches.
func (pm PatternMatcher) Matches(s string) bool {
	for i := range pm {
		if pm[i].MatchString(s) {
			return true
		}
	}
	return false
}