diff --git a/cmd/csaf_downloader/config.go b/cmd/csaf_downloader/config.go index 6c16b37..56afc47 100644 --- a/cmd/csaf_downloader/config.go +++ b/cmd/csaf_downloader/config.go @@ -11,6 +11,7 @@ package main import ( "net/http" + "github.com/csaf-poc/csaf_distribution/v2/internal/filter" "github.com/csaf-poc/csaf_distribution/v2/internal/models" "github.com/csaf-poc/csaf_distribution/v2/internal/options" ) @@ -30,14 +31,16 @@ type config struct { Worker int `long:"worker" short:"w" description:"NUMber of concurrent downloads" value-name:"NUM" toml:"worker"` Range *models.TimeRange `long:"timerange" short:"t" description:"RANGE of time from which advisories to download" value-name:"RANGE" toml:"timerange"` Folder string `long:"folder" short:"f" description:"Download into a given subFOLDER" value-name:"FOLDER" toml:"folder"` - - ExtraHeader http.Header `long:"header" short:"H" description:"One or more extra HTTP header fields" toml:"header"` + IgnorePattern []string `long:"ignorepattern" short:"i" description:"Do not download files if their URLs match any of the given PATTERNs" value-name:"PATTERN" toml:"ignorepattern"` + ExtraHeader http.Header `long:"header" short:"H" description:"One or more extra HTTP header fields" toml:"header"` RemoteValidator string `long:"validator" description:"URL to validate documents remotely" value-name:"URL" toml:"validator"` RemoteValidatorCache string `long:"validatorcache" description:"FILE to cache remote validations" value-name:"FILE" toml:"validatorcache"` RemoteValidatorPresets []string `long:"validatorpreset" description:"One or more PRESETS to validate remotely" value-name:"PRESETS" toml:"validatorpreset"` Config string `short:"c" long:"config" description:"Path to config TOML file" value-name:"TOML-FILE" toml:"-"` + + ignorePattern filter.PatternMatcher } // configPaths are the potential file locations of the config file. 
@@ -51,11 +54,9 @@ var configPaths = []string{ func parseArgsConfig() ([]string, *config, error) { p := options.Parser[config]{ DefaultConfigLocations: configPaths, - ConfigLocation: func(cfg *config) string { - return cfg.Config - }, - Usage: "[OPTIONS] domain...", - HasVersion: func(cfg *config) bool { return cfg.Version }, + ConfigLocation: func(cfg *config) string { return cfg.Config }, + Usage: "[OPTIONS] domain...", + HasVersion: func(cfg *config) bool { return cfg.Version }, SetDefaults: func(cfg *config) { cfg.Worker = defaultWorker cfg.RemoteValidatorPresets = []string{defaultPreset} @@ -73,8 +74,22 @@ func parseArgsConfig() ([]string, *config, error) { return p.Parse() } -// prepare prepares internal state of a loaded configuration. -func (cfg *config) prepare() error { - // TODO: Implement me! +// ignoreURL returns true if the given URL should not be downloaded. +func (cfg *config) ignoreURL(u string) bool { + return cfg.ignorePattern.Matches(u) +} + +// compileIgnorePatterns compiles the configured patterns to be ignored. +func (cfg *config) compileIgnorePatterns() error { + pm, err := filter.NewPatternMatcher(cfg.IgnorePattern) + if err != nil { + return err + } + cfg.ignorePattern = pm return nil } + +// prepare prepares internal state of a loaded configuration.
+func (cfg *config) prepare() error { + return cfg.compileIgnorePatterns() +} diff --git a/cmd/csaf_downloader/downloader.go b/cmd/csaf_downloader/downloader.go index 8c5312b..8d6fd80 100644 --- a/cmd/csaf_downloader/downloader.go +++ b/cmd/csaf_downloader/downloader.go @@ -347,6 +347,13 @@ nextAdvisory: continue } + if d.cfg.ignoreURL(file.URL()) { + if d.cfg.Verbose { + log.Printf("Ignoring %q.\n", file.URL()) + } + continue + } + resp, err := client.Get(file.URL()) if err != nil { log.Printf("WARN: cannot get '%s': %v\n", file.URL(), err) diff --git a/docs/csaf_downloader.md b/docs/csaf_downloader.md index 24841ae..2187eff 100644 --- a/docs/csaf_downloader.md +++ b/docs/csaf_downloader.md @@ -16,6 +16,7 @@ Application Options: -w, --worker=NUM NUMber of concurrent downloads (default: 2) -t, --timerange=RANGE RANGE of time from which advisories to download -f, --folder=FOLDER Download all into a given subFOLDER + -i, --ignorepattern=PATTERN Do not download files if their URLs match any of the given PATTERNs -H, --header= One or more extra HTTP header fields --validator=URL URL to validate documents remotely --validatorcache=FILE FILE to cache remote validations @@ -54,6 +55,7 @@ verbose = false worker = 2 # timerange # not set by default # folder # not set by default +# ignorepattern # not set by default # header # not set by default # validator # not set by default # validatorcache # not set by default @@ -92,3 +94,14 @@ All interval boundaries are inclusive. If the `folder` option is given all the advisories are stored in a subfolder of this name. Otherwise the advisories are each stored in a folder named by the year they are from. + +You can ignore certain advisories while downloading by specifying a list +of regular expressions to match their URLs by using the `ignorepattern` +option. + +E.g. `-i='.*white.*' -i='.*red.*'` will ignore files whose URLs contain +the substrings **white** or **red**.
+In the config file this has to be noted as: +``` +ignorepattern = [".*white.*", ".*red.*"] +``` diff --git a/internal/filter/filter.go b/internal/filter/filter.go new file mode 100644 index 0000000..bdc6afb --- /dev/null +++ b/internal/filter/filter.go @@ -0,0 +1,42 @@ +// This file is Free Software under the MIT License +// without warranty, see README.md and LICENSES/MIT.txt for details. +// +// SPDX-License-Identifier: MIT +// +// SPDX-FileCopyrightText: 2023 German Federal Office for Information Security (BSI) +// Software-Engineering: 2023 Intevation GmbH + +// Package filter implements helpers to filter advisories. +package filter + +import ( + "fmt" + "regexp" +) + +// PatternMatcher is a list of regular expressions. +type PatternMatcher []*regexp.Regexp + +// NewPatternMatcher compiles a new list of regular expressions from +// a given list of strings. +func NewPatternMatcher(patterns []string) (PatternMatcher, error) { + pm := make(PatternMatcher, 0, len(patterns)) + for _, pattern := range patterns { + expr, err := regexp.Compile(pattern) + if err != nil { + return nil, fmt.Errorf("invalid ignore pattern: %w", err) + } + pm = append(pm, expr) + } + return pm, nil +} + +// Matches returns true if the given string matches any of the expressions. +func (pm PatternMatcher) Matches(s string) bool { + for _, expr := range pm { + if expr.MatchString(s) { + return true + } + } + return false +}