mirror of
https://github.com/gocsaf/csaf.git
synced 2025-12-22 11:55:40 +01:00
Merge pull request #419 from csaf-poc/download-ignore-pattern
Downloader: ignore advisories by given patterns
This commit is contained in:
commit
2d1dc180c8
4 changed files with 87 additions and 10 deletions
|
|
@ -11,6 +11,7 @@ package main
|
|||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/csaf-poc/csaf_distribution/v2/internal/filter"
|
||||
"github.com/csaf-poc/csaf_distribution/v2/internal/models"
|
||||
"github.com/csaf-poc/csaf_distribution/v2/internal/options"
|
||||
)
|
||||
|
|
@ -30,14 +31,16 @@ type config struct {
|
|||
Worker int `long:"worker" short:"w" description:"NUMber of concurrent downloads" value-name:"NUM" toml:"worker"`
|
||||
Range *models.TimeRange `long:"timerange" short:"t" description:"RANGE of time from which advisories to download" value-name:"RANGE" toml:"timerange"`
|
||||
Folder string `long:"folder" short:"f" description:"Download into a given subFOLDER" value-name:"FOLDER" toml:"folder"`
|
||||
|
||||
ExtraHeader http.Header `long:"header" short:"H" description:"One or more extra HTTP header fields" toml:"header"`
|
||||
IgnorePattern []string `long:"ignorepattern" short:"i" description:"Do not download files if their URLs match any of the given PATTERNs" value-name:"PATTERN" toml:"ignorepattern"`
|
||||
ExtraHeader http.Header `long:"header" short:"H" description:"One or more extra HTTP header fields" toml:"header"`
|
||||
|
||||
RemoteValidator string `long:"validator" description:"URL to validate documents remotely" value-name:"URL" toml:"validator"`
|
||||
RemoteValidatorCache string `long:"validatorcache" description:"FILE to cache remote validations" value-name:"FILE" toml:"validatorcache"`
|
||||
RemoteValidatorPresets []string `long:"validatorpreset" description:"One or more PRESETS to validate remotely" value-name:"PRESETS" toml:"validatorpreset"`
|
||||
|
||||
Config string `short:"c" long:"config" description:"Path to config TOML file" value-name:"TOML-FILE" toml:"-"`
|
||||
|
||||
ignorePattern filter.PatternMatcher
|
||||
}
|
||||
|
||||
// configPaths are the potential file locations of the config file.
|
||||
|
|
@ -51,11 +54,9 @@ var configPaths = []string{
|
|||
func parseArgsConfig() ([]string, *config, error) {
|
||||
p := options.Parser[config]{
|
||||
DefaultConfigLocations: configPaths,
|
||||
ConfigLocation: func(cfg *config) string {
|
||||
return cfg.Config
|
||||
},
|
||||
Usage: "[OPTIONS] domain...",
|
||||
HasVersion: func(cfg *config) bool { return cfg.Version },
|
||||
ConfigLocation: func(cfg *config) string { return cfg.Config },
|
||||
Usage: "[OPTIONS] domain...",
|
||||
HasVersion: func(cfg *config) bool { return cfg.Version },
|
||||
SetDefaults: func(cfg *config) {
|
||||
cfg.Worker = defaultWorker
|
||||
cfg.RemoteValidatorPresets = []string{defaultPreset}
|
||||
|
|
@ -73,8 +74,22 @@ func parseArgsConfig() ([]string, *config, error) {
|
|||
return p.Parse()
|
||||
}
|
||||
|
||||
// prepare prepares internal state of a loaded configuration.
|
||||
func (cfg *config) prepare() error {
|
||||
// TODO: Implement me!
|
||||
// ignoreFile returns true if the given URL should not be downloaded.
|
||||
func (cfg *config) ignoreURL(u string) bool {
|
||||
return cfg.ignorePattern.Matches(u)
|
||||
}
|
||||
|
||||
// compileIgnorePatterns compiles the configure patterns to be ignored.
|
||||
func (cfg *config) compileIgnorePatterns() error {
|
||||
pm, err := filter.NewPatternMatcher(cfg.IgnorePattern)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
cfg.ignorePattern = pm
|
||||
return nil
|
||||
}
|
||||
|
||||
// prepare prepares internal state of a loaded configuration.
|
||||
func (cfg *config) prepare() error {
|
||||
return cfg.compileIgnorePatterns()
|
||||
}
|
||||
|
|
|
|||
|
|
@ -347,6 +347,13 @@ nextAdvisory:
|
|||
continue
|
||||
}
|
||||
|
||||
if d.cfg.ignoreURL(file.URL()) {
|
||||
if d.cfg.Verbose {
|
||||
log.Printf("Ignoring %q.\n", file.URL())
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
resp, err := client.Get(file.URL())
|
||||
if err != nil {
|
||||
log.Printf("WARN: cannot get '%s': %v\n", file.URL(), err)
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ Application Options:
|
|||
-w, --worker=NUM NUMber of concurrent downloads (default: 2)
|
||||
-t, --timerange=RANGE RANGE of time from which advisories to download
|
||||
-f, --folder=FOLDER Download all into a given subFOLDER
|
||||
-i, --ignorepattern=PATTERN Do not download files if their URLs match any of the given PATTERNs
|
||||
-H, --header= One or more extra HTTP header fields
|
||||
--validator=URL URL to validate documents remotely
|
||||
--validatorcache=FILE FILE to cache remote validations
|
||||
|
|
@ -54,6 +55,7 @@ verbose = false
|
|||
worker = 2
|
||||
# timerange # not set by default
|
||||
# folder # not set by default
|
||||
# ignorepattern # not set by default
|
||||
# header # not set by default
|
||||
# validator # not set by default
|
||||
# validatorcache # not set by default
|
||||
|
|
@ -92,3 +94,14 @@ All interval boundaries are inclusive.
|
|||
If the `folder` option is given all the advisories are stored in a subfolder
|
||||
of this name. Otherwise the advisories are each stored in a folder named
|
||||
by the year they are from.
|
||||
|
||||
You can ignore certain advisories while downloading by specifying a list
|
||||
of regular expressions to match their URLs by using the `ignorepattern`
|
||||
option.
|
||||
|
||||
E.g. `-i='.*white.*' -i='.*red.*'` will ignore files whose URLs contain
|
||||
the sub strings **white** or **red**.
|
||||
In the config file this has to be noted as:
|
||||
```
|
||||
ignorepattern = [".*white.*", ".*red.*"]
|
||||
```
|
||||
|
|
|
|||
42
internal/filter/filter.go
Normal file
42
internal/filter/filter.go
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
// This file is Free Software under the MIT License
|
||||
// without warranty, see README.md and LICENSES/MIT.txt for details.
|
||||
//
|
||||
// SPDX-License-Identifier: MIT
|
||||
//
|
||||
// SPDX-FileCopyrightText: 2023 German Federal Office for Information Security (BSI) <https://www.bsi.bund.de>
|
||||
// Software-Engineering: 2023 Intevation GmbH <https://intevation.de>
|
||||
|
||||
// Package filter implements helpers to filter advisories.
|
||||
package filter
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
)
|
||||
|
||||
// PatternMatcher holds a sequence of compiled regular expressions
// that are tried one after another against a candidate string.
type PatternMatcher []*regexp.Regexp

// NewPatternMatcher builds a PatternMatcher from the given pattern strings.
// The patterns are compiled in the order given; the first one that is not
// a valid regular expression aborts construction and its compile error
// is returned wrapped.
func NewPatternMatcher(patterns []string) (PatternMatcher, error) {
	compiled := make(PatternMatcher, len(patterns))
	for i := range patterns {
		re, err := regexp.Compile(patterns[i])
		if err != nil {
			return nil, fmt.Errorf("invalid ignore pattern: %w", err)
		}
		compiled[i] = re
	}
	return compiled, nil
}

// Matches reports whether at least one of the expressions matches s.
// A matcher without any expressions never matches.
func (pm PatternMatcher) Matches(s string) bool {
	for i := range pm {
		if pm[i].MatchString(s) {
			return true
		}
	}
	return false
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue