From 4cd0fc3bb4dce90b7dbd623d65e8ec02314ab0c1 Mon Sep 17 00:00:00 2001 From: "Sascha L. Teichmann" Date: Wed, 16 Aug 2023 19:38:55 +0200 Subject: [PATCH] Aggregator: ignore advisories by given patterns (#421) * Ignore advisories in checker. * Rename config.check to config.prepare to make symmetrical to other tools. * Add ignore patterns to aggregator. * Clarified docs on where and how to use ignorepattern for aggregator --------- Co-authored-by: JanHoefelmeyer --- cmd/csaf_aggregator/config.go | 49 ++++++++++++++++++++++++++++++++++- cmd/csaf_aggregator/main.go | 2 +- cmd/csaf_aggregator/mirror.go | 8 ++++++ docs/csaf_aggregator.md | 3 +++ docs/examples/aggregator.toml | 1 + 5 files changed, 61 insertions(+), 2 deletions(-) diff --git a/cmd/csaf_aggregator/config.go b/cmd/csaf_aggregator/config.go index da91c2f..b4205a4 100644 --- a/cmd/csaf_aggregator/config.go +++ b/cmd/csaf_aggregator/config.go @@ -20,6 +20,7 @@ import ( "github.com/ProtonMail/gopenpgp/v2/crypto" "github.com/csaf-poc/csaf_distribution/v2/csaf" + "github.com/csaf-poc/csaf_distribution/v2/internal/filter" "github.com/csaf-poc/csaf_distribution/v2/internal/options" "github.com/csaf-poc/csaf_distribution/v2/util" "golang.org/x/time/rate" @@ -48,6 +49,10 @@ type provider struct { // UpdateInterval is as the mandatory `update_interval` if this is a publisher. UpdateInterval *string `toml:"update_interval"` + + // IgnorePattern is a list of patterns of advisory URLs to be ignored. + IgnorePattern []string `toml:"ignorepattern"` + ignorePattern filter.PatternMatcher } type config struct { @@ -90,6 +95,10 @@ type config struct { // 'update_interval'. UpdateInterval *string `toml:"update_interval"` + // IgnorePattern is a list of patterns of advisory URLs to be ignored. 
+ IgnorePattern []string `toml:"ignorepattern"` + ignorePattern filter.PatternMatcher + Config string `short:"c" long:"config" description:"Path to config TOML file" value-name:"TOML-FILE" toml:"-"` keyMu sync.Mutex @@ -128,6 +137,11 @@ func (c *config) tooOldForInterims() func(time.Time) bool { return func(t time.Time) bool { return t.Before(from) } } +// ignoreURL returns true if the given URL should not be downloaded. +func (p *provider) ignoreURL(u string, c *config) bool { + return p.ignorePattern.Matches(u) || c.ignorePattern.Matches(u) +} + // updateInterval returns the update interval of a publisher. func (p *provider) updateInterval(c *config) string { if p.UpdateInterval != nil { @@ -307,11 +321,44 @@ func (c *config) setDefaults() { } } -func (c *config) check() error { +// compileIgnorePatterns compiles the configured patterns to be ignored. +func (p *provider) compileIgnorePatterns() error { + pm, err := filter.NewPatternMatcher(p.IgnorePattern) + if err != nil { + return err + } + p.ignorePattern = pm + return nil +} + +// compileIgnorePatterns compiles the configured patterns to be ignored. +func (c *config) compileIgnorePatterns() error { + // Compile the top level patterns. + pm, err := filter.NewPatternMatcher(c.IgnorePattern) + if err != nil { + return err + } + c.ignorePattern = pm + // Compile the patterns of the providers. + for _, p := range c.Providers { + if err := p.compileIgnorePatterns(); err != nil { + return fmt.Errorf("invalid ignore patterns for %q: %w", p.Name, err) + } + } + return nil +} + +// prepare prepares internal state of a loaded configuration. 
+func (c *config) prepare() error { + if len(c.Providers) == 0 { return errors.New("no providers given in configuration") } + if err := c.compileIgnorePatterns(); err != nil { + return err + } + if err := c.Aggregator.Validate(); err != nil { return err } diff --git a/cmd/csaf_aggregator/main.go b/cmd/csaf_aggregator/main.go index a592364..aca2a8a 100644 --- a/cmd/csaf_aggregator/main.go +++ b/cmd/csaf_aggregator/main.go @@ -45,7 +45,7 @@ func lock(lockFile *string, fn func() error) error { func main() { _, cfg, err := parseArgsConfig() options.ErrorCheck(err) - options.ErrorCheck(cfg.check()) + options.ErrorCheck(cfg.prepare()) p := processor{cfg: cfg} options.ErrorCheck(lock(cfg.LockFile, p.process)) } diff --git a/cmd/csaf_aggregator/mirror.go b/cmd/csaf_aggregator/mirror.go index 0fd1de0..64ef18a 100644 --- a/cmd/csaf_aggregator/mirror.go +++ b/cmd/csaf_aggregator/mirror.go @@ -500,6 +500,14 @@ func (w *worker) mirrorFiles(tlpLabel csaf.TLPLabel, files []csaf.AdvisoryFile) continue } + // Should we ignore this advisory? + if w.provider.ignoreURL(file.URL(), w.processor.cfg) { + if w.processor.cfg.Verbose { + log.Printf("Ignoring %s: %q\n", w.provider.Name, file.URL()) + } + continue + } + // Ignore not conforming filenames. filename := filepath.Base(u.Path) if !util.ConformingFileName(filename) { diff --git a/docs/csaf_aggregator.md b/docs/csaf_aggregator.md index edbe8e8..2a9e482 100644 --- a/docs/csaf_aggregator.md +++ b/docs/csaf_aggregator.md @@ -94,6 +94,7 @@ lock_file // path to lockfile, to stop other instances if one is n interim_years // limiting the years for which interim documents are searched (default 0) verbose // print more diagnostic output, e.g. 
https requests (default false) allow_single_provider // debugging option (default false) +ignorepattern // patterns of advisory URLs to be ignored ``` Next we have two TOML _tables_: @@ -123,6 +124,7 @@ category update_interval create_service_document categories +ignorepattern ``` Where valid `name` and `domain` settings are required. @@ -204,6 +206,7 @@ insecure = true # If aggregator.category == "aggregator", set for an entry that should # be listed in addition: category = "lister" +# ignorepattern = [".*white.*", ".*red.*"] ``` diff --git a/docs/examples/aggregator.toml b/docs/examples/aggregator.toml index 597144c..80f9b8a 100644 --- a/docs/examples/aggregator.toml +++ b/docs/examples/aggregator.toml @@ -48,3 +48,4 @@ insecure = true # If aggregator.category == "aggreator", set for an entry that should # be listed in addition: category = "lister" +# ignorepattern = [".*white.*", ".*red.*"]