mirror of
https://github.com/gocsaf/csaf.git
synced 2025-12-22 18:15:42 +01:00
Merge pull request #413 from csaf-poc/downloader-interval
Downloader: Make time range configurable to download advisories from
This commit is contained in:
commit
8a9dd6e842
6 changed files with 199 additions and 37 deletions
|
|
@ -76,8 +76,7 @@ func (w *worker) mirrorInternal() (*csaf.AggregatorCSAFProvider, error) {
|
||||||
w.client,
|
w.client,
|
||||||
w.expr,
|
w.expr,
|
||||||
w.metadataProvider,
|
w.metadataProvider,
|
||||||
base,
|
base)
|
||||||
nil)
|
|
||||||
|
|
||||||
if err := afp.Process(w.mirrorFiles); err != nil {
|
if err := afp.Process(w.mirrorFiles); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,7 @@ package main
|
||||||
import (
|
import (
|
||||||
"net/http"
|
"net/http"
|
||||||
|
|
||||||
|
"github.com/csaf-poc/csaf_distribution/v2/internal/models"
|
||||||
"github.com/csaf-poc/csaf_distribution/v2/internal/options"
|
"github.com/csaf-poc/csaf_distribution/v2/internal/options"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -27,6 +28,7 @@ type config struct {
|
||||||
Verbose bool `long:"verbose" short:"v" description:"Verbose output" toml:"verbose"`
|
Verbose bool `long:"verbose" short:"v" description:"Verbose output" toml:"verbose"`
|
||||||
Rate *float64 `long:"rate" short:"r" description:"The average upper limit of https operations per second (defaults to unlimited)" toml:"rate"`
|
Rate *float64 `long:"rate" short:"r" description:"The average upper limit of https operations per second (defaults to unlimited)" toml:"rate"`
|
||||||
Worker int `long:"worker" short:"w" description:"NUMber of concurrent downloads" value-name:"NUM" toml:"worker"`
|
Worker int `long:"worker" short:"w" description:"NUMber of concurrent downloads" value-name:"NUM" toml:"worker"`
|
||||||
|
Range *models.TimeRange `long:"timerange" short:"t" description:"RANGE of time from which advisories to download" value-name:"RANGE" toml:"timerange"`
|
||||||
|
|
||||||
ExtraHeader http.Header `long:"header" short:"H" description:"One or more extra HTTP header fields" toml:"header"`
|
ExtraHeader http.Header `long:"header" short:"H" description:"One or more extra HTTP header fields" toml:"header"`
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -150,8 +150,12 @@ func (d *downloader) download(ctx context.Context, domain string) error {
|
||||||
client,
|
client,
|
||||||
d.eval,
|
d.eval,
|
||||||
lpmd.Document,
|
lpmd.Document,
|
||||||
base,
|
base)
|
||||||
nil)
|
|
||||||
|
// Do we need time range based filtering?
|
||||||
|
if d.cfg.Range != nil {
|
||||||
|
afp.AgeAccept = d.cfg.Range.Contains
|
||||||
|
}
|
||||||
|
|
||||||
return afp.Process(func(label csaf.TLPLabel, files []csaf.AdvisoryFile) error {
|
return afp.Process(func(label csaf.TLPLabel, files []csaf.AdvisoryFile) error {
|
||||||
return d.downloadFiles(ctx, label, files)
|
return d.downloadFiles(ctx, label, files)
|
||||||
|
|
|
||||||
|
|
@ -9,11 +9,14 @@
|
||||||
package csaf
|
package csaf
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"encoding/csv"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
"log"
|
"log"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/csaf-poc/csaf_distribution/v2/util"
|
"github.com/csaf-poc/csaf_distribution/v2/util"
|
||||||
)
|
)
|
||||||
|
|
@ -71,11 +74,12 @@ func (haf HashedAdvisoryFile) SignURL() string { return haf.name(3, ".asc") }
|
||||||
// AdvisoryFileProcessor implements the extraction of
|
// AdvisoryFileProcessor implements the extraction of
|
||||||
// advisory file names from a given provider metadata.
|
// advisory file names from a given provider metadata.
|
||||||
type AdvisoryFileProcessor struct {
|
type AdvisoryFileProcessor struct {
|
||||||
|
AgeAccept func(time.Time) bool
|
||||||
|
Log func(format string, args ...any)
|
||||||
client util.Client
|
client util.Client
|
||||||
expr *util.PathEval
|
expr *util.PathEval
|
||||||
doc any
|
doc any
|
||||||
base *url.URL
|
base *url.URL
|
||||||
log func(format string, args ...any)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewAdvisoryFileProcessor constructs an filename extractor
|
// NewAdvisoryFileProcessor constructs an filename extractor
|
||||||
|
|
@ -85,14 +89,12 @@ func NewAdvisoryFileProcessor(
|
||||||
expr *util.PathEval,
|
expr *util.PathEval,
|
||||||
doc any,
|
doc any,
|
||||||
base *url.URL,
|
base *url.URL,
|
||||||
log func(format string, args ...any),
|
|
||||||
) *AdvisoryFileProcessor {
|
) *AdvisoryFileProcessor {
|
||||||
return &AdvisoryFileProcessor{
|
return &AdvisoryFileProcessor{
|
||||||
client: client,
|
client: client,
|
||||||
expr: expr,
|
expr: expr,
|
||||||
doc: doc,
|
doc: doc,
|
||||||
base: base,
|
base: base,
|
||||||
log: log,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -111,7 +113,7 @@ func empty(arr []string) bool {
|
||||||
func (afp *AdvisoryFileProcessor) Process(
|
func (afp *AdvisoryFileProcessor) Process(
|
||||||
fn func(TLPLabel, []AdvisoryFile) error,
|
fn func(TLPLabel, []AdvisoryFile) error,
|
||||||
) error {
|
) error {
|
||||||
lg := afp.log
|
lg := afp.Log
|
||||||
if lg == nil {
|
if lg == nil {
|
||||||
lg = func(format string, args ...any) {
|
lg = func(format string, args ...any) {
|
||||||
log.Printf("AdvisoryFileProcessor.Process: "+format, args...)
|
log.Printf("AdvisoryFileProcessor.Process: "+format, args...)
|
||||||
|
|
@ -173,7 +175,8 @@ func (afp *AdvisoryFileProcessor) Process(
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
files, err := afp.loadIndex(base, lg)
|
// Use changes.csv to be able to filter by age.
|
||||||
|
files, err := afp.loadChanges(base, lg)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
@ -186,9 +189,9 @@ func (afp *AdvisoryFileProcessor) Process(
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// loadIndex loads baseURL/index.txt and returns a list of files
|
// loadChanges loads baseURL/changes.csv and returns a list of files
|
||||||
// prefixed by baseURL/.
|
// prefixed by baseURL/.
|
||||||
func (afp *AdvisoryFileProcessor) loadIndex(
|
func (afp *AdvisoryFileProcessor) loadChanges(
|
||||||
baseURL string,
|
baseURL string,
|
||||||
lg func(string, ...any),
|
lg func(string, ...any),
|
||||||
) ([]AdvisoryFile, error) {
|
) ([]AdvisoryFile, error) {
|
||||||
|
|
@ -197,29 +200,53 @@ func (afp *AdvisoryFileProcessor) loadIndex(
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
changesURL := base.JoinPath("changes.csv").String()
|
||||||
|
|
||||||
indexURL := base.JoinPath("index.txt").String()
|
resp, err := afp.client.Get(changesURL)
|
||||||
resp, err := afp.client.Get(indexURL)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return nil, fmt.Errorf("fetching %s failed. Status code %d (%s)",
|
||||||
|
changesURL, resp.StatusCode, resp.Status)
|
||||||
|
}
|
||||||
|
|
||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
var files []AdvisoryFile
|
var files []AdvisoryFile
|
||||||
|
c := csv.NewReader(resp.Body)
|
||||||
scanner := bufio.NewScanner(resp.Body)
|
const (
|
||||||
|
pathColumn = 0
|
||||||
for line := 1; scanner.Scan(); line++ {
|
timeColumn = 1
|
||||||
u := scanner.Text()
|
)
|
||||||
if _, err := url.Parse(u); err != nil {
|
for line := 1; ; line++ {
|
||||||
lg("index.txt contains invalid URL %q in line %d", u, line)
|
r, err := c.Read()
|
||||||
|
if err == io.EOF {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if len(r) < 2 {
|
||||||
|
lg("%q has not enough columns in line %d", changesURL, line)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
t, err := time.Parse(time.RFC3339, r[timeColumn])
|
||||||
|
if err != nil {
|
||||||
|
lg("%q has an invalid time stamp in line %d: %v", changesURL, line, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Apply date range filtering.
|
||||||
|
if afp.AgeAccept != nil && !afp.AgeAccept(t) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
path := r[pathColumn]
|
||||||
|
if _, err := url.Parse(path); err != nil {
|
||||||
|
lg("%q contains an invalid URL %q in line %d", changesURL, path, line)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
files = append(files,
|
files = append(files,
|
||||||
PlainAdvisoryFile(base.JoinPath(u).String()))
|
PlainAdvisoryFile(base.JoinPath(path).String()))
|
||||||
}
|
|
||||||
|
|
||||||
if err := scanner.Err(); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
}
|
||||||
return files, nil
|
return files, nil
|
||||||
}
|
}
|
||||||
|
|
@ -287,6 +314,13 @@ func (afp *AdvisoryFileProcessor) processROLIE(
|
||||||
|
|
||||||
rfeed.Entries(func(entry *Entry) {
|
rfeed.Entries(func(entry *Entry) {
|
||||||
|
|
||||||
|
// Filter if we have date checking.
|
||||||
|
if afp.AgeAccept != nil {
|
||||||
|
if pub := time.Time(entry.Published); !pub.IsZero() && !afp.AgeAccept(pub) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
var self, sha256, sha512, sign string
|
var self, sha256, sha512, sign string
|
||||||
|
|
||||||
for i := range entry.Link {
|
for i := range entry.Link {
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,7 @@ Application Options:
|
||||||
-v, --verbose Verbose output
|
-v, --verbose Verbose output
|
||||||
-r, --rate= The average upper limit of https operations per second (defaults to unlimited)
|
-r, --rate= The average upper limit of https operations per second (defaults to unlimited)
|
||||||
-w, --worker=NUM NUMber of concurrent downloads (default: 2)
|
-w, --worker=NUM NUMber of concurrent downloads (default: 2)
|
||||||
|
-t, --timerange=RANGE RANGE of time from which advisories to download
|
||||||
-H, --header= One or more extra HTTP header fields
|
-H, --header= One or more extra HTTP header fields
|
||||||
--validator=URL URL to validate documents remotely
|
--validator=URL URL to validate documents remotely
|
||||||
--validatorcache=FILE FILE to cache remote validations
|
--validatorcache=FILE FILE to cache remote validations
|
||||||
|
|
@ -54,4 +55,34 @@ worker = 2
|
||||||
# validator # not set by default
|
# validator # not set by default
|
||||||
# validatorcache # not set by default
|
# validatorcache # not set by default
|
||||||
validatorpreset = ["mandatory"]
|
validatorpreset = ["mandatory"]
|
||||||
|
# timerange # not set by default
|
||||||
```
|
```
|
||||||
|
|
||||||
|
The `timerange` parameter enables downloading advisories whose last changes fall
|
||||||
|
into a given interval. There are three possible notations:
|
||||||
|
|
||||||
|
1. Relative. If the given string is a valid [Go duration](https://pkg.go.dev/time@go1.20.6#ParseDuration),
|
||||||
|
the time interval from now minus that duration till now is used.
|
||||||
|
E.g. `"3h"` means downloading the advisories that have changed in the last three hours.
|
||||||
|
|
||||||
|
2. Absolute. If the given string is an RFC 3339 date timestamp the time interval between
|
||||||
|
this date and now is used.
|
||||||
|
E.g. `"2006-01-02"` means that all files between 2006 January 2nd and now are going to be
|
||||||
|
downloaded.
|
||||||
|
Accepted patterns are:
|
||||||
|
- `"2006-01-02T15:04:05Z"`
|
||||||
|
- `"2006-01-02T15:04:05+07:00"`
|
||||||
|
- `"2006-01-02T15:04:05-07:00"`
|
||||||
|
- `"2006-01-02T15:04:05"`
|
||||||
|
- `"2006-01-02T15:04"`
|
||||||
|
- `"2006-01-02T15"`
|
||||||
|
- `"2006-01-02"`
|
||||||
|
- `"2006-01"`
|
||||||
|
- `"2006"`
|
||||||
|
|
||||||
|
Missing parts are set to the smallest value possible in that field.
|
||||||
|
|
||||||
|
3. Range. Two timestamps as in 2, separated by a `,` to span an interval. E.g. `2019,2024`
|
||||||
|
spans an interval from 1st January 2019 to the 1st January of 2024.
|
||||||
|
|
||||||
|
All interval boundaries are inclusive.
|
||||||
|
|
|
||||||
92
internal/models/models.go
Normal file
92
internal/models/models.go
Normal file
|
|
@ -0,0 +1,92 @@
|
||||||
|
// This file is Free Software under the MIT License
|
||||||
|
// without warranty, see README.md and LICENSES/MIT.txt for details.
|
||||||
|
//
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
//
|
||||||
|
// SPDX-FileCopyrightText: 2023 German Federal Office for Information Security (BSI) <https://www.bsi.bund.de>
|
||||||
|
// Software-Engineering: 2023 Intevation GmbH <https://intevation.de>
|
||||||
|
|
||||||
|
// Package models contains helper models used in the tools internally.
|
||||||
|
package models
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TimeRange is a time interval given by two time stamps.
// NewTimeInterval stores the earlier one at index 0 and the later
// one at index 1; Contains treats both of them as inclusive bounds.
|
||||||
|
type TimeRange [2]time.Time
|
||||||
|
|
||||||
|
// NewTimeInterval creates a new time range.
|
||||||
|
// The time values will be sorted.
|
||||||
|
func NewTimeInterval(a, b time.Time) TimeRange {
|
||||||
|
if b.Before(a) {
|
||||||
|
a, b = b, a
|
||||||
|
}
|
||||||
|
return TimeRange{a, b}
|
||||||
|
}
|
||||||
|
|
||||||
|
// guessDate tries to interpret s as an RFC 3339 date time or as one
// of its truncated forms, down to a bare year.
//
// The layouts are tried from the most to the least specific one and
// the first layout that parses wins. time.Parse returns UTC for
// layouts without a zone offset and fills fields that are missing
// from a truncated layout with their smallest possible value.
//
// The boolean result reports whether any layout matched. If none
// did, the zero time is returned.
func guessDate(s string) (time.Time, bool) {
	for _, layout := range []string{
		time.RFC3339, // "2006-01-02T15:04:05Z07:00"
		"2006-01-02T15:04:05",
		"2006-01-02T15:04",
		"2006-01-02T15",
		"2006-01-02",
		"2006-01",
		"2006",
	} {
		if t, err := time.Parse(layout, s); err == nil {
			return t, true
		}
	}
	return time.Time{}, false
}
|
||||||
|
|
||||||
|
// UnmarshalText implements [encoding/text.TextUnmarshaler].
|
||||||
|
func (tr *TimeRange) UnmarshalText(text []byte) error {
|
||||||
|
return tr.UnmarshalFlag(string(text))
|
||||||
|
}
|
||||||
|
|
||||||
|
// UnmarshalFlag implements [go-flags/Unmarshaler].
|
||||||
|
func (tr *TimeRange) UnmarshalFlag(s string) error {
|
||||||
|
s = strings.TrimSpace(s)
|
||||||
|
|
||||||
|
// Handle relative case first.
|
||||||
|
if duration, err := time.ParseDuration(s); err == nil {
|
||||||
|
now := time.Now()
|
||||||
|
*tr = NewTimeInterval(now.Add(-duration), now)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
a, b, found := strings.Cut(s, ",")
|
||||||
|
a, b = strings.TrimSpace(a), strings.TrimSpace(b)
|
||||||
|
|
||||||
|
// Only start date?
|
||||||
|
if !found {
|
||||||
|
start, ok := guessDate(a)
|
||||||
|
if !ok {
|
||||||
|
return fmt.Errorf("%q is not a valid RFC date time", a)
|
||||||
|
}
|
||||||
|
*tr = NewTimeInterval(start, time.Now())
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
// Real interval
|
||||||
|
start, ok := guessDate(a)
|
||||||
|
if !ok {
|
||||||
|
return fmt.Errorf("%q is not a valid RFC date time", a)
|
||||||
|
}
|
||||||
|
end, ok := guessDate(b)
|
||||||
|
if !ok {
|
||||||
|
return fmt.Errorf("%q is not a valid RFC date time", b)
|
||||||
|
}
|
||||||
|
*tr = NewTimeInterval(start, end)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Contains return true if the given time is inside this time interval.
|
||||||
|
func (tr TimeRange) Contains(t time.Time) bool {
|
||||||
|
return !(t.Before(tr[0]) || t.After(tr[1]))
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue