mirror of
https://github.com/gocsaf/csaf.git
synced 2025-12-22 18:15:42 +01:00
Merge pull request #413 from csaf-poc/downloader-interval
Downloader: Make time range configurable to download advisories from
This commit is contained in:
commit
8a9dd6e842
6 changed files with 199 additions and 37 deletions
|
|
@ -76,8 +76,7 @@ func (w *worker) mirrorInternal() (*csaf.AggregatorCSAFProvider, error) {
|
|||
w.client,
|
||||
w.expr,
|
||||
w.metadataProvider,
|
||||
base,
|
||||
nil)
|
||||
base)
|
||||
|
||||
if err := afp.Process(w.mirrorFiles); err != nil {
|
||||
return nil, err
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ package main
|
|||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/csaf-poc/csaf_distribution/v2/internal/models"
|
||||
"github.com/csaf-poc/csaf_distribution/v2/internal/options"
|
||||
)
|
||||
|
||||
|
|
@ -27,6 +28,7 @@ type config struct {
|
|||
Verbose bool `long:"verbose" short:"v" description:"Verbose output" toml:"verbose"`
|
||||
Rate *float64 `long:"rate" short:"r" description:"The average upper limit of https operations per second (defaults to unlimited)" toml:"rate"`
|
||||
Worker int `long:"worker" short:"w" description:"NUMber of concurrent downloads" value-name:"NUM" toml:"worker"`
|
||||
Range *models.TimeRange `long:"timerange" short:"t" description:"RANGE of time from which advisories to download" value-name:"RANGE" toml:"timerange"`
|
||||
|
||||
ExtraHeader http.Header `long:"header" short:"H" description:"One or more extra HTTP header fields" toml:"header"`
|
||||
|
||||
|
|
|
|||
|
|
@ -150,8 +150,12 @@ func (d *downloader) download(ctx context.Context, domain string) error {
|
|||
client,
|
||||
d.eval,
|
||||
lpmd.Document,
|
||||
base,
|
||||
nil)
|
||||
base)
|
||||
|
||||
// Do we need time range based filtering?
|
||||
if d.cfg.Range != nil {
|
||||
afp.AgeAccept = d.cfg.Range.Contains
|
||||
}
|
||||
|
||||
return afp.Process(func(label csaf.TLPLabel, files []csaf.AdvisoryFile) error {
|
||||
return d.downloadFiles(ctx, label, files)
|
||||
|
|
|
|||
|
|
@ -9,11 +9,14 @@
|
|||
package csaf
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/csv"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/csaf-poc/csaf_distribution/v2/util"
|
||||
)
|
||||
|
|
@ -71,11 +74,12 @@ func (haf HashedAdvisoryFile) SignURL() string { return haf.name(3, ".asc") }
|
|||
// AdvisoryFileProcessor implements the extraction of
|
||||
// advisory file names from a given provider metadata.
|
||||
type AdvisoryFileProcessor struct {
|
||||
AgeAccept func(time.Time) bool
|
||||
Log func(format string, args ...any)
|
||||
client util.Client
|
||||
expr *util.PathEval
|
||||
doc any
|
||||
base *url.URL
|
||||
log func(format string, args ...any)
|
||||
}
|
||||
|
||||
// NewAdvisoryFileProcessor constructs an filename extractor
|
||||
|
|
@ -85,14 +89,12 @@ func NewAdvisoryFileProcessor(
|
|||
expr *util.PathEval,
|
||||
doc any,
|
||||
base *url.URL,
|
||||
log func(format string, args ...any),
|
||||
) *AdvisoryFileProcessor {
|
||||
return &AdvisoryFileProcessor{
|
||||
client: client,
|
||||
expr: expr,
|
||||
doc: doc,
|
||||
base: base,
|
||||
log: log,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -111,7 +113,7 @@ func empty(arr []string) bool {
|
|||
func (afp *AdvisoryFileProcessor) Process(
|
||||
fn func(TLPLabel, []AdvisoryFile) error,
|
||||
) error {
|
||||
lg := afp.log
|
||||
lg := afp.Log
|
||||
if lg == nil {
|
||||
lg = func(format string, args ...any) {
|
||||
log.Printf("AdvisoryFileProcessor.Process: "+format, args...)
|
||||
|
|
@ -173,7 +175,8 @@ func (afp *AdvisoryFileProcessor) Process(
|
|||
continue
|
||||
}
|
||||
|
||||
files, err := afp.loadIndex(base, lg)
|
||||
// Use changes.csv to be able to filter by age.
|
||||
files, err := afp.loadChanges(base, lg)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
@ -186,9 +189,9 @@ func (afp *AdvisoryFileProcessor) Process(
|
|||
return nil
|
||||
}
|
||||
|
||||
// loadIndex loads baseURL/index.txt and returns a list of files
|
||||
// loadChanges loads baseURL/changes.csv and returns a list of files
|
||||
// prefixed by baseURL/.
|
||||
func (afp *AdvisoryFileProcessor) loadIndex(
|
||||
func (afp *AdvisoryFileProcessor) loadChanges(
|
||||
baseURL string,
|
||||
lg func(string, ...any),
|
||||
) ([]AdvisoryFile, error) {
|
||||
|
|
@ -197,29 +200,53 @@ func (afp *AdvisoryFileProcessor) loadIndex(
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
changesURL := base.JoinPath("changes.csv").String()
|
||||
|
||||
indexURL := base.JoinPath("index.txt").String()
|
||||
resp, err := afp.client.Get(indexURL)
|
||||
resp, err := afp.client.Get(changesURL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("fetching %s failed. Status code %d (%s)",
|
||||
changesURL, resp.StatusCode, resp.Status)
|
||||
}
|
||||
|
||||
defer resp.Body.Close()
|
||||
var files []AdvisoryFile
|
||||
|
||||
scanner := bufio.NewScanner(resp.Body)
|
||||
|
||||
for line := 1; scanner.Scan(); line++ {
|
||||
u := scanner.Text()
|
||||
if _, err := url.Parse(u); err != nil {
|
||||
lg("index.txt contains invalid URL %q in line %d", u, line)
|
||||
c := csv.NewReader(resp.Body)
|
||||
const (
|
||||
pathColumn = 0
|
||||
timeColumn = 1
|
||||
)
|
||||
for line := 1; ; line++ {
|
||||
r, err := c.Read()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(r) < 2 {
|
||||
lg("%q has not enough columns in line %d", changesURL, line)
|
||||
continue
|
||||
}
|
||||
t, err := time.Parse(time.RFC3339, r[timeColumn])
|
||||
if err != nil {
|
||||
lg("%q has an invalid time stamp in line %d: %v", changesURL, line, err)
|
||||
continue
|
||||
}
|
||||
// Apply date range filtering.
|
||||
if afp.AgeAccept != nil && !afp.AgeAccept(t) {
|
||||
continue
|
||||
}
|
||||
path := r[pathColumn]
|
||||
if _, err := url.Parse(path); err != nil {
|
||||
lg("%q contains an invalid URL %q in line %d", changesURL, path, line)
|
||||
continue
|
||||
}
|
||||
files = append(files,
|
||||
PlainAdvisoryFile(base.JoinPath(u).String()))
|
||||
}
|
||||
|
||||
if err := scanner.Err(); err != nil {
|
||||
return nil, err
|
||||
PlainAdvisoryFile(base.JoinPath(path).String()))
|
||||
}
|
||||
return files, nil
|
||||
}
|
||||
|
|
@ -287,6 +314,13 @@ func (afp *AdvisoryFileProcessor) processROLIE(
|
|||
|
||||
rfeed.Entries(func(entry *Entry) {
|
||||
|
||||
// Filter if we have date checking.
|
||||
if afp.AgeAccept != nil {
|
||||
if pub := time.Time(entry.Published); !pub.IsZero() && !afp.AgeAccept(pub) {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
var self, sha256, sha512, sign string
|
||||
|
||||
for i := range entry.Link {
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ Application Options:
|
|||
-v, --verbose Verbose output
|
||||
-r, --rate= The average upper limit of https operations per second (defaults to unlimited)
|
||||
-w, --worker=NUM NUMber of concurrent downloads (default: 2)
|
||||
-t, --timerange=RANGE RANGE of time from which advisories to download
|
||||
-H, --header= One or more extra HTTP header fields
|
||||
--validator=URL URL to validate documents remotely
|
||||
--validatorcache=FILE FILE to cache remote validations
|
||||
|
|
@ -54,4 +55,34 @@ worker = 2
|
|||
# validator # not set by default
|
||||
# validatorcache # not set by default
|
||||
validatorpreset = ["mandatory"]
|
||||
# timerange # not set by default
|
||||
```
|
||||
|
||||
The `timerange` parameter enables downloading advisories whose last changes fall
|
||||
into a given interval. There are three possible notations:
|
||||
|
||||
1. Relative. If the given string follows the rules of being a [Go duration](https://pkg.go.dev/time@go1.20.6#ParseDuration)
|
||||
the time interval from now minus that duration till now is used.
|
||||
E.g. `"3h"` means downloading the advisories that have changed in the last three hours.
|
||||
|
||||
2. Absolute. If the given string is an RFC 3339 date timestamp the time interval between
|
||||
this date and now is used.
|
||||
E.g. `"2006-01-02"` means that all files between 2006 January 2nd and now are going to be
|
||||
downloaded.
|
||||
Accepted patterns are:
|
||||
- `"2006-01-02T15:04:05Z"`
|
||||
- `"2006-01-02T15:04:05+07:00"`
|
||||
- `"2006-01-02T15:04:05-07:00"`
|
||||
- `"2006-01-02T15:04:05"`
|
||||
- `"2006-01-02T15:04"`
|
||||
- `"2006-01-02T15"`
|
||||
- `"2006-01-02"`
|
||||
- `"2006-01"`
|
||||
- `"2006"`
|
||||
|
||||
Missing parts are set to the smallest value possible in that field.
|
||||
|
||||
3. Range. Same as 2 but separated by a `,` to span an interval. E.g. `2019,2024`
|
||||
spans an interval from 1st January 2019 to the 1st January of 2024.
|
||||
|
||||
All interval boundaries are inclusive.
|
||||
|
|
|
|||
92
internal/models/models.go
Normal file
92
internal/models/models.go
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
// This file is Free Software under the MIT License
|
||||
// without warranty, see README.md and LICENSES/MIT.txt for details.
|
||||
//
|
||||
// SPDX-License-Identifier: MIT
|
||||
//
|
||||
// SPDX-FileCopyrightText: 2023 German Federal Office for Information Security (BSI) <https://www.bsi.bund.de>
|
||||
// Software-Engineering: 2023 Intevation GmbH <https://intevation.de>
|
||||
|
||||
// Package models contains helper models used in the tools internally.
|
||||
package models
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// TimeRange is a time interval.
|
||||
type TimeRange [2]time.Time
|
||||
|
||||
// NewTimeInterval creates a new time range.
|
||||
// The time values will be sorted.
|
||||
func NewTimeInterval(a, b time.Time) TimeRange {
|
||||
if b.Before(a) {
|
||||
a, b = b, a
|
||||
}
|
||||
return TimeRange{a, b}
|
||||
}
|
||||
|
||||
// guessDate tries to guess an RFC 3339 date time from a given string.
|
||||
func guessDate(s string) (time.Time, bool) {
|
||||
for _, layout := range []string{
|
||||
"2006-01-02T15:04:05Z07:00",
|
||||
"2006-01-02T15:04:05",
|
||||
"2006-01-02T15:04",
|
||||
"2006-01-02T15",
|
||||
"2006-01-02",
|
||||
"2006-01",
|
||||
"2006",
|
||||
} {
|
||||
if t, err := time.Parse(layout, s); err == nil {
|
||||
return t, true
|
||||
}
|
||||
}
|
||||
return time.Time{}, false
|
||||
}
|
||||
|
||||
// UnmarshalText implements [encoding/text.TextUnmarshaler].
|
||||
func (tr *TimeRange) UnmarshalText(text []byte) error {
|
||||
return tr.UnmarshalFlag(string(text))
|
||||
}
|
||||
|
||||
// UnmarshalFlag implements [go-flags/Unmarshaler].
|
||||
func (tr *TimeRange) UnmarshalFlag(s string) error {
|
||||
s = strings.TrimSpace(s)
|
||||
|
||||
// Handle relative case first.
|
||||
if duration, err := time.ParseDuration(s); err == nil {
|
||||
now := time.Now()
|
||||
*tr = NewTimeInterval(now.Add(-duration), now)
|
||||
return nil
|
||||
}
|
||||
|
||||
a, b, found := strings.Cut(s, ",")
|
||||
a, b = strings.TrimSpace(a), strings.TrimSpace(b)
|
||||
|
||||
// Only start date?
|
||||
if !found {
|
||||
start, ok := guessDate(a)
|
||||
if !ok {
|
||||
return fmt.Errorf("%q is not a valid RFC date time", a)
|
||||
}
|
||||
*tr = NewTimeInterval(start, time.Now())
|
||||
return nil
|
||||
}
|
||||
// Real interval
|
||||
start, ok := guessDate(a)
|
||||
if !ok {
|
||||
return fmt.Errorf("%q is not a valid RFC date time", a)
|
||||
}
|
||||
end, ok := guessDate(b)
|
||||
if !ok {
|
||||
return fmt.Errorf("%q is not a valid RFC date time", b)
|
||||
}
|
||||
*tr = NewTimeInterval(start, end)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Contains return true if the given time is inside this time interval.
|
||||
func (tr TimeRange) Contains(t time.Time) bool {
|
||||
return !(t.Before(tr[0]) || t.After(tr[1]))
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue