1
0
Fork 0
mirror of https://github.com/gocsaf/csaf.git synced 2025-12-22 18:15:42 +01:00

Merge pull request #413 from csaf-poc/downloader-interval

Downloader: Make time range configurable to download advisories from
This commit is contained in:
JanHoefelmeyer 2023-07-28 09:43:49 +02:00 committed by GitHub
commit 8a9dd6e842
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 199 additions and 37 deletions

View file

@ -76,8 +76,7 @@ func (w *worker) mirrorInternal() (*csaf.AggregatorCSAFProvider, error) {
w.client,
w.expr,
w.metadataProvider,
base,
nil)
base)
if err := afp.Process(w.mirrorFiles); err != nil {
return nil, err

View file

@ -11,6 +11,7 @@ package main
import (
"net/http"
"github.com/csaf-poc/csaf_distribution/v2/internal/models"
"github.com/csaf-poc/csaf_distribution/v2/internal/options"
)
@ -27,6 +28,7 @@ type config struct {
Verbose bool `long:"verbose" short:"v" description:"Verbose output" toml:"verbose"`
Rate *float64 `long:"rate" short:"r" description:"The average upper limit of https operations per second (defaults to unlimited)" toml:"rate"`
Worker int `long:"worker" short:"w" description:"NUMber of concurrent downloads" value-name:"NUM" toml:"worker"`
Range *models.TimeRange `long:"timerange" short:"t" description:"RANGE of time from which advisories to download" value-name:"RANGE" toml:"timerange"`
ExtraHeader http.Header `long:"header" short:"H" description:"One or more extra HTTP header fields" toml:"header"`

View file

@ -150,8 +150,12 @@ func (d *downloader) download(ctx context.Context, domain string) error {
client,
d.eval,
lpmd.Document,
base,
nil)
base)
// Do we need time range based filtering?
if d.cfg.Range != nil {
afp.AgeAccept = d.cfg.Range.Contains
}
return afp.Process(func(label csaf.TLPLabel, files []csaf.AdvisoryFile) error {
return d.downloadFiles(ctx, label, files)

View file

@ -9,11 +9,14 @@
package csaf
import (
"bufio"
"encoding/csv"
"fmt"
"io"
"log"
"net/http"
"net/url"
"strings"
"time"
"github.com/csaf-poc/csaf_distribution/v2/util"
)
@ -71,11 +74,12 @@ func (haf HashedAdvisoryFile) SignURL() string { return haf.name(3, ".asc") }
// AdvisoryFileProcessor implements the extraction of
// advisory file names from a given provider metadata.
type AdvisoryFileProcessor struct {
AgeAccept func(time.Time) bool
Log func(format string, args ...any)
client util.Client
expr *util.PathEval
doc any
base *url.URL
log func(format string, args ...any)
}
// NewAdvisoryFileProcessor constructs an filename extractor
@ -85,14 +89,12 @@ func NewAdvisoryFileProcessor(
expr *util.PathEval,
doc any,
base *url.URL,
log func(format string, args ...any),
) *AdvisoryFileProcessor {
return &AdvisoryFileProcessor{
client: client,
expr: expr,
doc: doc,
base: base,
log: log,
}
}
@ -111,7 +113,7 @@ func empty(arr []string) bool {
func (afp *AdvisoryFileProcessor) Process(
fn func(TLPLabel, []AdvisoryFile) error,
) error {
lg := afp.log
lg := afp.Log
if lg == nil {
lg = func(format string, args ...any) {
log.Printf("AdvisoryFileProcessor.Process: "+format, args...)
@ -173,7 +175,8 @@ func (afp *AdvisoryFileProcessor) Process(
continue
}
files, err := afp.loadIndex(base, lg)
// Use changes.csv to be able to filter by age.
files, err := afp.loadChanges(base, lg)
if err != nil {
return err
}
@ -186,9 +189,9 @@ func (afp *AdvisoryFileProcessor) Process(
return nil
}
// loadIndex loads baseURL/index.txt and returns a list of files
// loadChanges loads baseURL/changes.csv and returns a list of files
// prefixed by baseURL/.
func (afp *AdvisoryFileProcessor) loadIndex(
func (afp *AdvisoryFileProcessor) loadChanges(
baseURL string,
lg func(string, ...any),
) ([]AdvisoryFile, error) {
@ -197,29 +200,53 @@ func (afp *AdvisoryFileProcessor) loadIndex(
if err != nil {
return nil, err
}
changesURL := base.JoinPath("changes.csv").String()
indexURL := base.JoinPath("index.txt").String()
resp, err := afp.client.Get(indexURL)
resp, err := afp.client.Get(changesURL)
if err != nil {
return nil, err
}
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("fetching %s failed. Status code %d (%s)",
changesURL, resp.StatusCode, resp.Status)
}
defer resp.Body.Close()
var files []AdvisoryFile
scanner := bufio.NewScanner(resp.Body)
for line := 1; scanner.Scan(); line++ {
u := scanner.Text()
if _, err := url.Parse(u); err != nil {
lg("index.txt contains invalid URL %q in line %d", u, line)
c := csv.NewReader(resp.Body)
const (
pathColumn = 0
timeColumn = 1
)
for line := 1; ; line++ {
r, err := c.Read()
if err == io.EOF {
break
}
if err != nil {
return nil, err
}
if len(r) < 2 {
lg("%q has not enough columns in line %d", line)
continue
}
t, err := time.Parse(time.RFC3339, r[timeColumn])
if err != nil {
lg("%q has an invalid time stamp in line %d: %v", changesURL, line, err)
continue
}
// Apply date range filtering.
if afp.AgeAccept != nil && !afp.AgeAccept(t) {
continue
}
path := r[pathColumn]
if _, err := url.Parse(path); err != nil {
lg("%q contains an invalid URL %q in line %d", changesURL, path, line)
continue
}
files = append(files,
PlainAdvisoryFile(base.JoinPath(u).String()))
}
if err := scanner.Err(); err != nil {
return nil, err
PlainAdvisoryFile(base.JoinPath(path).String()))
}
return files, nil
}
@ -287,6 +314,13 @@ func (afp *AdvisoryFileProcessor) processROLIE(
rfeed.Entries(func(entry *Entry) {
// Filter if we have date checking.
if afp.AgeAccept != nil {
if pub := time.Time(entry.Published); !pub.IsZero() && !afp.AgeAccept(pub) {
return
}
}
var self, sha256, sha512, sign string
for i := range entry.Link {

View file

@ -14,6 +14,7 @@ Application Options:
-v, --verbose Verbose output
-r, --rate= The average upper limit of https operations per second (defaults to unlimited)
-w, --worker=NUM NUMber of concurrent downloads (default: 2)
-t, --timerange=RANGE RANGE of time from which advisories to download
-H, --header= One or more extra HTTP header fields
--validator=URL URL to validate documents remotely
--validatorcache=FILE FILE to cache remote validations
@ -54,4 +55,34 @@ worker = 2
# validator # not set by default
# validatorcache # not set by default
validatorpreset = ["mandatory"]
# timerange # not set by default
```
The `timerange` parameter restricts the download to advisories whose last change falls
into a given interval. There are three possible notations:
1. Relative. If the given string is a valid [Go duration](https://pkg.go.dev/time@go1.20.6#ParseDuration),
the time interval from now minus that duration until now is used.
E.g. `"3h"` means downloading the advisories that have changed in the last three hours.
2. Absolute. If the given string is an RFC 3339 date timestamp, the time interval between
this date and now is used.
E.g. `"2006-01-02"` means that all files changed between January 2nd, 2006 and now are going to be
downloaded.
Accepted patterns are:
- `"2006-01-02T15:04:05Z"`
- `"2006-01-02T15:04:05+07:00"`
- `"2006-01-02T15:04:05-07:00"`
- `"2006-01-02T15:04:05"`
- `"2006-01-02T15:04"`
- `"2006-01-02T15"`
- `"2006-01-02"`
- `"2006-01"`
- `"2006"`
Missing parts are set to the smallest value possible in that field.
3. Range. Same as 2, but with two dates separated by a `,` to span an interval. E.g. `2019,2024`
spans an interval from the 1st of January 2019 to the 1st of January 2024.
All interval boundaries are inclusive.

92
internal/models/models.go Normal file
View file

@ -0,0 +1,92 @@
// This file is Free Software under the MIT License
// without warranty, see README.md and LICENSES/MIT.txt for details.
//
// SPDX-License-Identifier: MIT
//
// SPDX-FileCopyrightText: 2023 German Federal Office for Information Security (BSI) <https://www.bsi.bund.de>
// Software-Engineering: 2023 Intevation GmbH <https://intevation.de>
// Package models contains helper models used in the tools internally.
package models
import (
"fmt"
"strings"
"time"
)
// TimeRange describes a span of time by its two boundaries.
// Ranges built with NewTimeInterval hold the earlier boundary
// at index 0 and the later one at index 1.
type TimeRange [2]time.Time

// NewTimeInterval builds a TimeRange from two points in time.
// The arguments may be given in any order; the earlier one
// always ends up as the first boundary of the result.
func NewTimeInterval(a, b time.Time) TimeRange {
	if a.After(b) {
		return TimeRange{b, a}
	}
	return TimeRange{a, b}
}
// guessDate attempts to interpret s as an RFC 3339 date time or as
// one of its truncated forms. The layouts are tried from the most
// to the least specific one and the first successful parse wins.
// The boolean result reports whether any layout matched; if none
// did, the zero time is returned alongside false.
func guessDate(s string) (time.Time, bool) {
	layouts := [...]string{
		"2006-01-02T15:04:05Z07:00",
		"2006-01-02T15:04:05",
		"2006-01-02T15:04",
		"2006-01-02T15",
		"2006-01-02",
		"2006-01",
		"2006",
	}
	for i := range layouts {
		parsed, err := time.Parse(layouts[i], s)
		if err != nil {
			continue
		}
		return parsed, true
	}
	return time.Time{}, false
}
// UnmarshalText implements [encoding.TextUnmarshaler]. The text is
// handed over to UnmarshalFlag unchanged, so both entry points
// accept exactly the same notations.
func (tr *TimeRange) UnmarshalText(text []byte) error {
	input := string(text)
	return tr.UnmarshalFlag(input)
}
// UnmarshalFlag implements the Unmarshaler interface of go-flags.
// Surrounding white space of s is ignored. Three notations are
// understood:
//   - a Go duration such as "3h": the range between now minus that
//     duration and now,
//   - a single date: the range between that date and now,
//   - two dates separated by a comma: the range between both dates.
//
// Dates are interpreted by guessDate. If a date cannot be parsed an
// error is returned and the receiver is left unmodified.
func (tr *TimeRange) UnmarshalFlag(s string) error {
	s = strings.TrimSpace(s)

	// The relative notation takes precedence over the date notations.
	if d, err := time.ParseDuration(s); err == nil {
		now := time.Now()
		*tr = NewTimeInterval(now.Add(-d), now)
		return nil
	}

	first, second, isInterval := strings.Cut(s, ",")
	first, second = strings.TrimSpace(first), strings.TrimSpace(second)

	// Both remaining notations start with a date.
	start, ok := guessDate(first)
	if !ok {
		return fmt.Errorf("%q is not a valid RFC date time", first)
	}

	var end time.Time
	if isInterval {
		// An explicit second boundary was given.
		if end, ok = guessDate(second); !ok {
			return fmt.Errorf("%q is not a valid RFC date time", second)
		}
	} else {
		// Only a start date: the range reaches up to now.
		end = time.Now()
	}

	*tr = NewTimeInterval(start, end)
	return nil
}
// Contains reports whether t lies within the range. Both boundaries
// are inclusive: a time equal to either of them counts as contained.
func (tr TimeRange) Contains(t time.Time) bool {
	start, end := tr[0], tr[1]
	return !t.Before(start) && !t.After(end)
}