diff --git a/csaf/providermetaloader.go b/csaf/providermetaloader.go index 4924aa0..1c321ed 100644 --- a/csaf/providermetaloader.go +++ b/csaf/providermetaloader.go @@ -9,8 +9,12 @@ package csaf import ( - "errors" - "log" + "bytes" + "crypto/sha256" + "encoding/json" + "fmt" + "io" + "net/http" "strings" "github.com/csaf-poc/csaf_distribution/util" @@ -19,8 +23,9 @@ import ( // ProviderMetadataLoader helps load provider-metadata.json from // the various locations. type ProviderMetadataLoader struct { - client *util.Client - logging func(string, ...any) + client util.Client + already map[string]*LoadedProviderMetadata + messages ProviderMetadataLoadMessages } // ProviderMetadataLoadMessageType is the type of the message. @@ -33,6 +38,15 @@ const ( SchemaValidationFailed // SchemaValidationFailedDetail is a failure detail in schema validation. SchemaValidationFailedDetail + // HTTPFailed indicates that loading on HTTP level failed. + HTTPFailed + // ExtraProviderMetadataFound indicates an extra PMD found in security.txt. + ExtraProviderMetadataFound + // WellknownSecurityMismatch indicates that the PMDs found under wellknown and + // in the security do not match. + WellknownSecurityMismatch + // IgnoreProviderMetadata indicates that a extra PMD was ignored. + IgnoreProviderMetadata ) // ProviderMetadataLoadMessage is a message generated while loading @@ -42,6 +56,9 @@ type ProviderMetadataLoadMessage struct { Message string } +// ProviderMetadataLoadMessages is a list of loading messages. +type ProviderMetadataLoadMessages []ProviderMetadataLoadMessage + // LoadedProviderMetadata represents a loaded provider metadata. type LoadedProviderMetadata struct { // URL is location where the document was found. @@ -51,7 +68,31 @@ type LoadedProviderMetadata struct { // Hash is a SHA256 sum over the document. Hash []byte // Messages are the error message happened while loading. - Messages []ProviderMetadataLoadMessage + Messages ProviderMetadataLoadMessages +} + +// Add appends a message to the list of loading messages. +func (pmlm *ProviderMetadataLoadMessages) Add( + typ ProviderMetadataLoadMessageType, + msg string, +) { + *pmlm = append(*pmlm, ProviderMetadataLoadMessage{ + Type: typ, + Message: msg, + }) +} + +// AppendUnique appends unique messages from a second list. +func (pmlm *ProviderMetadataLoadMessages) AppendUnique(other ProviderMetadataLoadMessages) { +next: + for _, o := range other { + for _, m := range *pmlm { + if m == o { + continue next + } + } + *pmlm = append(*pmlm, o) + } } // Valid returns true if the loaded document is valid. @@ -60,42 +101,240 @@ func (lpm *LoadedProviderMetadata) Valid() bool { } // NewProviderMetadataLoader create a new loader. -func NewProviderMetadataLoader( - client *util.Client, - logging func(string, ...any), -) *ProviderMetadataLoader { - - // If no logging was given log to stdout. - if logging == nil { - logging = func(format string, args ...any) { - log.Printf("ProviderMetadataLoader: "+format+"\n", args...) - } - } +func NewProviderMetadataLoader(client util.Client) *ProviderMetadataLoader { return &ProviderMetadataLoader{ client: client, - logging: logging, + already: map[string]*LoadedProviderMetadata{}, } } // Load loads a provider metadata for a given path. // If the domain starts with `https://` it only attemps to load // the data from that URL. -func (pmdl *ProviderMetadataLoader) Load(path string) (*LoadedProviderMetadata, error) { +func (pmdl *ProviderMetadataLoader) Load(domain string) *LoadedProviderMetadata { - // check direct path - if strings.HasPrefix(path, "https://") { - return pmdl.loadFromURL(path) + // Check direct path + if strings.HasPrefix(domain, "https://") { + lpmd, err := pmdl.loadFromURL(domain) + if err != nil { + lpmd = new(LoadedProviderMetadata) + lpmd.Messages.Add(HTTPFailed, err.Error()) + } + return lpmd } - // TODO: Implement me! - return nil, errors.New("not implemented, yet") + // First try the well-known path. + wellknownURL := "https://" + domain + "/.well-known/csaf/provider-metadata.json" + + wellknownResult, err := pmdl.loadFromURL(wellknownURL) + if err != nil { + pmdl.messages.Add(HTTPFailed, err.Error()) + } + + // Valid provider metadata under well-known. + var wellknownGood *LoadedProviderMetadata + + // We have a candidate. + if wellknownResult.Valid() { + wellknownGood = wellknownResult + } + + // Next load the PMDs from security.txt + secURL := "https://" + domain + "/.well-known/security.txt" + secResults := pmdl.loadFromSecurity(secURL) + + // Filter out the results which are valid. + var secGoods []*LoadedProviderMetadata + + for _, result := range secResults { + if len(result.Messages) > 0 { + // If there where validation issues append them + // to the overall report + pmdl.messages.AppendUnique(pmdl.messages) + } else { + secGoods = append(secGoods, result) + } + } + + // Mention extra CSAF entries in security.txt. + ignoreExtras := func() { + for _, extra := range secGoods[1:] { + pmdl.messages.Add( + ExtraProviderMetadataFound, + fmt.Sprintf("Ignoring extra CSAF entry in security.txt: %s", extra.URL)) + } + } + + // security.txt contains good entries. + if len(secGoods) > 0 { + // we already have a good wellknown, take it. + if wellknownGood != nil { + // check if first of security urls is identical to wellknown. + if bytes.Equal(wellknownGood.Hash, secGoods[0].Hash) { + ignoreExtras() + } else { + // Complaint about not matching. + pmdl.messages.Add( + WellknownSecurityMismatch, + "First entry of security.txt and well-known don't match.") + // List all the security urls. + for _, sec := range secGoods { + pmdl.messages.Add( + IgnoreProviderMetadata, + fmt.Sprintf("Ignoring CSAF entry in security.txt: %s", sec.URL)) + } + } + // Take the good well-known. + wellknownGood.Messages.AppendUnique(pmdl.messages) + return wellknownGood + } + + // Don't have well-known. Take first good from security.txt. + ignoreExtras() + secGoods[0].Messages.AppendUnique(pmdl.messages) + return secGoods[0] + } + + // If we have a good well-known take it. + if wellknownGood != nil { + wellknownGood.Messages.AppendUnique(pmdl.messages) + return wellknownGood + } + + // Last resort: fall back to DNS. + dnsURL := "https://csaf.data.security." + domain + + dnsResult, err := pmdl.loadFromURL(dnsURL) + if err != nil { + dnsResult = new(LoadedProviderMetadata) + pmdl.messages.Add( + HTTPFailed, + err.Error()) + } + dnsResult.Messages.AppendUnique(pmdl.messages) + return dnsResult +} + +// loadFromSecurity loads the PMDs mentioned in the security.txt. +func (pmdl *ProviderMetadataLoader) loadFromSecurity(path string) []*LoadedProviderMetadata { + + res, err := pmdl.client.Get(path) + if err != nil { + pmdl.messages.Add( + HTTPFailed, + fmt.Sprintf("Fetching %q failed: %v", path, err)) + return nil + } + if res.StatusCode != http.StatusOK { + pmdl.messages.Add( + HTTPFailed, + fmt.Sprintf("Fetching %q failed: %s (%d)", path, res.Status, res.StatusCode)) + return nil + } + + // Extract all potential URLs from CSAF. + urls, err := func() ([]string, error) { + defer res.Body.Close() + return ExtractProviderURL(res.Body, true) + }() + + if err != nil { + pmdl.messages.Add( + HTTPFailed, + fmt.Sprintf("Loading %q failed: %v", path, err)) + return nil + } + + var loaded []*LoadedProviderMetadata + + // Load the URLs +nextURL: + for _, url := range urls { + lpmd, err := pmdl.loadFromURL(url) + // If loading failed note it down. + if err != nil { + pmdl.messages.Add( + HTTPFailed, + fmt.Sprintf("Loading %q failed: %v", url, err)) + continue + } + // Check for duplicates + for _, l := range loaded { + if l == lpmd { + continue nextURL + } + } + loaded = append(loaded, lpmd) + } + + return loaded } // loadFromURL loads a provider metadata from a given URL. func (pmdl *ProviderMetadataLoader) loadFromURL(path string) (*LoadedProviderMetadata, error) { - _ = path + res, err := pmdl.client.Get(path) + if err != nil { + return nil, fmt.Errorf("fetching %q failed: %v", path, err) + } + if res.StatusCode != http.StatusOK { + return nil, fmt.Errorf("fetching %q failed: %s (%d)", path, res.Status, res.StatusCode) + } - // TODO: Implement me! - return nil, errors.New("not implemented, yet") + // TODO: Check for application/json and log it. + + defer res.Body.Close() + + // Calculate checksum for later comparison. + hash := sha256.New() + + result := LoadedProviderMetadata{URL: path} + + tee := io.TeeReader(res.Body, hash) + + var doc any + + if err := json.NewDecoder(tee).Decode(&doc); err != nil { + return nil, fmt.Errorf("JSON decoding failed: %w", err) + } + + // Before checking the err lets check if we had the same + // document before. If so it will have failed parsing before. + + sum := hash.Sum(nil) + key := string(sum) + + // If we already have loaded it return the cached result. + if r := pmdl.already[key]; r != nil { + return r, nil + } + + // write it back as loaded + + switch errors, err := ValidateProviderMetadata(doc); { + case err != nil: + result.Messages = []ProviderMetadataLoadMessage{{ + Type: SchemaValidationFailed, + Message: fmt.Sprintf("%s: Validating against JSON schema failed: %v", path, err), + }} + + case len(errors) > 0: + result.Messages = []ProviderMetadataLoadMessage{{ + Type: SchemaValidationFailed, + Message: fmt.Sprintf("%s: Validating against JSON schema failed: %v", path, err), + }} + for _, msg := range errors { + result.Messages = append(result.Messages, ProviderMetadataLoadMessage{ + Type: SchemaValidationFailedDetail, + Message: strings.ReplaceAll(msg, `%`, `%%`), + }) + } + default: + // Only store in result if validation passed. + result.Document = doc + result.Hash = sum + } + + pmdl.already[key] = &result + return &result, nil }