1
0
Fork 0
mirror of https://github.com/gocsaf/csaf.git synced 2025-12-22 11:55:40 +01:00

Harvest only JSON files.

This commit is contained in:
Sascha L. Teichmann 2022-05-17 11:51:29 +02:00
parent 4428679822
commit eaa2620eba
2 changed files with 11 additions and 5 deletions

View file

@ -12,6 +12,7 @@ import (
"io" "io"
"net/http" "net/http"
"net/url" "net/url"
"strings"
"github.com/PuerkitoBio/goquery" "github.com/PuerkitoBio/goquery"
) )
@ -66,6 +67,10 @@ func linksOnPage(r io.Reader, resolve func(string) (string, error)) ([]string, e
return return
} }
if link, ok := s.Attr("href"); ok { if link, ok := s.Attr("href"); ok {
// Only care for JSON files here.
if !strings.HasSuffix(link, ".json") {
return
}
if link, err = resolve(link); err == nil { if link, err = resolve(link); err == nil {
links = append(links, link) links = append(links, link)
} }

View file

@ -8,14 +8,15 @@ import (
const page0 = `<html> const page0 = `<html>
<body> <body>
<a href="link0">link0</a> <a href="no-a-json">Not a JSON</a>
<a href="link0.json">link0</a>
<ol> <ol>
<li><a href="link1">link1</a></li> <li><a href="link1.json">link1</a></li>
<li><a href="link2">link2</a></li> <li><a href="link2.json">link1</a></li>
</ol> </ol>
<p> <p>
<div> <div>
<li><a href="link3">link3</a></li> <li><a href="link3.json">link1</a></li>
</div> </div>
<p> <p>
</body> </body>
@ -36,7 +37,7 @@ func TestLinksOnPage(t *testing.T) {
} }
for i, link := range links { for i, link := range links {
href := fmt.Sprintf("link%d", i) href := fmt.Sprintf("link%d.json", i)
if href != link { if href != link {
t.Fatalf("Expected link '%s', got '%s'\n", href, link) t.Fatalf("Expected link '%s', got '%s'\n", href, link)
} }