diff --git a/internal/pkg/postprocessor/assets.go b/internal/pkg/postprocessor/assets.go index cdba3058..6b28ad66 100644 --- a/internal/pkg/postprocessor/assets.go +++ b/internal/pkg/postprocessor/assets.go @@ -9,7 +9,9 @@ import ( "github.com/internetarchive/Zeno/pkg/models" ) -func extractAssets(item *models.Item) (assets []*models.URL, err error) { +// extractAssets extracts assets from the item's body and returns them. +// It also potentially returns outlinks if the body contains URLs that are not assets. +func extractAssets(item *models.Item) (assets, outlinks []*models.URL, err error) { var ( contentType = item.GetURL().GetResponse().Header.Get("Content-Type") logger = log.NewFieldedLogger(&log.Fields{ @@ -25,57 +27,62 @@ func extractAssets(item *models.Item) (assets []*models.URL, err error) { INAAssets, err := ina.ExtractMedias(item.GetURL()) if err != nil { logger.Error("unable to extract medias from INA", "err", err.Error(), "item", item.GetShortID()) - return assets, err + return assets, outlinks, err } HTMLAssets, err := extractor.HTMLAssets(item) if err != nil { logger.Error("unable to extract assets", "err", err.Error(), "item", item.GetShortID()) - return assets, err + return assets, outlinks, err } assets = append(INAAssets, HTMLAssets...) 
case truthsocial.NeedExtraction(item.GetURL()): - assets, err = truthsocial.ExtractAssets(item) + assets, outlinks, err = truthsocial.ExtractAssets(item) if err != nil { logger.Error("unable to extract assets from TruthSocial", "err", err.Error(), "item", item.GetShortID()) - return assets, err + return assets, outlinks, err } case extractor.IsM3U8(item.GetURL()): assets, err = extractor.M3U8(item.GetURL()) if err != nil { logger.Error("unable to extract assets", "err", err.Error(), "item", item.GetShortID()) - return assets, err + return assets, outlinks, err } case extractor.IsJSON(item.GetURL()): - assets, err = extractor.JSON(item.GetURL()) + assets, outlinks, err = extractor.JSON(item.GetURL()) if err != nil { logger.Error("unable to extract assets", "err", err.Error(), "item", item.GetShortID()) - return assets, err + return assets, outlinks, err } case extractor.IsXML(item.GetURL()): - assets, err = extractor.XML(item.GetURL()) + assets, outlinks, err = extractor.XML(item.GetURL()) if err != nil { logger.Error("unable to extract assets", "err", err.Error(), "item", item.GetShortID()) - return assets, err + return assets, outlinks, err } case extractor.IsHTML(item.GetURL()): assets, err = extractor.HTMLAssets(item) if err != nil { logger.Error("unable to extract assets", "err", err.Error(), "item", item.GetShortID()) - return assets, err + return assets, outlinks, err } default: logger.Debug("no extractor used for page", "content-type", contentType, "item", item.GetShortID()) - return assets, nil + return assets, outlinks, nil } - // Set the hops level to the item's level + // For assets, set the hops level to the item's level for _, asset := range assets { asset.SetHops(item.GetURL().GetHops()) } - return assets, nil + // For outlinks, set the hops level to the item's level + 1 + for _, outlink := range outlinks { + outlink.SetHops(item.GetURL().GetHops() + 1) + } + + return assets, outlinks, nil } func shouldExtractAssets(item *models.Item) bool { diff 
--git a/internal/pkg/postprocessor/extractor/json.go b/internal/pkg/postprocessor/extractor/json.go index ad90278c..941702b9 100644 --- a/internal/pkg/postprocessor/extractor/json.go +++ b/internal/pkg/postprocessor/extractor/json.go @@ -2,8 +2,8 @@ package extractor import ( "encoding/json" - "net/url" + "github.com/ImVexed/fasturl" "github.com/internetarchive/Zeno/pkg/models" ) @@ -11,27 +11,34 @@ func IsJSON(URL *models.URL) bool { return isContentType(URL.GetResponse().Header.Get("Content-Type"), "json") } -func JSON(URL *models.URL) (assets []*models.URL, err error) { +func JSON(URL *models.URL) (assets, outlinks []*models.URL, err error) { defer URL.RewindBody() bodyBytes := make([]byte, URL.GetBody().Len()) _, err = URL.GetBody().Read(bodyBytes) if err != nil { - return nil, err + return nil, nil, err } - rawAssets, err := GetURLsFromJSON(bodyBytes) + rawURLs, err := GetURLsFromJSON(bodyBytes) if err != nil { - return nil, err + return nil, nil, err } - for _, rawAsset := range rawAssets { - assets = append(assets, &models.URL{ - Raw: rawAsset, - }) + // We only consider as assets the URLs in which we can find a file extension + for _, rawURL := range rawURLs { + if hasFileExtension(rawURL) { + assets = append(assets, &models.URL{ + Raw: rawURL, + }) + } else { + outlinks = append(outlinks, &models.URL{ + Raw: rawURL, + }) + } } - return assets, err + return assets, outlinks, nil } func GetURLsFromJSON(body []byte) ([]string, error) { @@ -65,6 +72,6 @@ func findURLs(data interface{}, links *[]string) { } func isValidURL(str string) bool { - u, err := url.Parse(str) - return err == nil && u.Scheme != "" && u.Host != "" + u, err := fasturl.ParseURL(str) + return err == nil && u.Host != "" } diff --git a/internal/pkg/postprocessor/extractor/json_test.go b/internal/pkg/postprocessor/extractor/json_test.go index 6330af85..6426432a 100644 --- a/internal/pkg/postprocessor/extractor/json_test.go +++ b/internal/pkg/postprocessor/extractor/json_test.go @@ -74,7 +74,7 
@@ func TestJSON(t *testing.T) { t.Errorf("ProcessBody() error = %v", err) } - gotURLs, err := JSON(URL) + assets, _, err := JSON(URL) if (err != nil) != tt.wantErr { t.Errorf("JSON() error = %v, wantErr %v", err, tt.wantErr) @@ -82,16 +82,16 @@ func TestJSON(t *testing.T) { } // Sort both slices before comparison - sortURLs(gotURLs) + sortURLs(assets) sortURLs(tt.wantURLs) - if len(gotURLs) != len(tt.wantURLs) { - t.Fatalf("Expected %d URLs, got %d", len(tt.wantURLs), len(gotURLs)) + if len(assets) != len(tt.wantURLs) { + t.Fatalf("Expected %d URLs, got %d", len(tt.wantURLs), len(assets)) } - for i := range gotURLs { - if gotURLs[i].Raw != tt.wantURLs[i].Raw { - t.Errorf("Expected URL %s, got %s", tt.wantURLs[i].Raw, gotURLs[i].Raw) + for i := range assets { + if assets[i].Raw != tt.wantURLs[i].Raw { + t.Errorf("Expected URL %s, got %s", tt.wantURLs[i].Raw, assets[i].Raw) } } }) diff --git a/internal/pkg/postprocessor/extractor/utils.go b/internal/pkg/postprocessor/extractor/utils.go index 18d76800..5c3bd70b 100644 --- a/internal/pkg/postprocessor/extractor/utils.go +++ b/internal/pkg/postprocessor/extractor/utils.go @@ -1,7 +1,6 @@ package extractor import ( - "net/url" "regexp" "sort" "strings" @@ -17,41 +16,42 @@ var ( AssetsRegex = `(?i)\b(?:src|href)=["']([^"']+\.(?:css|js|png|jpg|jpeg|gif|svg|webp|woff|woff2|ttf|eot))["']` ) -func isContentType(header, targetContentType string) bool { - // Lowercase the header and target content type for case-insensitive comparison - header = strings.ToLower(header) - targetContentType = strings.ToLower(targetContentType) - - return strings.Contains(header, targetContentType) -} - -// compareURLs compares two slices of *url.URL -func compareURLs(a, b []*url.URL) bool { - if len(a) != len(b) { - return false +// hasFileExtension checks if a URL has a file extension in it. +// It might yield false positives, like https://example.com/super.idea, +// but it's good enough for our purposes. 
+func hasFileExtension(s string) bool { + // Remove fragment portion (#...) + if i := strings.IndexByte(s, '#'); i != -1 { + s = s[:i] } - - // Create a map to store the count of each URL in slice a - counts := make(map[string]int) - for _, url := range a { - counts[url.String()]++ + // Remove query portion (?...) + if i := strings.IndexByte(s, '?'); i != -1 { + s = s[:i] } - // Decrement the count for each URL in slice b - for _, url := range b { - counts[url.String()]-- + // Keep only the substring after the last slash + if slashPos := strings.LastIndexByte(s, '/'); slashPos != -1 { + s = s[slashPos+1:] } - // Check if any count is non-zero, indicating a mismatch - for _, count := range counts { - if count != 0 { - return false - } + // Find the last '.' in the file name + dotPos := strings.LastIndexByte(s, '.') + if dotPos == -1 || dotPos == len(s)-1 { + // No '.' or '.' is the last character -> no valid extension + return false } return true } +func isContentType(header, targetContentType string) bool { + // Lowercase the header and target content type for case-insensitive comparison + header = strings.ToLower(header) + targetContentType = strings.ToLower(targetContentType) + + return strings.Contains(header, targetContentType) +} + // sortURLs sorts a slice of *url.URL func sortURLs(urls []*models.URL) { sort.Slice(urls, func(i, j int) bool { diff --git a/internal/pkg/postprocessor/extractor/utils_test.go b/internal/pkg/postprocessor/extractor/utils_test.go new file mode 100644 index 00000000..839537b7 --- /dev/null +++ b/internal/pkg/postprocessor/extractor/utils_test.go @@ -0,0 +1,111 @@ +package extractor + +import "testing" + +func TestHasFileExtension(t *testing.T) { + tests := []struct { + name string + input string + want bool + }{ + { + name: "Simple JPG extension", + input: "http://example.com/image.jpg", + want: true, + }, + { + name: "Query param after extension", + input: "https://example.org/dog.png?foo=bar", + want: true, + }, + { + name: 
"Fragment after extension", + input: "https://test.com/cat.gif#section1", + want: true, + }, + { + name: "No extension at all", + input: "http://example.com/foo", + want: false, + }, + { + name: "Trailing slash after potential extension", + input: "http://example.com/foo.txt/", + want: false, // The extension is not truly at the end + }, + { + name: "Extension deeper in path", + input: "http://example.com/data.txt/archive", + want: false, // The .txt is not the last segment + }, + { + name: "Multiple dots, multiple segments", + input: "http://example.net/backups/data.tar.gz?version=2", + want: true, + }, + { + name: "Hidden file style, no extension (e.g. .htaccess)", + input: "https://example.com/.htaccess", + want: true, + }, + { + name: "Dot at the end only (no extension)", + input: "http://example.org/name.", + want: false, // There's no extension after the final dot + }, + { + name: "Just a plain filename with extension, no slashes", + input: "file.zip", + want: true, + }, + { + name: "Filename with multiple dots in the last segment", + input: "https://example.io/some.dir/my.file.name.txt", + want: true, + }, + { + name: "Parameters but no dot in final segment", + input: "https://example.com/paramCheck?this=that", + want: false, + }, + { + name: "Multiple slashes near the end", + input: "http://example.com/dir/subdir/.hidden/", + want: false, + }, + { + name: "Dot in subdirectory name only", + input: "http://example.com/dir.withdot/filename", + want: false, + }, + { + name: "Extension is the last item plus fragment", + input: "http://example.com/test.db#backup", + want: true, + }, + { + name: "No slash, no dot, random string", + input: "thisIsJustAString", + want: false, + }, + { + name: "Multiple dots in final segment with a trailing query", + input: "http://example.com/foo.bar.baz.qux?stuff=1", + want: true, + }, + { + name: "Extension disguised with a slash in the query", + input: "http://example.com/data.zip?path=/etc/passwd", + want: true, + }, + } + + for 
_, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := hasFileExtension(tt.input) + if got != tt.want { + t.Errorf("hasFileExtension(%q) = %v; want %v", tt.input, got, tt.want) + } + }) + } +} diff --git a/internal/pkg/postprocessor/extractor/xml.go b/internal/pkg/postprocessor/extractor/xml.go index e1eac8c8..11f37d4a 100644 --- a/internal/pkg/postprocessor/extractor/xml.go +++ b/internal/pkg/postprocessor/extractor/xml.go @@ -28,23 +28,23 @@ func IsSitemapXML(URL *models.URL) bool { return isContentType(URL.GetResponse().Header.Get("Content-Type"), "xml") && bytes.Contains(xmlBody, sitemapMarker) } -func XML(URL *models.URL) (assets []*models.URL, err error) { +func XML(URL *models.URL) (assets, outlinks []*models.URL, err error) { defer URL.RewindBody() xmlBody, err := io.ReadAll(URL.GetBody()) if err != nil { - return nil, err + return nil, nil, err } if len(xmlBody) == 0 { - return nil, errors.New("empty XML body") + return nil, nil, errors.New("empty XML body") } decoder := xml.NewDecoder(bytes.NewReader(xmlBody)) decoder.Strict = false var tok xml.Token - var rawAssets []string + var rawURLs []string for { tok, err = decoder.RawToken() @@ -55,31 +55,38 @@ func XML(URL *models.URL) (assets []*models.URL, err error) { if err != nil { // return URLs we got so far when error occurs - return assets, err + return assets, outlinks, err } switch tok := tok.(type) { case xml.StartElement: for _, attr := range tok.Attr { if strings.HasPrefix(attr.Value, "http") { - rawAssets = append(rawAssets, attr.Value) + rawURLs = append(rawURLs, attr.Value) } } case xml.CharData: if bytes.HasPrefix(tok, []byte("http")) { - rawAssets = append(rawAssets, string(tok)) + rawURLs = append(rawURLs, string(tok)) } else { // Try to extract URLs from the text - rawAssets = append(rawAssets, utils.DedupeStrings(LinkRegexRelaxed.FindAllString(string(tok), -1))...) + rawURLs = append(rawURLs, utils.DedupeStrings(LinkRegexRelaxed.FindAllString(string(tok), -1))...) 
} } } - for _, rawAsset := range rawAssets { - assets = append(assets, &models.URL{ - Raw: rawAsset, - }) + // We only consider as assets the URLs in which we can find a file extension + for _, rawURL := range rawURLs { + if hasFileExtension(rawURL) { + assets = append(assets, &models.URL{ + Raw: rawURL, + }) + } else { + outlinks = append(outlinks, &models.URL{ + Raw: rawURL, + }) + } } - return assets, nil + return assets, outlinks, nil } diff --git a/internal/pkg/postprocessor/extractor/xml_test.go b/internal/pkg/postprocessor/extractor/xml_test.go index 92681fb8..4896a5ff 100644 --- a/internal/pkg/postprocessor/extractor/xml_test.go +++ b/internal/pkg/postprocessor/extractor/xml_test.go @@ -135,18 +135,21 @@ func TestXML(t *testing.T) { t.Errorf("ProcessBody() error = %v", err) } - assets, err := XML(URL) + assets, outlinks, err := XML(URL) + + URLs := append(assets, outlinks...) + if (err != nil) != tt.hasError { t.Fatalf("XML() error = %v, wantErr %v", err, tt.hasError) } - if len(assets) != len(tt.expected) { - t.Fatalf("Expected %d assets, got %d", len(tt.expected), len(assets)) + if len(URLs) != len(tt.expected) { + t.Fatalf("Expected %d assets, got %d", len(tt.expected), len(URLs)) } - for i, asset := range assets { - if asset.Raw != tt.expected[i] { - t.Errorf("Expected asset %s, got %s", tt.expected[i], asset.Raw) + for i, URL := range URLs { + if URL.Raw != tt.expected[i] { + t.Errorf("Expected asset %s, got %s", tt.expected[i], URL.Raw) } } }) diff --git a/internal/pkg/postprocessor/item.go b/internal/pkg/postprocessor/item.go index 7fac0d17..596f6c20 100644 --- a/internal/pkg/postprocessor/item.go +++ b/internal/pkg/postprocessor/item.go @@ -84,9 +84,14 @@ func postprocessItem(item *models.Item) []*models.Item { if item.GetURL().GetResponse() != nil && item.GetURL().GetResponse().StatusCode == 200 { logger.Debug("item is a success", "item_id", item.GetShortID()) + var outlinksFromAssets []*models.URL + // Extract assets from the page if 
shouldExtractAssets(item) { - assets, err := extractAssets(item) + var assets []*models.URL + var err error + + assets, outlinksFromAssets, err = extractAssets(item) if err != nil { logger.Error("unable to extract assets", "err", err.Error(), "item_id", item.GetShortID()) } else { @@ -113,6 +118,9 @@ func postprocessItem(item *models.Item) []*models.Item { if err != nil { logger.Error("unable to extract outlinks", "err", err.Error(), "item_id", item.GetShortID()) } else { + // Append the outlinks found from the assets + newOutlinks = append(newOutlinks, outlinksFromAssets...) + for i := range newOutlinks { if newOutlinks[i] == nil { logger.Warn("nil link", "item_id", item.GetShortID()) diff --git a/internal/pkg/postprocessor/outlinks.go b/internal/pkg/postprocessor/outlinks.go index 79de658c..82058c1d 100644 --- a/internal/pkg/postprocessor/outlinks.go +++ b/internal/pkg/postprocessor/outlinks.go @@ -47,11 +47,17 @@ func extractOutlinks(item *models.Item) (outlinks []*models.URL, err error) { return outlinks, err } case extractor.IsSitemapXML(item.GetURL()): - outlinks, err = extractor.XML(item.GetURL()) + var assets []*models.URL + + assets, outlinks, err = extractor.XML(item.GetURL()) if err != nil { logger.Error("unable to extract outlinks", "err", err.Error(), "item", item.GetShortID()) return outlinks, err } + + // Here we don't care about the difference between assets and outlinks, + // we just want to extract all the URLs from the sitemap + outlinks = append(outlinks, assets...) 
case extractor.IsHTML(item.GetURL()): outlinks, err := extractor.HTMLOutlinks(item) if err != nil { diff --git a/internal/pkg/postprocessor/sitespecific/truthsocial/truthsocial.go b/internal/pkg/postprocessor/sitespecific/truthsocial/truthsocial.go index 7e7eb5af..b8e4e9b4 100644 --- a/internal/pkg/postprocessor/sitespecific/truthsocial/truthsocial.go +++ b/internal/pkg/postprocessor/sitespecific/truthsocial/truthsocial.go @@ -13,39 +13,38 @@ var ( usernameRegex = regexp.MustCompile(`^https?:\/\/truthsocial\.com\/@([^/]+)`) statusesRegex = regexp.MustCompile(`^https?:\/\/truthsocial\.com\/api\/v1\/statuses\/\d+$`) accountLookupRegex = regexp.MustCompile(`^https?:\/\/truthsocial\.com\/api\/v1\/accounts\/lookup\?acct=[a-zA-Z0-9]+$`) - truthsocialRegex = regexp.MustCompile(`^https?:\/\/truthsocial\.com\/.*`) ) func NeedExtraction(URL *models.URL) bool { return IsStatusesURL(URL) || IsPostURL(URL) } -func ExtractAssets(item *models.Item) (assets []*models.URL, err error) { +func ExtractAssets(item *models.Item) (assets, outlinks []*models.URL, err error) { if IsStatusesURL(item.GetURL()) { truthsocialAssets, err := GenerateVideoURLsFromStatusesAPI(item.GetURL()) if err != nil { - return assets, err + return assets, outlinks, err } - JSONAssets, err := extractor.JSON(item.GetURL()) + var JSONAssets []*models.URL; JSONAssets, outlinks, err = extractor.JSON(item.GetURL()) if err != nil { - return assets, err + return assets, outlinks, err } assets = append(truthsocialAssets, JSONAssets...) } else if IsPostURL(item.GetURL()) { truthsocialAssets, err := GeneratePostAssetsURLs(item.GetURL()) if err != nil { - return assets, err + return assets, outlinks, err } HTMLAssets, err := extractor.HTMLAssets(item) if err != nil { - return assets, err + return assets, outlinks, err } assets = append(truthsocialAssets, HTMLAssets...) } - return assets, nil + return assets, outlinks, nil }