diff --git a/internal/pkg/config/config.go b/internal/pkg/config/config.go
index 3a54e600..dcec199d 100644
--- a/internal/pkg/config/config.go
+++ b/internal/pkg/config/config.go
@@ -12,7 +12,6 @@ import (
 	"regexp"
 	"strings"
 	"sync"
-	"time"
 
 	"github.com/google/uuid"
 	"github.com/internetarchive/Zeno/internal/pkg/postprocessor/domainscrawl"
@@ -326,18 +325,7 @@ func readLocalExclusionFile(file string) (regexes []string, err error) {
 }
 
 func readRemoteExclusionFile(URL string) (regexes []string, err error) {
-	httpClient := &http.Client{
-		Timeout: time.Second * 5,
-	}
-
-	req, err := http.NewRequest(http.MethodGet, URL, nil)
-	if err != nil {
-		return regexes, err
-	}
-
-	req.Header.Set("User-Agent", config.UserAgent)
-
-	resp, err := httpClient.Do(req)
+	resp, err := http.Get(URL)
 	if err != nil {
 		return regexes, err
 	}
diff --git a/internal/pkg/postprocessor/extractor/html.go b/internal/pkg/postprocessor/extractor/html.go
index aef10be2..c0a29979 100644
--- a/internal/pkg/postprocessor/extractor/html.go
+++ b/internal/pkg/postprocessor/extractor/html.go
@@ -1,7 +1,6 @@
 package extractor
 
 import (
-	"encoding/json"
 	"regexp"
 	"strconv"
 	"strings"
@@ -19,7 +18,7 @@ var (
 )
 
 func IsHTML(URL *models.URL) bool {
-	return isContentType(URL.GetResponse().Header.Get("Content-Type"), "html") || strings.Contains(URL.GetMIMEType().String(), "html")
+	return isContentType(URL.GetResponse().Header.Get("Content-Type"), "html")
 }
 
 func HTMLOutlinks(item *models.Item) (outlinks []*models.URL, err error) {
@@ -103,7 +102,7 @@ func HTMLAssets(item *models.Item) (assets []*models.URL, err error) {
 	document.Find("[data-item]").Each(func(index int, i *goquery.Selection) {
 		dataItem, exists := i.Attr("data-item")
 		if exists {
-			URLsFromJSON, err := GetURLsFromJSON(json.NewDecoder(strings.NewReader(dataItem)))
+			URLsFromJSON, err := GetURLsFromJSON([]byte(dataItem))
 			if err != nil {
 				logger.Debug("unable to extract URLs from JSON in data-item attribute", "err", err, "url", item.GetURL().String(), "item", item.GetShortID())
 			} else {
@@ -223,7 +222,7 @@ func HTMLAssets(item *models.Item) (assets []*models.URL, err error) {
 			scriptType, exists := i.Attr("type")
 			if exists {
 				if scriptType == "application/json" {
-					URLsFromJSON, err := GetURLsFromJSON(json.NewDecoder(strings.NewReader(i.Text())))
+					URLsFromJSON, err := GetURLsFromJSON([]byte(i.Text()))
 					if err != nil {
 						// TODO: maybe add back when https://github.com/internetarchive/Zeno/issues/147 is fixed
 						// c.Log.Debug("unable to extract URLs from JSON in script tag", "error", err, "url", URL)
@@ -282,7 +281,7 @@ func HTMLAssets(item *models.Item) (assets []*models.URL, err error) {
 			}
 
 			if len(jsonContent[1]) > payloadEndPosition {
-				URLsFromJSON, err := GetURLsFromJSON(json.NewDecoder(strings.NewReader(jsonContent[1][:payloadEndPosition+1])))
+				URLsFromJSON, err := GetURLsFromJSON([]byte(jsonContent[1][:payloadEndPosition+1]))
 				if err != nil {
 					// TODO: maybe add back when https://github.com/internetarchive/Zeno/issues/147 is fixed
 					// c.Log.Debug("unable to extract URLs from JSON in script tag", "error", err, "url", URL)
diff --git a/internal/pkg/postprocessor/extractor/json.go b/internal/pkg/postprocessor/extractor/json.go
index a112ab79..941702b9 100644
--- a/internal/pkg/postprocessor/extractor/json.go
+++ b/internal/pkg/postprocessor/extractor/json.go
@@ -2,20 +2,25 @@ package extractor
 
 import (
 	"encoding/json"
-	"strings"
+	"io"
 
 	"github.com/ImVexed/fasturl"
 	"github.com/internetarchive/Zeno/pkg/models"
 )
 
 func IsJSON(URL *models.URL) bool {
-	return isContentType(URL.GetResponse().Header.Get("Content-Type"), "json") || strings.Contains(URL.GetMIMEType().String(), "json")
+	return isContentType(URL.GetResponse().Header.Get("Content-Type"), "json")
 }
 
 func JSON(URL *models.URL) (assets, outlinks []*models.URL, err error) {
 	defer URL.RewindBody()
 
-	rawURLs, err := GetURLsFromJSON(json.NewDecoder(URL.GetBody()))
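+	// Read the whole body up front: json.Unmarshal needs a []byte, and a
+	// single Read call is not guaranteed to fill the buffer it is given.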
+	bodyBytes, err := io.ReadAll(URL.GetBody())
+	if err != nil {
+		return nil, nil, err
+	}
+
+	rawURLs, err := GetURLsFromJSON(bodyBytes)
 	if err != nil {
 		return nil, nil, err
 	}
@@ -36,9 +41,9 @@ func JSON(URL *models.URL) (assets, outlinks []*models.URL, err error) {
 	return assets, outlinks, nil
 }
 
-func GetURLsFromJSON(decoder *json.Decoder) ([]string, error) {
+func GetURLsFromJSON(body []byte) ([]string, error) {
 	var data interface{}
-	err := decoder.Decode(&data)
+	err := json.Unmarshal(body, &data)
 	if err != nil {
 		return nil, err
 	}
diff --git a/internal/pkg/postprocessor/extractor/s3.go b/internal/pkg/postprocessor/extractor/s3.go
index 9098b9b5..431107d9 100644
--- a/internal/pkg/postprocessor/extractor/s3.go
+++ b/internal/pkg/postprocessor/extractor/s3.go
@@ -3,6 +3,7 @@ package extractor
 import (
 	"encoding/xml"
 	"fmt"
+	"io"
 	"net/url"
 
 	"github.com/internetarchive/Zeno/internal/pkg/utils"
@@ -36,7 +37,7 @@ type S3Object struct {
 }
 
 type CommonPrefix struct {
-	Prefix []string `xml:"Prefix"`
+	Prefix string `xml:"Prefix"`
 }
 
 // IsS3 checks if the response is from an S3 server
@@ -44,107 +45,78 @@ func IsS3(URL *models.URL) bool {
 	return utils.StringContainsSliceElements(URL.GetResponse().Header.Get("Server"), validS3Servers)
 }
 
-// S3 decides which helper to call based on the query param: old style (no list-type=2) vs. new style (list-type=2)
+// S3 parses a ListObjects/ListObjectsV2 response and returns URLs for the files
+// or common prefixes at the current level, plus a pagination URL when more
+// results exist.
 func S3(URL *models.URL) ([]*models.URL, error) {
 	defer URL.RewindBody()
 
-	// Decode XML result
+	bodyBytes, err := io.ReadAll(URL.GetBody())
+	if err != nil {
+		return nil, fmt.Errorf("error reading response body: %v", err)
+	}
+
 	var result S3ListBucketResult
-	if err := xml.NewDecoder(URL.GetBody()).Decode(&result); err != nil {
-		return nil, fmt.Errorf("error decoding S3 XML: %v", err)
+	if err := xml.Unmarshal(bodyBytes, &result); err != nil {
+		return nil, fmt.Errorf("error parsing XML: %v", err)
 	}
 
-	// Prepare base data
+	// Extract the base URL from the request URL
 	reqURL := URL.GetRequest().URL
-	listType := reqURL.Query().Get("list-type")
-
-	// Build https:// as the base for direct file links
-	baseStr := fmt.Sprintf("https://%s", reqURL.Host)
-	parsedBase, err := url.Parse(baseStr)
+	requestQuery := reqURL.Query()
+	baseURL := fmt.Sprintf("https://%s", reqURL.Host)
+	parsedBase, err := url.Parse(baseURL)
 	if err != nil {
 		return nil, fmt.Errorf("invalid base URL: %v", err)
 	}
 
-	var outlinkStrings []string
-
-	// Delegate to old style or new style
-	if listType != "2" {
-		// Old style S3 listing, uses marker
-		outlinkStrings = s3Legacy(reqURL, parsedBase, result)
-	} else {
-		// New style listing (list-type=2), uses continuation token and/or CommonPrefixes
-		outlinkStrings = s3V2(reqURL, parsedBase, result)
-	}
-
-	// Convert from []string -> []*models.URL
-	var outlinks []*models.URL
-	for _, link := range outlinkStrings {
-		outlinks = append(outlinks, &models.URL{Raw: link})
-	}
-	return outlinks, nil
-}
-
-// s3Legacy handles the old ListObjects style, which uses `marker` for pagination.
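+	// e.g. a listing of https://bucket.example.com/?prefix=logs/ whose last
+	// key is "logs/2021-01-01.log" yields the next-page outlink
+	// https://bucket.example.com/?marker=logs%2F2021-01-01.log&prefix=logs%2F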
-func s3Legacy(reqURL *url.URL, parsedBase *url.URL, result S3ListBucketResult) []string {
-	var outlinks []string
+	var URLs []string
 
-	// If there are objects in <Contents>, create a "next page" URL using `marker`
-	if len(result.Contents) > 0 {
-		lastKey := result.Contents[len(result.Contents)-1].Key
+	// Legacy ListObjects (no list-type=2): paginate by setting `marker` to the
+	// last key of the current page.
+	if requestQuery.Get("list-type") != "2" && len(result.Contents) > 0 {
 		nextURL := *reqURL
 		q := nextURL.Query()
-		q.Set("marker", lastKey)
+		q.Set("marker", result.Contents[len(result.Contents)-1].Key)
 		nextURL.RawQuery = q.Encode()
-		outlinks = append(outlinks, nextURL.String())
-	}
-
-	// Produce direct file links for each object
-	for _, obj := range result.Contents {
-		if obj.Size > 0 {
-			fileURL := *parsedBase
-			fileURL.Path += "/" + obj.Key
-			outlinks = append(outlinks, fileURL.String())
-		}
+		URLs = append(URLs, nextURL.String())
 	}
-	return outlinks
-}
-
-// s3V2 handles the new ListObjectsV2 style, which uses `continuation-token` and can return CommonPrefixes.
-func s3V2(reqURL *url.URL, parsedBase *url.URL, result S3ListBucketResult) []string {
-	var outlinks []string
-
-	// If we have common prefixes => "subfolders"
+	// If the listing was rolled up by a delimiter, queue each common prefix
+	// ("subfolder") instead of individual files.
 	if len(result.CommonPrefixes) > 0 {
 		for _, prefix := range result.CommonPrefixes {
-			// Create a URL for each common prefix (subfolder)
-			for _, p := range prefix.Prefix {
-				nextURL := *reqURL
-				q := nextURL.Query()
-				q.Set("prefix", p)
-				nextURL.RawQuery = q.Encode()
-				outlinks = append(outlinks, nextURL.String())
-			}
+			nextURL := *reqURL
+			q := nextURL.Query()
+			q.Set("prefix", prefix.Prefix)
+			nextURL.RawQuery = q.Encode()
+			URLs = append(URLs, nextURL.String())
 		}
 	} else {
-		// Otherwise, we have actual objects in <Contents>
+		// Otherwise return file URLs
 		for _, obj := range result.Contents {
 			if obj.Size > 0 {
 				fileURL := *parsedBase
 				fileURL.Path += "/" + obj.Key
-				outlinks = append(outlinks, fileURL.String())
+				URLs = append(URLs, fileURL.String())
 			}
 		}
 	}
 
-	// If truncated => add a link with continuation-token
+	// If there's a continuation token, add the continuation URL
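+	// (ListObjectsV2 returns IsTruncated=true together with a
+	// NextContinuationToken; requesting the same URL with continuation-token
+	// set resumes the listing where this page ended.)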
 	if result.IsTruncated && result.NextContinuationToken != "" {
 		nextURL := *reqURL
 		q := nextURL.Query()
 		q.Set("continuation-token", result.NextContinuationToken)
 		nextURL.RawQuery = q.Encode()
-		outlinks = append(outlinks, nextURL.String())
+		URLs = append(URLs, nextURL.String())
+	}
+
+	var outlinks []*models.URL
+	for _, extractedURL := range URLs {
+		outlinks = append(outlinks, &models.URL{
+			Raw: extractedURL,
+		})
 	}
-	return outlinks
+	return outlinks, nil
 }
diff --git a/internal/pkg/postprocessor/extractor/s3_test.go b/internal/pkg/postprocessor/extractor/s3_test.go
deleted file mode 100644
index feda3d57..00000000
--- a/internal/pkg/postprocessor/extractor/s3_test.go
+++ /dev/null
@@ -1,170 +0,0 @@
-package extractor
-
-import (
-	"net/http"
-	"net/url"
-	"os"
-	"strings"
-	"testing"
-
-	"github.com/CorentinB/warc/pkg/spooledtempfile"
-	"github.com/internetarchive/Zeno/pkg/models"
-)
-
-// TestIsS3 checks the Server header for known S3 strings.
-func TestIsS3(t *testing.T) {
-	tests := []struct {
-		name   string
-		server string
-		want   bool
-	}{
-		{"AmazonS3", "AmazonS3", true},
-		{"WasabiS3", "WasabiS3", true},
-		{"AliyunOSS", "AliyunOSS", true},
-		{"No match", "Apache", false},
-		{"Partial match", "Amazon", false},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			// Create a *models.URL with the response Server header set
-			URLObj := &models.URL{}
-
-			URLObj.SetResponse(&http.Response{
-				Header: http.Header{
-					"Server": []string{tt.server},
-				},
-			})
-
-			got := IsS3(URLObj)
-			if got != tt.want {
-				t.Errorf("IsS3(server=%q) = %v, want %v", tt.server, got, tt.want)
-			}
-		})
-	}
-}
-
-func TestS3(t *testing.T) {
-	// This subtest shows a scenario of a valid XML with a single object,
-	// and list-type != 2 => "marker" logic should be used.
-	t.Run("Valid XML with single object, no list-type=2 => marker next link", func(t *testing.T) {
-		xmlBody := `<?xml version="1.0" encoding="UTF-8"?>
-<ListBucketResult>
-	<Contents>
-		<Key>file1.txt</Key>
-		<LastModified>2021-01-01T12:00:00.000Z</LastModified>
-		<Size>123</Size>
-	</Contents>
-	<IsTruncated>false</IsTruncated>
-</ListBucketResult>`
-
-		// Build an http.Request with a query param that is NOT list-type=2
-		reqURL, _ := url.Parse("https://example.com/?someparam=1")
-
-		// Create your models.URL instance.
-		URLObj := &models.URL{}
-		URLObj.SetRequest(&http.Request{URL: reqURL})
-
-		// Likewise, set the HTTP response header using SetResponse.
-		// We want to simulate an S3 server for these tests.
-		URLObj.SetResponse(&http.Response{
-			Header: http.Header{
-				"Server": []string{"AmazonS3"},
-			},
-		})
-
-		spooledTempFile := spooledtempfile.NewSpooledTempFile("test", os.TempDir(), 2048, false, -1)
-		spooledTempFile.Write([]byte(xmlBody))
-
-		URLObj.SetBody(spooledTempFile)
-
-		outlinks, err := S3(URLObj)
-		if err != nil {
-			t.Fatalf("S3() returned unexpected error: %v", err)
-		}
-
-		if len(outlinks) != 2 {
-			t.Fatalf("expected 2 outlinks, got %d", len(outlinks))
-		}
-		expectedOutlinks := []string{
-			"https://example.com/?marker=file1.txt&someparam=1",
-			"https://example.com/file1.txt",
-		}
-		for i, outlink := range outlinks {
-			if outlink.Raw != expectedOutlinks[i] {
-				t.Errorf("expected %s, got %s", expectedOutlinks[i], outlink.Raw)
-			}
-		}
-	})
-
-	// Another subtest example: common prefixes => subfolder links for list-type=2
-	t.Run("Valid XML with common prefixes => subfolder links (list-type=2)", func(t *testing.T) {
-		xmlBody := `<?xml version="1.0" encoding="UTF-8"?>
-<ListBucketResult>
-	<IsTruncated>false</IsTruncated>
-	<CommonPrefixes>
-		<Prefix>folder1/</Prefix>
-		<Prefix>folder2/</Prefix>
-	</CommonPrefixes>
-</ListBucketResult>`
-
-		reqURL, _ := url.Parse("https://example.com/?list-type=2")
-
-		URLObj := &models.URL{}
-		URLObj.SetRequest(&http.Request{URL: reqURL})
-		URLObj.SetResponse(&http.Response{
-			Header: http.Header{
-				"Server": []string{"AmazonS3"},
-			},
-		})
-
-		spooledTempFile := spooledtempfile.NewSpooledTempFile("test", os.TempDir(), 2048, false, -1)
-		spooledTempFile.Write([]byte(xmlBody))
-
-		URLObj.SetBody(spooledTempFile)
-
-		outlinks, err := S3(URLObj)
-		if err != nil {
-			t.Fatalf("S3() returned unexpected error: %v", err)
-		}
-
-		if len(outlinks) != 2 {
-			t.Fatalf("expected 2 outlinks, got %d", len(outlinks))
-		}
-		if !strings.Contains(outlinks[0].Raw, "prefix=folder1%2F") {
-			t.Errorf("expected prefix=folder1/ in outlink, got %s", outlinks[0].Raw)
-		}
-		if !strings.Contains(outlinks[1].Raw, "prefix=folder2%2F") {
-			t.Errorf("expected prefix=folder2/ in outlink, got %s", outlinks[1].Raw)
-		}
-	})
-
-	// Example for invalid XML
-	t.Run("Invalid XML => error", func(t *testing.T) {
-		xmlBody := `<ListBucketResult`
-
-		reqURL, _ := url.Parse("https://example.com/")
-
-		URLObj := &models.URL{}
-		URLObj.SetRequest(&http.Request{URL: reqURL})
-		URLObj.SetResponse(&http.Response{
-			Header: http.Header{
-				"Server": []string{"AmazonS3"},
-			},
-		})
-
-		spooledTempFile := spooledtempfile.NewSpooledTempFile("test", os.TempDir(), 2048, false, -1)
-		spooledTempFile.Write([]byte(xmlBody))
-
-		URLObj.SetBody(spooledTempFile)
-
-		if _, err := S3(URLObj); err == nil {
-			t.Fatal("expected an error for invalid XML, got nil")
-		}
-	})
-}
diff --git a/internal/pkg/postprocessor/extractor/xml.go b/internal/pkg/postprocessor/extractor/xml.go
--- a/internal/pkg/postprocessor/extractor/xml.go
+++ b/internal/pkg/postprocessor/extractor/xml.go
@@ ... @@
 func IsSitemapXML(URL *models.URL) bool {
 	defer URL.RewindBody()
 
-	decoder := xml.NewDecoder(URL.GetBody())
-	for {
-		tok, err := decoder.Token()
-		if err != nil {
-			return false
-		}
-
-		switch t := tok.(type) {
-		// --- NON-ELEMENT tokens ---
-		case xml.Comment:
-			// <!-- ... -->
-			if bytes.Contains(t, sitemapMarker) {
-				return true
-			}
-		case xml.Directive:
-			// <!DOCTYPE ...>
-			if bytes.Contains(t, sitemapMarker) {
-				return true
-			}
-		case xml.ProcInst:
-			// <?xml-stylesheet ...?>
-			// t.Target is string, t.Inst is []byte
-			if bytes.Contains(t.Inst, sitemapMarker) {
-				return true
-			}
-
-		// --- ELEMENT tokens ---
-		case xml.StartElement:
-			// 1) Check element's namespace or local name
-			// e.g. <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
-			// t.Name.Space could be "http://www.sitemaps.org/schemas/sitemap/0.9"
-			// t.Name.Local might be "urlset"
-			//
-			// But in practice, many sitemap docs have the namespace in the default XMLNS,
-			// so we should also check attributes.
-			if strings.Contains(t.Name.Space, string(sitemapMarker)) {
-				return true
-			}
-			if strings.Contains(t.Name.Local, string(sitemapMarker)) {
-				return true
-			}
-
-			// 2) Check attributes (common place for the sitemap XMLNS)
-			for _, attr := range t.Attr {
-				if strings.Contains(attr.Value, string(sitemapMarker)) {
-					return true
-				}
-			}
-
-		case xml.EndElement:
-			// EndElement typically has no textual data, so nothing to check
-			continue
-		}
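+	// The token-walking check above is replaced with a plain byte search: a
+	// response counts as a sitemap if it is served as XML and its body
+	// contains sitemapMarker anywhere.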
+	xmlBody, err := io.ReadAll(URL.GetBody())
+	if err != nil {
+		return false
 	}
-	return false
+
+	return isContentType(URL.GetResponse().Header.Get("Content-Type"), "xml") && bytes.Contains(xmlBody, sitemapMarker)
 }
 
 func XML(URL *models.URL) (assets, outlinks []*models.URL, err error) {
diff --git a/internal/pkg/postprocessor/extractor/xml_test.go b/internal/pkg/postprocessor/extractor/xml_test.go
index 7cce0711..4896a5ff 100644
--- a/internal/pkg/postprocessor/extractor/xml_test.go
+++ b/internal/pkg/postprocessor/extractor/xml_test.go
@@ -4,12 +4,10 @@ import (
 	"bytes"
 	"io"
 	"net/http"
-	"net/url"
 	"os"
 	"strings"
 	"testing"
 
-	"github.com/CorentinB/warc/pkg/spooledtempfile"
 	"github.com/internetarchive/Zeno/internal/pkg/archiver"
 	"github.com/internetarchive/Zeno/pkg/models"
 )
@@ -157,131 +155,3 @@ func TestXML(t *testing.T) {
 	})
 }
-
-// TestIsSitemapXML covers multiple scenarios.
-func TestIsSitemapXML(t *testing.T) {
-	tests := []struct {
-		name    string
-		xmlData string
-		want    bool
-	}{
-		{
-			name: "Valid sitemap XML",
-			xmlData: `
-<?xml version="1.0" encoding="UTF-8"?>
-<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
-	<url><loc>https://example.com/page1</loc></url>
-</urlset>
-	`,
-			want: true,
-		},
-		{
-			name: "Invalid sitemap XML",
-			xmlData: `
-<document>
-	Not a sitemap
-</document>`,
-			want: false,
-		},
-		{
-			name: "Sitemap XML with comment containing marker",
-			xmlData: `
-<!-- http://www.sitemaps.org/schemas/sitemap/0.9 -->
-<document>
-	Not a sitemap
-</document>`,
-			want: true,
-		},
-		{
-			name: "Sitemap XML with directive containing marker",
-			xmlData: `
-<!DOCTYPE document SYSTEM "http://www.sitemaps.org/schemas/sitemap/0.9">
-<document>
-	Not a sitemap
-</document>`,
-			want: true,
-		},
-		{
-			name: "Sitemap XML with processing instruction containing marker",
-			xmlData: `
-<?xml-stylesheet type="text/xsl" href="http://www.sitemaps.org/schemas/sitemap/0.9"?>
-<document>
-	Not a sitemap
-</document>`,
-			want: true,
-		},
-		{
-			name: "Sitemap XML with nested elements containing marker",
-			xmlData: `
-<document>
-	<nested xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">Not a sitemap</nested>
-</document>`,
-			want: true,
-		},
-		{
-			name: "Sitemap XML with attributes containing marker",
-			xmlData: `
-<document schema="http://www.sitemaps.org/schemas/sitemap/0.9">
-	Not a sitemap
-</document>`,
-			want: true,
-		},
-		{
-			name:    "Empty XML content",
-			xmlData: ``,
-			want:    false,
-		},
-		{
-			name: "Large sitemap XML content",
-			xmlData: `<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
-	` + strings.Repeat(`<url><loc>https://example.com/page</loc></url>`, 1000) + `</urlset>`,
-			want: true,
-		},
-		{
-			name: "Sitemap XML with special characters in namespace",
-			xmlData: `
-<?xml version="1.0" encoding="UTF-8"?>
-<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9?param=1&amp;other=2">
-	<url><loc>https://example.com/page</loc></url>
-</urlset>
-	`,
-			want: true,
-		},
-		{
-			name: "Sitemap XML with special characters in URLs",
-			xmlData: `
-<?xml version="1.0" encoding="UTF-8"?>
-<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
-	<url><loc>https://example.com/page?param=1&amp;other=2</loc></url>
-</urlset>
-	`,
-			want: true,
-		},
-	}
-
-	for _, tc := range tests {
-		t.Run(tc.name, func(t *testing.T) {
-			// Construct a minimal FakeURL with your test data as body
-			URLObj := &models.URL{}
-			URLObj.SetRequest(&http.Request{URL: &url.URL{Scheme: "http", Host: "example.com"}})
-
-			// Likewise, set the HTTP response header using SetResponse.
-			// We want to simulate an S3 server for these tests.
- URLObj.SetResponse(&http.Response{ - Header: http.Header{ - "Server": []string{"AmazonS3"}, - }, - }) - - spooledTempFile := spooledtempfile.NewSpooledTempFile("test", os.TempDir(), 2048, false, -1) - spooledTempFile.Write([]byte(tc.xmlData)) - - URLObj.SetBody(spooledTempFile) - - got := IsSitemapXML(URLObj) - if got != tc.want { - t.Errorf("IsSitemapXML(%q) = %v, want %v", tc.xmlData, got, tc.want) - } - }) - } -} diff --git a/internal/pkg/postprocessor/item.go b/internal/pkg/postprocessor/item.go index d6a3702e..596f6c20 100644 --- a/internal/pkg/postprocessor/item.go +++ b/internal/pkg/postprocessor/item.go @@ -1,8 +1,6 @@ package postprocessor import ( - "strings" - "github.com/google/uuid" "github.com/internetarchive/Zeno/internal/pkg/config" "github.com/internetarchive/Zeno/internal/pkg/log" @@ -77,10 +75,6 @@ func postprocessItem(item *models.Item) []*models.Item { logger.Debug("item is a child and it's depth (without redirections) is more than 2", "item_id", item.GetShortID()) item.SetStatus(models.ItemCompleted) return outlinks - } else if !domainscrawl.Enabled() && (item.GetDepthWithoutRedirections() == 1 && strings.Contains(item.GetURL().GetMIMEType().String(), "html")) { - logger.Debug("HTML got extracted as asset, skipping", "item_id", item.GetShortID()) - item.SetStatus(models.ItemCompleted) - return outlinks } else if config.Get().DisableAssetsCapture && !domainscrawl.Enabled() { logger.Debug("assets capture and domains crawl are disabled", "item_id", item.GetShortID()) item.SetStatus(models.ItemCompleted)