From 8e49015c49cb220ce0839bef94a07c933d6b14de Mon Sep 17 00:00:00 2001 From: Yash Date: Sun, 12 Mar 2017 15:06:21 -0400 Subject: [PATCH] cleared up some code --- src/Scraper.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Scraper.py b/src/Scraper.py index 7439ff2..6604e87 100644 --- a/src/Scraper.py +++ b/src/Scraper.py @@ -31,16 +31,17 @@ for link in soup.find_all('a'): small_link.append(link['href']) #getting the link without the base URL links.append(urljoin(baseURL,link['href'])) #joining the base and the relative URL - + + #getting the links that we are intrested in i.e. .txt .pdf and .cpp correctRelativeURL=[] correctCompletedURL=[] for item in small_link: - if (".txt" in item or ".pdf" in item or ".cpp" in item): + if ".txt" in item or ".pdf" in item or ".cpp" in item: correctRelativeURL.append(item) for item in links: - if (".txt" in item or ".pdf" in item or ".cpp" in item or ".h" in item): + if ".txt" in item or ".pdf" in item or ".cpp" in item or ".h" in item: correctCompletedURL.append(item) #looping through the links, downloading files