From 992cc5a555e4d11fe692d2da1ad0fa1c2264f1cf Mon Sep 17 00:00:00 2001
From: Yash Sharma <yash1337@users.noreply.github.com>
Date: Sat, 1 Apr 2017 23:06:10 -0400
Subject: [PATCH] Delete ScraperV2.py

---
 src/ScraperV2.py | 253 -----------------------------------------------
 1 file changed, 253 deletions(-)
 delete mode 100644 src/ScraperV2.py

diff --git a/src/ScraperV2.py b/src/ScraperV2.py
deleted file mode 100644
index d0bdb2f..0000000
--- a/src/ScraperV2.py
+++ /dev/null
@@ -1,253 +0,0 @@
-#############################
-##Project Site 'Cyclone" Scraper v2
-##This thing Scrapes everthing from the CSE 232 site videos, examples code, presentations, and project files
-##
-#############################
-
-################################################################################################################
-
-###Checking dependensies
-import re
-import requests 
-from bs4 import BeautifulSoup #getting the beasutiful soup library
-from requests.auth import HTTPBasicAuth
-from urllib.parse import urljoin
-import os
-import msvcrt as m
-# try:
-	# print("Importing other libraries")
-	# from requests.auth import HTTPBasicAuth
-	# from urllib.parse import urljoin
-	# import os
-	# import msvcrt as m
-# except:
-	# print("Just get those libraries man!!!")
-	
-# try:
-	# import requests 
-	# print ("Request library is good!! ")
-# except:
-	# print("Request Library not found. Trying to install it. If not successfull then try it manually")
-	# os.system('pip install requests')
-
-# try:
-	# from bs4 import BeautifulSoup
-	# print("BeautifulSoup library is good!! ")
-# except:
-	# print("BeautifulSoup Library not found. Trying to install it. If not successfull then try it manually")
-	# os.system('pip install beautifulsoup4')
-	
-
-	
-
-################################################################################################################
-
-################################################################################################################
-UNAME="sharmay4"
-PASSWORD="D@rkL0rd"
-################################################################################################################
-
-################################################################################################################
-
-###Actual Code
-
-baseURL='http://www.cse.msu.edu/~cse232/'
-
-def returnBeautifiedObject(link,USERNAME='',PASS=''):
-	requestObject=requests.get(link,auth=HTTPBasicAuth(USERNAME,PASS))
-	return BeautifulSoup(requestObject.text,"html.parser")
-
-def downloadFiles(URL,PATH,book_name=''):
-	if book_name=='':
-		book_name=URL.split('/')[-1]
-		
-	os.chdir(PATH)
-	
-	print("Downloading: "+book_name+" ...")
-	with open(book_name, 'wb') as book:
-		a = requests.get(URL,auth=HTTPBasicAuth(UNAME,PASSWORD))
-		for block in a.iter_content(512):
-			if not block:
-				break
-			book.write(block)
-def downloadVideos(URL,PATH,book_name=''):
-	
-		
-	r=requests.get(URL)
-	print("Downloading: "+book_name+" ...")
-	f=open(book_name,'wb');
-	print ("Donloading.....")
-	for chunk in r.iter_content(chunk_size=255): 
-		if chunk: # filter out keep-alive new chunks
-			f.write(chunk)
-	print ("Done")
-	f.close()
-	
-	
-	
-def getWorksheets(beautifiedBaseObject):
-	worksheetURL=''
-	worksheetFolderPath=os.path.dirname(os.path.abspath(__file__))+'\worksheets'
-	#print(worksheetFolderPath)	
-	if not os.path.exists(worksheetFolderPath):
-		os.makedirs(worksheetFolderPath)
-	
-	for link in beautifiedBaseObject.find_all('a'):
-		if 'Worksheets' in urljoin(baseURL,link['href']):
-			worksheetURL=urljoin(baseURL,link['href'])
-	worksheetHTMLObject=requests.get(worksheetURL)
-	completedWorksheetURL=''
-	#print(worksheetURL+'/'+'w1.cpp')
-	beautifiedWorksheetHTMLObject=BeautifulSoup(worksheetHTMLObject.text,"html.parser")	
-	
-	for link in beautifiedWorksheetHTMLObject.find_all('a'):
-		if (".pdf" in link['href'] or ".cpp" in link['href']):
-			completedWorksheetURL=worksheetURL+'/'+link['href']
-			book_name = completedWorksheetURL.split('/')[-1]
-			book_name=book_name[:1]+'orksheet'+book_name[1:]
-			worksheetName=book_name
-			worksheetFilePath=worksheetFolderPath+"\\"+book_name.split('.',1)[0]	
-			if not os.path.exists(worksheetFilePath):
-				os.makedirs(worksheetFilePath)
-			downloadFiles(completedWorksheetURL,worksheetFilePath,book_name)
-			# print("Downloading: "+book_name+" ...")
-			# with open(book_name, 'wb') as book:
-				# a = requests.get(completedWorksheetURL)
-				# for block in a.iter_content(512):
-					# if not block:
-						# break
-					# book.write(block)
-
-	
-
-def getLabs(beautifiedBaseObject):
-	labBaseURLList=[]	
-	labFolderpath=os.path.dirname(os.path.abspath(__file__))+'\labs'
-	if not os.path.exists(labFolderpath):
-		os.makedirs(labFolderpath)  
-	
-	for link in beautifiedBaseObject.find_all('a'):
-		if 'week' in link['href'] and '.pdf' not in link['href']:
-			labBaseURLList.append(urljoin(baseURL,link['href']))
-	labBaseURLList.pop(0)	#deleting the URL without week number
-	tempBeautifiedObject=None
-	beautifiedLabObject=None
-	for URL in labBaseURLList:
-		tempBeautifiedObject=returnBeautifiedObject(URL,UNAME,PASSWORD)
-		for link in tempBeautifiedObject.find_all('a'):
-			if ('lab' in link['href'] and 'Weekly' in link['href']):
-				beautifiedLabObject=returnBeautifiedObject(link['href'],UNAME,PASSWORD)
-				for labLink in beautifiedLabObject.find_all('a'):
-					if (".pdf" in labLink['href'] or ".cpp" in labLink['href'] or ".h" in labLink['href'] or "gdbinit" in labLink['href'] or '.txt' in labLink['href']):
-						labFileURL=link['href']+"/"+ labLink['href']
-						if not os.path.exists(labFolderpath+"\\"+URL.split('/')[-2]):
-							os.makedirs(labFolderpath+"\\"+URL.split('/')[-2])
-						labFilePath=labFolderpath+"\\"+URL.split('/')[-2]
-						#downloadFiles(labFileURL,labFilePath)
-
-						
-def getReadings(beautifiedBaseObject):
-	readingBaseURLList=[]
-	readingFolderPath=os.path.dirname(os.path.abspath(__file__))+'\\readings'
-	if not os.path.exists(readingFolderPath):
-		os.makedirs(readingFolderPath) 
-		
-	for link in beautifiedBaseObject.find_all('a'):
-		if 'week' in link['href'] and '.pdf' not in link['href']:
-			readingBaseURLList.append(urljoin(baseURL,link['href']))
-	readingBaseURLList.pop(0)	#deleting the URL without week number
-	tempBeautifiedObject=None
-	beautifiedReadingObject=None
-	for URL in readingBaseURLList:
-		tempBeautifiedObject=returnBeautifiedObject(URL,UNAME,PASSWORD)
-		for link in tempBeautifiedObject.find_all('a'):
-			if ('reading' in link['href']):
-				beautifiedReadingObject=returnBeautifiedObject(link['href'],UNAME,PASSWORD)
-				for readingLink in beautifiedReadingObject.find_all('a'):
-					if (".pdf" in readingLink['href'] or ".cpp" in readingLink['href'] or ".h" in readingLink['href'] or '.txt' in readingLink['href']):
-						readingFileURL=link['href']+"/"+readingLink['href']
-						if not os.path.exists(readingFolderPath+"\\"+URL.split('/')[-2]):
-							os.makedirs(readingFolderPath+'\\'+URL.split('/')[-2])
-						readingFilePath=readingFolderPath+'\\'+URL.split('/')[-2]
-						downloadFiles(readingFileURL,readingFilePath)
-						
-						
-def getVideos(beautifiedBaseObject):
-	videoBaseURLList=[]
-	videoFolderPath=os.path.dirname(os.path.abspath(__file__))+'\\videos'
-	if not os.path.exists(videoFolderPath):
-		os.makedirs(videoFolderPath)
-	
-	for link in beautifiedBaseObject.find_all('a'):
-		if 'week' in link['href'] and '.pdf' not in link['href']:
-			videoBaseURLList.append(urljoin(baseURL,link['href']))
-	videoBaseURLList.pop(0)	#deleting the URL without week number
-	tempBeautifiedObject=None
-	beautifiedVideoObject=None
-	dtDict={}
-	ulLinkSet=set()
-	for URL in videoBaseURLList:
-		tempBeautifiedObject=returnBeautifiedObject(URL,UNAME,PASSWORD)
-		for liLink in tempBeautifiedObject.find_all('a'):
-			if ('video' in liLink['href'] and '.mp4' in liLink['href']):
-				temp=URL[:-URL.index('/')]
-				if not os.path.exists(videoFolderPath+"\\"+URL.split('/')[-2]):
-					os.makedirs(videoFolderPath+"\\"+URL.split('/')[-2])
-				videoFilePath=videoFolderPath+"\\"+URL.split('/')[-2]
-				videoFileURL=temp[:temp.rfind('/')]+"/"+liLink['href']
-				bookName=liLink.text.strip()+".mp4"
-				bookName=bookName.replace('\n','').replace('\t','')
-				bookName=re.sub(' +',' ',bookName).capitalize()
-				bookName=re.sub('[^a-zA-Z0-9 \n\.]', '', bookName)
-				#print(videoFileURL)
-				#print(videoFilePath)
-				#print(bookName)
-				#print()
-				downloadFiles(videoFileURL,videoFilePath,bookName)
-				
-		
-		
-		
-	#print(ulLinkSet)
-beautifiedBaseObject=returnBeautifiedObject(baseURL)		
-#basestHTMLRequestObject=requests.get('http://www.cse.msu.edu/~cse232/')
-#beautifiedbasestHTMLRequestObject=BeautifulSoup(basestHTMLRequestObject.text,"html.parser")
-#print(beautifiedbasestHTMLRequestObject)
-#getWorksheets(beautifiedbasestHTMLRequestObject)
-#getLabs(beautifiedBaseObject)
-getVideos(beautifiedBaseObject)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-