Skip to content

Commit

Permalink
Version 1 for CSE 260
Browse files Browse the repository at this point in the history
  • Loading branch information
yash1337 authored Jun 27, 2017
1 parent bf7868c commit 3122731
Showing 1 changed file with 179 additions and 0 deletions.
179 changes: 179 additions & 0 deletions src/ScraperV1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
#############################
##Project Site "Cyclone" Scraper v2
##This thing scrapes everything from the CSE 232 site: Videos, Worksheets, Lecture Notes, and Project Files
##You need to supply your CSE username and password
#############################

################################################################################################################

###Checking dependencies

import os
try:
import requests
print ("Request library is good!! ")
print()
except:
print("Request Library not found. Trying to install it. If not successfull then try it manually")
os.system('pip install requests')

try:
print("Importing other libraries")
from requests.auth import HTTPBasicAuth
from urllib.parse import urljoin
import msvcrt as m
print()
except:
print("Just get those libraries man!!!")

try:
from bs4 import BeautifulSoup
print("BeautifulSoup library is good!! ")
print()
except:
print("BeautifulSoup Library not found. Trying to install it. If not successfull then try it manually")
os.system('pip install beautifulsoup4')

import re
import requests
from bs4 import BeautifulSoup #getting the beasutiful soup library
from requests.auth import HTTPBasicAuth
from urllib.parse import urljoin
import sys
import msvcrt as m
import posixpath
import urllib.parse


################################################################################################################

################################################################################################################
# Credentials sent as HTTP basic auth with every request to the course site.
# They can be supplied without editing this file by setting the CSE_UNAME and
# CSE_PASSWORD environment variables; when those are unset the original
# built-in values are used, so existing behaviour is unchanged.
UNAME = os.environ.get("CSE_UNAME", "cse260")
PASSWORD = os.environ.get("CSE_PASSWORD", "dmig4me")
################################################################################################################

################################################################################################################

###Actual Code

# Index page of the course site; hrefs found on it are resolved against this URL.
baseURL = 'http://www.cse.msu.edu/~cse260/cse260cn/'
# Absolute path of the directory that contains this script.
basePath = os.path.split(os.path.abspath(__file__))[0]

def returnBeautifiedObject(link, USERNAME='', PASS='', timeout=30):
    """Fetch a page with HTTP basic auth and return it parsed by BeautifulSoup.

    Args:
        link: URL of the page to fetch.
        USERNAME: basic-auth user name (sent even when empty).
        PASS: basic-auth password (sent even when empty).
        timeout: seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script indefinitely.

    Returns:
        A BeautifulSoup object built from the response text with the
        "html.parser" backend.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
            Without this check an error page (for example a 401 caused by
            wrong credentials) would be parsed as if it were the real page
            and callers would silently find no links in it.
    """
    requestObject = requests.get(
        link,
        auth=HTTPBasicAuth(USERNAME, PASS),
        timeout=timeout,
    )
    requestObject.raise_for_status()
    return BeautifulSoup(requestObject.text, "html.parser")

def downloadFiles(URL, PATH, book_name=''):
    """Download URL into the directory PATH using the module credentials.

    Args:
        URL: address of the file to download.
        PATH: existing directory the file is written into.
        book_name: file name to save as; when empty it is taken from the
            last path segment of URL.

    The file is written to ``PATH/book_name`` directly instead of changing
    the process working directory. Responses with an HTTP error status are
    reported and skipped so that an error page is never saved under the
    name of the real file.
    """
    if not book_name:
        # rstrip keeps a trailing slash from producing an empty file name.
        book_name = URL.rstrip('/').split('/')[-1]

    destination = os.path.join(PATH, book_name)

    print("Downloading: "+book_name+" ...")
    # stream=True lets iter_content pull the body in chunks instead of
    # loading the whole file into memory before the first write.
    response = requests.get(URL, auth=HTTPBasicAuth(UNAME, PASSWORD), stream=True)
    try:
        if not response.ok:
            print("Skipping "+book_name+": server answered HTTP "+str(response.status_code))
            return
        with open(destination, 'wb') as book:
            for block in response.iter_content(512):
                # Skip empty chunks rather than stopping, so the file
                # cannot be cut short by one.
                if block:
                    book.write(block)
    finally:
        response.close()

def getLectures(beautifiedBaseObject):
    """Download every lecture file linked from the parsed index page.

    A link is treated as a lecture file when its href contains 'lecture'
    and does not contain 'Sample'. Files are saved in a 'Lecture' folder
    next to this script; the folder is created when missing.

    Args:
        beautifiedBaseObject: parsed index page exposing ``find_all``.
    """
    # os.path.join uses the platform separator instead of a hard-coded '\\'.
    LectureFolderPath = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'Lecture')
    os.makedirs(LectureFolderPath, exist_ok=True)

    # href=True skips <a> tags that have no href attribute (e.g. named
    # anchors), which would otherwise raise KeyError on link['href'].
    LectureBaseURLList = [
        urljoin(baseURL, link['href'])
        for link in beautifiedBaseObject.find_all('a', href=True)
        if 'lecture' in link['href'] and "Sample" not in link['href']
    ]

    for URL in LectureBaseURLList:
        downloadFiles(URL, LectureFolderPath)

def getSampleTests(beautifiedBaseObject):
    """Download every sample-test file linked from the parsed index page.

    A link is treated as a sample test when its href contains both
    'lecture' and 'Sample'. Files are saved in a 'Sample Tests' folder next
    to this script; the folder is created when missing.

    Args:
        beautifiedBaseObject: parsed index page exposing ``find_all``.
    """
    # os.path.join uses the platform separator instead of a hard-coded '\\'.
    SamTestsFolderPath = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'Sample Tests')
    os.makedirs(SamTestsFolderPath, exist_ok=True)

    # href=True skips <a> tags that have no href attribute (e.g. named
    # anchors), which would otherwise raise KeyError on link['href'].
    SamTestsBaseURLList = [
        urljoin(baseURL, link['href'])
        for link in beautifiedBaseObject.find_all('a', href=True)
        if 'lecture' in link['href'] and "Sample" in link['href']
    ]

    for URL in SamTestsBaseURLList:
        downloadFiles(URL, SamTestsFolderPath)


def getHomework(beautifiedBaseObject):
    """Download every homework file linked from the parsed index page.

    A link is treated as homework when its href contains 'homework'. Files
    are saved in a 'Homework Assignments' folder next to this script; the
    folder is created when missing.

    Args:
        beautifiedBaseObject: parsed index page exposing ``find_all``.
    """
    # os.path.join uses the platform separator instead of a hard-coded '\\'.
    HomeworkFolderPath = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'Homework Assignments')
    os.makedirs(HomeworkFolderPath, exist_ok=True)

    # href=True skips <a> tags that have no href attribute (e.g. named
    # anchors), which would otherwise raise KeyError on link['href'].
    HomeworkBaseURLList = [
        urljoin(baseURL, link['href'])
        for link in beautifiedBaseObject.find_all('a', href=True)
        if 'homework' in link['href']
    ]

    for URL in HomeworkBaseURLList:
        downloadFiles(URL, HomeworkFolderPath)

def menu():
    """Print the available choices, one per print call.

    Every entry already ends in a newline, so each one is followed by a
    blank line on screen.
    """
    entries = (
        "The list of choices are as follows. Choose the ones you like one by one. Don't worry you will get future chances (just as in life!! :-)): \n",
        "1. All the lecture notes!\n",
        "2. All the Sample test files!\n",
        "3. All the Homework files!\n",
        "q. Nothing fam. Just get me out of here!\n",
    )
    for entry in entries:
        print(entry)

def main():
    """Run the interactive menu until the user enters 'q'.

    Choices '1', '2' and '3' download lectures, sample tests and homework
    respectively; any other input re-prompts. The course index page is
    fetched once, on the first download request, so quitting straight away
    needs neither network access nor valid credentials.
    """
    # choice -> (download function, wording for the "starting" message,
    #            wording for the "finished" message, destination folder name)
    actions = {
        '1': (getLectures, "Lecture files", "lecture notes", "Lecture"),
        '2': (getSampleTests, "Sample test files", "Sample test files", "Sample Tests"),
        '3': (getHomework, "Homework files", "Homework files", "Homework Assignments"),
    }

    # The banner previously said CSE232, but baseURL targets the CSE 260 site.
    print("Welcome to CSE260 Scraper!!!!!!\n")
    menu()
    userInput = input("So what do you want (not in life. What do you want right now!!!): ")
    print()

    beautifiedBaseObject = None
    while userInput != 'q':
        if userInput in actions:
            download, startLabel, doneLabel, folderName = actions[userInput]
            if beautifiedBaseObject is None:
                beautifiedBaseObject = returnBeautifiedObject(baseURL, UNAME, PASSWORD)
            print("Alright lets get those " + startLabel + "!!!\n")
            download(beautifiedBaseObject)
            # Downloads may leave the process in the target folder; go back.
            os.chdir(basePath)
            print()
            print('Alright the ' + doneLabel + ' are downloaded in the "' + folderName
                  + '" folder. See if you want anything else!\n')
            menu()
        else:
            print("Seriously fam, Seriously. Pick one of the choices will ya!!\n")
            os.chdir(basePath)
            menu()
            print("Let's try this again\n")
        userInput = input("Do you need anything else????: ")
        print('\n\n')

    print('\n\n')
    print("Alright fam! Take care! Be happy! Live long and prosper!!!!!!!!!!!!!")
    print('\n\n')


# Start the interactive scraper only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

0 comments on commit 3122731

Please sign in to comment.