From 4f5c9c25dbc5a2f76edcc2627e18d0dfad6f2163 Mon Sep 17 00:00:00 2001
From: Erik Lopez
Date: Mon, 1 Jun 2020 17:30:38 -0400
Subject: [PATCH] done

---
 el3220_Final.R | 241 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 241 insertions(+)
 create mode 100644 el3220_Final.R

diff --git a/el3220_Final.R b/el3220_Final.R
new file mode 100644
index 0000000..b94e303
--- /dev/null
+++ b/el3220_Final.R
@@ -0,0 +1,241 @@
+# 3135 May Intensive: Final Examination.
+
+# You may use all materials presented in class, as well as online resources.
+# You may not, however, assist other students in this course with their answers.
+# You are bound by the NYUSPS Policy on Academic Integrity and Plagiarism.
+# Submit to NYU Classes as: [NetID]_Final.R.
+
+library(quantmod)
+library(stargazer)
+library(fredr)
+library(tidyr)
+library(PASWR2)
+library(MASS)
+library(repmis)
+library(latex2exp)
+library(dplyr)
+library(ggplot2)
+library(tidyverse)
+library(RCurl)
+library(haven)
+library(forecast)
+library(depmixS4)
+library(mFilter)
+fredr_set_key('30e6ecb242a73869e11cb35f6aa3afc3') # My key, please don't abuse.
+
+getwd()
+setwd("C:/Users/erikl/OneDrive/Documents/NYU_CUSP/Summer2020/REDA")
+
+# Question 1.
+# Consider the following sequence of numbers, called x1: 6 2 9 7 7 1 9 9 6 5 6 1 4 7 9 1 0 5 5 3
+# Using R, calculate the mean, variance and standard deviation of x1.
+# Consider another sequence of numbers, called x2: 9 5 8 1 9 2 2 6 9 8 5 1 0 2 7 4 9 7 0 9
+# Using R, calculate the covariance and correlation between x1 and x2.
+# Using R, run the bivariate linear model that regresses x2 on x1.
+# Based on a null hypothesis of no relationship, what is the 95% confidence interval on the slope coefficient?
+# Would you reject or fail to reject the null hypothesis at this level of confidence?
+
+x1 <- c(6, 2, 9, 7, 7, 1, 9, 9, 6, 5, 6, 1, 4, 7, 9, 1, 0, 5, 5, 3)
+
+mean(x1)
+var(x1)
+sd(x1)
+
+x2 <- c(9, 5, 8, 1, 9, 2, 2, 6, 9, 8, 5, 1, 0, 2, 7, 4, 9, 7, 0, 9)
+
+cov(x1, x2)
+cor(x1, x2)
+
+ols = lm(x2 ~ x1) ## Regressing x2 on x1, as asked: x2 is the dependent variable.
+stargazer(ols, type="text", title="OLS Results", single.row=TRUE,
+          ci=TRUE, ci.level=0.95)
+
+## Slope CI at 95%: 0.137 (-0.387, 0.661)
+## We FAIL to reject the null hypothesis because a slope of zero lies inside the CI,
+## indicating no evidence of a linear relationship between x1 and x2.
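+
+## A quick cross-check on the stargazer interval above; a minimal sketch using base R.
+## Note that confint() uses the t distribution, so its bounds will be slightly wider
+## than stargazer's normal-approximation CI, but the conclusion is the same.
+confint(ols, "x1", level = 0.95)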
+
+
+# Question 2.
+# For a publicly-traded stock of your choice, download data on the adjusted closing
+# stock price, as well as the market (or exchange) on which it trades between 2006 and 2017.
+# Calculate the daily returns for both the stock and the market on which it trades.
+# Generate graphs of closing adjusted prices for both.
+# Generate graphs of returns over time for both.
+# Generate histograms of returns for both.
+# Using a bivariate linear regression, estimate the CAPM presented in class.
+# Assuming the EMH, test the basic CAPM hypotheses using your data.
+# Integrate the Fama-French 5 Factor data presented in class.
+# Estimate the Fama-French 3-factor and 5-factor models.
+# Describe how your results change as you move from the 3- to the 5-factor model, addressing changes in the adjusted beta,
+# as well as changes in conclusions regarding hypothesis testing of the adjusted beta.
+
+getSymbols(c('GS','^NYA'), from="2006-01-01", to="2017-12-31")
+data = merge(as.zoo(GS$GS.Adjusted), as.zoo(NYA$NYA.Adjusted))
+names = c("GS", "NYA")
+colnames(data) = names
+
+data.level = as.xts(data) ## Levels
+data.returns = diff(log(data.level), lag=1) ## Log returns
+data.returns = na.omit(data.returns) ## Dump missing values
+
+plot(GS$GS.Adjusted, main="Goldman Sachs Closing Prices", col = "darkgreen")
+plot(NYA$NYA.Adjusted, main="NYSE Closing Prices", col = "darkgreen")
+
+plot(data.returns$GS, main="Goldman Sachs Daily Returns", col = "darkgreen")
+plot(data.returns$NYA, main="NYSE Daily Returns", col = "darkgreen")
+
+hist(data.returns$GS, breaks=100, col="darkblue", freq=F,
+     main="Histogram of GS Daily Returns (2006-2017)", xlab="Daily Returns",
+     xlim=c(-.2, .2))
+abline(v=0, col="red")
+
+hist(data.returns$NYA, breaks=100, col="darkblue", freq=F,
+     main="Histogram of NYSE Daily Returns (2006-2017)", xlab="Daily Returns",
+     xlim=c(-.2, .2))
+abline(v=0, col="red")
+
+plot.ts(data.returns$NYA, data.returns$GS, pch=16,
+        col="darkblue", main="CAPM Data", xlab="Returns of NYSE",
+        ylab="Returns of GS") ## Scatterplot of GS returns against NYSE returns
+grid(lw = 2)
+abline(lm(data.returns$GS ~ data.returns$NYA), col="red") ## I added the best-fit line so the graph looks similar to the one presented in class for Apple.
+
+capm.ols = lm(data.returns$GS ~ data.returns$NYA)
+stargazer(capm.ols, type="text", title="CAPM Results", single.row=TRUE,
+          ci=TRUE, ci.level=0.99)
+## Results
+## NYA      1.350*** (1.291, 1.409)
+## Constant 0.0001 (-0.001, 0.001)
+## Assuming the EMH, our null hypothesis is alpha = 0 AND beta = 1; the alternative is alpha != 0 OR beta != 1,
+## where alpha is the constant and beta is the slope.
+## Using a 99% confidence interval we REJECT the null hypothesis:
+## although zero is within the CI for alpha, the CI for beta does not include 1,
+## indicating that GS is an aggressive stock (beta > 1) with no excess returns.
+
+ff5f = read_dta("FF5F.dta") # Data are stored in multiple formats.
+ff5f$mktrf = ff5f$mktrf / 100
+ff5f$smb = ff5f$smb / 100
+ff5f$hml = ff5f$hml / 100
+ff5f$rmw = ff5f$rmw / 100
+ff5f$cma = ff5f$cma / 100
+ff5f$rf = ff5f$rf / 100
+
+a = xts(x=ff5f, order.by = ff5f$date)
+b = GS$GS.Adjusted
+b = diff(log(b), lag=1)
+b = na.omit(b)
+
+merged_data = merge(a, b, join='right') ## Merge into a single time-series dataset
+names = c("date", "mktrf", "smb", "hml", "rmw", "cma", "rf", "gs")
+colnames(merged_data) = names
+merged_data = merged_data[, colnames(merged_data) != "date"] # Drop the redundant date column; the xts index carries the dates
+
+ff3f.ols = lm(gs - rf ~ mktrf + smb + hml, data = merged_data)
+stargazer(ff3f.ols, type="text", title="FF3F Results: GS", single.row=TRUE,
+          ci=TRUE, ci.level=0.95)
+
+ff5f.ols = lm(gs - rf ~ mktrf + smb + hml + rmw + cma, data = merged_data)
+stargazer(ff5f.ols, type="text", title="FF5F Results: GS", single.row=TRUE,
+          ci=TRUE, ci.level=0.95)
+
+## Moving from the 3-factor to the 5-factor model, the adjusted beta remains above 1 (1 is not even within its CI),
+## so again GS looks like an aggressive stock with no excess returns, and we REJECT the null hypothesis.
+## It should be noted that GS appears somewhat less aggressive under the 5-factor model than under the 3-factor model.
+## No change in our conclusion.
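+
+## The stargazer tables above test beta against 0, but the CAPM restriction is beta = 1.
+## A minimal sketch of a direct test in base R (assumes capm.ols from above; the same
+## pattern applies to the mktrf coefficient in ff3f.ols and ff5f.ols):
+beta_hat = coef(capm.ols)[2]                     # estimated beta
+se_beta  = sqrt(diag(vcov(capm.ols)))[2]         # its standard error
+t_beta   = (beta_hat - 1) / se_beta              # t-statistic for H0: beta = 1
+2 * pt(-abs(t_beta), df = df.residual(capm.ols)) # two-sided p-value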
+
+
+# Question 3.
+# Read in the Staten Island sales data presented in class.
+# Suppose your focus was not interpretation but prediction, and you wanted to introduce non-linearities in the model.
+# Create square and cubed variables for age. (A cube is x^3.)
+# Implement your approach using R and characterize your results as you think appropriate.
+
+SI_Sales = read_dta("SI Sales.dta")
+SI_Sales$age2 = SI_Sales$age^2
+SI_Sales$age3 = SI_Sales$age^3
+
+cor(SI_Sales)
+
+model = lm(price ~ unit_size + land_size + age + age2 + age3, data = SI_Sales)
+stargazer(model, type="text", title="Model", single.row=TRUE,
+          ci=TRUE, ci.level=0.95, digits = 2)
+
+## From our results we see that age initially contributes to a house's appreciation,
+## but eventually age becomes a liability, as the age2 and age3 terms overtake age and pull a house's value down.
+## We should also note that the CIs for age2 and age3 include non-negative values,
+## perhaps indicating that some homes keep appreciating regardless of age (e.g. the value of the land outstrips the decay of the house).
+## We included unit_size and land_size since they're correlated with age.
+
+# Question 4.
+# Use the stock data from Question 2.
+# Using the Autocorrelation Function (ACF) in the library "forecast", do you find that returns are autocorrelated?
+# Calculate daily volatility of returns.
+# Is volatility autocorrelated?
+# Are these results consistent with the strong form of the EMH?
+
+getSymbols(c('GS'), from="2006-01-01", to="2017-12-31")
+data = merge(as.zoo(GS$GS.Adjusted))
+names = c("GS")
+colnames(data) = names
+data.level = as.xts(data) ## Levels
+data.returns = diff(log(data.level), lag=1) ## Log returns
+data.returns = na.omit(data.returns) ## Dump missing values
+data.returns$GSvol = abs(data.returns$GS) ## Absolute returns (the square root of squared returns) as a daily volatility proxy
+
+acf(data.returns$GS, lag.max=5, type=c("correlation"),
+    main="Autocorrelation of GS Returns")
+## For GS we don't see previous days' returns affecting today's returns, so daily returns are not autocorrelated.
+
+acf(data.returns$GSvol, lag.max=10,
+    type=c("correlation"), main="Autocorrelation of GS Volatility")
+
+## GS volatility, by contrast, is autocorrelated: previous days' volatility does have an impact on today's volatility.
+## But because daily returns themselves show no autocorrelation,
+## these results are consistent with the strong form of the EMH.
+
+
+# Question 5.
+# Download quarterly US GDP data from FRED as far back as you are able to go.
+# Graph the data.
+# Using the HP Filter, decompose these data into cyclical and trend components.
+# Graph your results.
+# How would you interpret the results given economic history?
+
+gdp = fredr('GDP', observation_start = as.Date("1947-01-01"))
+plot(gdp$date, gdp$value, pch=16, col="darkblue", main="GDP",
+     xlab="Date", ylab="GDP ($ Billions)") ## Time series plot in R
+lines(gdp$date, gdp$value, col="darkblue")
+grid(lw=2)
+
+hp = hpfilter(gdp$value, freq=1600, type="lambda", drift=FALSE) ## lambda = 1600 is the standard smoothing parameter for quarterly data
+
+plot(gdp$date, hp$trend, pch=16, col="blue",
+     xlab = "Date", ylab = "Trend GDP",
+     main="HP Trend Component")
+lines(gdp$date, hp$trend, col="blue")
+grid(lw=2)
+
+plot(gdp$date, hp$cycle, pch=16, col="blue",
+     xlab = "Date", ylab = "Cyclical GDP Deviations",
+     main="HP Cyclical Component")
+lines(gdp$date, hp$cycle, col="blue")
+grid(lw=2)
+
+## From my results, GDP trends up,
+## but the cyclical component has recently been wilder, with huge swings between strong growth and steep declines.
+## The results point to the fact that in this new millennium, steep economic collapse has been followed by strong economic growth.
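+
+## Because GDP grows over time, absolute cycle swings mechanically look larger in recent
+## decades. A minimal sketch (assumes gdp and hp from above) that rescales the cycle as a
+## percent of trend, so swings are comparable across eras:
+cycle_pct = 100 * hp$cycle / hp$trend
+plot(gdp$date, cycle_pct, type="l", col="blue",
+     xlab="Date", ylab="Cycle (% of Trend)",
+     main="HP Cycle as a Percent of Trend")
+grid(lwd=2)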
+
+
+# Extra Credit.
+# Ingest the 2000 Census Data for the state of New Jersey.
+# Using the naming conventions in class, rename the variables.
+# Plot the longitude and latitude of the centroids in New Jersey.
+# Are they consistent with the population clusters of New Jersey?
+
+url = "http://www2.census.gov/geo/docs/maps-data/data/gazetteer/census_tracts_list_34.txt"
+data = read.csv(url, header=TRUE, sep='\t')
+names = c('usps', 'geo', 'pop', 'hu', 'land', 'water',
+          'landSqmi', 'waterSqmi', 'lat', 'long')
+colnames(data) = names
+plot(data$long, data$lat, pch=16, col="blue",
+     main="The Garden State by Census Centroid", xlab="Longitude", ylab="Latitude")
+grid(lw=2)
+## Yes, these population clusters are consistent with what we would expect:
+## namely, clusters near NYC and Philadelphia and along the corridor between them,
+## plus clusters along the shore, such as Atlantic City.
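+
+## To make the cluster claim visible rather than inferred, a minimal sketch (assumes the
+## data and names from above) that sizes each centroid by its tract population:
+plot(data$long, data$lat, pch=16, col="blue",
+     cex=3 * sqrt(data$pop / max(data$pop)),
+     main="NJ Census Centroids, Sized by Population", xlab="Longitude", ylab="Latitude")
+grid(lwd=2)
\ No newline at end of file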