# Start from an empty workspace: drop every object in the current environment.
rm(list = ls())
Day 6 Homework
Cleaning up
Importing data set
This data set contains statistics, in arrests per 100,000 residents for assault, murder, and rape in each of the 50 US states in 1973. Also given is the percent of the population living in urban areas.
# List the contents of the built-in 'datasets' package.
library(help = "datasets")
# Open the help page for the USArrests data set.
?USArrests
# Print the full data set (one row per state).
USArrests
Murder Assault UrbanPop Rape
Alabama 13.2 236 58 21.2
Alaska 10.0 263 48 44.5
Arizona 8.1 294 80 31.0
Arkansas 8.8 190 50 19.5
California 9.0 276 91 40.6
Colorado 7.9 204 78 38.7
Connecticut 3.3 110 77 11.1
Delaware 5.9 238 72 15.8
Florida 15.4 335 80 31.9
Georgia 17.4 211 60 25.8
Hawaii 5.3 46 83 20.2
Idaho 2.6 120 54 14.2
Illinois 10.4 249 83 24.0
Indiana 7.2 113 65 21.0
Iowa 2.2 56 57 11.3
Kansas 6.0 115 66 18.0
Kentucky 9.7 109 52 16.3
Louisiana 15.4 249 66 22.2
Maine 2.1 83 51 7.8
Maryland 11.3 300 67 27.8
Massachusetts 4.4 149 85 16.3
Michigan 12.1 255 74 35.1
Minnesota 2.7 72 66 14.9
Mississippi 16.1 259 44 17.1
Missouri 9.0 178 70 28.2
Montana 6.0 109 53 16.4
Nebraska 4.3 102 62 16.5
Nevada 12.2 252 81 46.0
New Hampshire 2.1 57 56 9.5
New Jersey 7.4 159 89 18.8
New Mexico 11.4 285 70 32.1
New York 11.1 254 86 26.1
North Carolina 13.0 337 45 16.1
North Dakota 0.8 45 44 7.3
Ohio 7.3 120 75 21.4
Oklahoma 6.6 151 68 20.0
Oregon 4.9 159 67 29.3
Pennsylvania 6.3 106 72 14.9
Rhode Island 3.4 174 87 8.3
South Carolina 14.4 279 48 22.5
South Dakota 3.8 86 45 12.8
Tennessee 13.2 188 59 26.9
Texas 12.7 201 80 25.5
Utah 3.2 120 80 22.9
Vermont 2.2 48 32 11.2
Virginia 8.5 156 63 20.7
Washington 4.0 145 73 26.2
West Virginia 5.7 81 39 9.3
Wisconsin 2.6 53 66 10.8
Wyoming 6.8 161 60 15.6
library(visdat)
library(stargazer)
Please cite as:
Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
# Plot an overview of the column types / missingness of USArrests.
vis_dat(x = USArrests)

# Print a plain-text table of summary statistics for every column.
stargazer(
  USArrests,
  type = "text"
)
==========================================
Statistic N Mean St. Dev. Min Max
------------------------------------------
Murder 50 7.788 4.356 0.800 17.400
Assault 50 170.760 83.338 45 337
UrbanPop 50 65.540 14.475 32 91
Rape 50 21.232 9.366 7.300 46.000
------------------------------------------
Bivariate Regression
# Bivariate OLS regression: assault arrests explained by the urban
# population share.
reg1 <- lm(
  formula = Assault ~ UrbanPop,
  data = USArrests
)
# Print coefficients, standard errors and fit statistics.
summary(reg1)
Call:
lm(formula = Assault ~ UrbanPop, data = USArrests)
Residuals:
Min 1Q Median 3Q Max
-150.78 -61.85 -18.68 58.05 196.85
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 73.0766 53.8508 1.357 0.1811
UrbanPop 1.4904 0.8027 1.857 0.0695 .
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 81.33 on 48 degrees of freedom
Multiple R-squared: 0.06701, Adjusted R-squared: 0.04758
F-statistic: 3.448 on 1 and 48 DF, p-value: 0.06948
Slope
# Compute the regression slope by hand: for a bivariate OLS regression of
# y on x, the slope equals cov(x, y) / var(x).
x <- USArrests$UrbanPop # explanatory variable (x)
y <- USArrests$Assault  # response variable (y)
slope <- cov(x, y) / var(x)
slope
[1] 1.49044
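Slope = 1.49044, which matches the UrbanPop coefficient (1.4904) estimated by lm() above, since the bivariate OLS slope equals cov(x, y) / var(x).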
Nvidia Stock
# Clear the workspace before starting the Nvidia stock exercise.
rm(list = ls())
library(tidyquant)
Registered S3 method overwritten by 'quantmod':
method from
as.zoo.data.frame zoo
── Attaching core tidyquant packages ─────────────────────── tidyquant 1.0.11 ──
✔ PerformanceAnalytics 2.0.8 ✔ TTR 0.24.4
✔ quantmod 0.4.28 ✔ xts 0.14.1
── Conflicts ────────────────────────────────────────── tidyquant_conflicts() ──
✖ zoo::as.Date() masks base::as.Date()
✖ zoo::as.Date.numeric() masks base::as.Date.numeric()
✖ PerformanceAnalytics::legend() masks graphics::legend()
✖ quantmod::summary() masks base::summary()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(fpp3)
Registered S3 method overwritten by 'tsibble':
method from
as_tibble.grouped_df dplyr
── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──
✔ tibble 3.3.0 ✔ tsibble 1.1.6
✔ dplyr 1.1.4 ✔ tsibbledata 0.4.1
✔ tidyr 1.3.1 ✔ feasts 0.4.1
✔ lubridate 1.9.4 ✔ fable 0.4.1
✔ ggplot2 3.5.2
── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
✖ lubridate::date() masks base::date()
✖ dplyr::filter() masks stats::filter()
✖ dplyr::first() masks xts::first()
✖ tsibble::index() masks zoo::index()
✖ tsibble::intersect() masks base::intersect()
✖ tsibble::interval() masks lubridate::interval()
✖ dplyr::lag() masks stats::lag()
✖ dplyr::last() masks xts::last()
✖ tsibble::setdiff() masks base::setdiff()
✖ tsibble::union() masks base::union()
✖ fable::VAR() masks tidyquant::VAR()
Attaching package: 'fpp3'
The following object is masked from 'package:PerformanceAnalytics':
prices
# Open the help page for tq_get().
?tq_get

# Download daily NVDA stock prices from 2000-01-01 onwards.
df_daily <- tq_get(
  x = "NVDA",
  get = "stock.prices",
  from = "2000-01-01"
)
# Aggregate the daily prices to one row per month holding the mean adjusted
# price, then convert the result to a tsibble indexed by month.
nvda_data_monthly <- df_daily %>%
  mutate(month = yearmonth(date)) %>% # map each date to its year-month
  group_by(month) %>%
  summarise(adjusted = mean(adjusted)) %>%
  as_tsibble(index = month)
# Save the monthly series to a CSV file in the working directory.
write.csv(nvda_data_monthly, file = "nvda_monthly_data.csv")
# Split the monthly series by row position: the first 80% of rows form the
# training set and the remaining 20% the test set. The cut-off is computed
# from nrow() rather than hard-coded, so it stays valid when the download
# grows (with 404 rows this reproduces 1:323 and 324:404).
n_total <- nrow(nvda_data_monthly)
n_train <- floor(0.8 * n_total)
train <- nvda_data_monthly[seq_len(n_train), ] # 80% of original data
test <- nvda_data_monthly[n_train + seq_len(n_total - n_train), ] # 20% of original data
train <- train %>% filter(!is.na(month)) # remove NA values from month
train <- as_tsibble(train, index = month) # convert to a tsibble
# Open the help page for fabletools::model().
?fabletools::model

# Fit models
# Three benchmark specifications are estimated on the training set:
# random walk with drift, naive, and seasonal naive.
models_nvda <- model(
  .data = train,
  # ETS = ETS(adjusted),
  Drift = RW(adjusted ~ drift()),
  NAIVE = NAIVE(adjusted),
  SNAIVE = SNAIVE(adjusted)
)
# Forecast
# Forecast as many periods ahead as there are rows in the test set.
h <- nrow(test)
fc_nvdaa <- forecast(models_nvda, h = h)

# Plot the forecasts together with the training data. labs() expects
# `title`, `x` and `y`; unrecognised names such as `tittle`, `xlab` or
# `ylab` are ignored, leaving the plot without the intended labels.
autoplot(fc_nvdaa, train) +
  labs(title = "My Forecast", x = "Time", y = "Adjusted Prices")