Day 6 Homework

Author

RB

Cleaning up

remove(list = ls())

Importing data set

This data set contains statistics, in arrests per 100,000 residents for assault, murder, and rape in each of the 50 US states in 1973. Also given is the percent of the population living in urban areas.

library(help = "datasets")
?USArrests 
USArrests
               Murder Assault UrbanPop Rape
Alabama          13.2     236       58 21.2
Alaska           10.0     263       48 44.5
Arizona           8.1     294       80 31.0
Arkansas          8.8     190       50 19.5
California        9.0     276       91 40.6
Colorado          7.9     204       78 38.7
Connecticut       3.3     110       77 11.1
Delaware          5.9     238       72 15.8
Florida          15.4     335       80 31.9
Georgia          17.4     211       60 25.8
Hawaii            5.3      46       83 20.2
Idaho             2.6     120       54 14.2
Illinois         10.4     249       83 24.0
Indiana           7.2     113       65 21.0
Iowa              2.2      56       57 11.3
Kansas            6.0     115       66 18.0
Kentucky          9.7     109       52 16.3
Louisiana        15.4     249       66 22.2
Maine             2.1      83       51  7.8
Maryland         11.3     300       67 27.8
Massachusetts     4.4     149       85 16.3
Michigan         12.1     255       74 35.1
Minnesota         2.7      72       66 14.9
Mississippi      16.1     259       44 17.1
Missouri          9.0     178       70 28.2
Montana           6.0     109       53 16.4
Nebraska          4.3     102       62 16.5
Nevada           12.2     252       81 46.0
New Hampshire     2.1      57       56  9.5
New Jersey        7.4     159       89 18.8
New Mexico       11.4     285       70 32.1
New York         11.1     254       86 26.1
North Carolina   13.0     337       45 16.1
North Dakota      0.8      45       44  7.3
Ohio              7.3     120       75 21.4
Oklahoma          6.6     151       68 20.0
Oregon            4.9     159       67 29.3
Pennsylvania      6.3     106       72 14.9
Rhode Island      3.4     174       87  8.3
South Carolina   14.4     279       48 22.5
South Dakota      3.8      86       45 12.8
Tennessee        13.2     188       59 26.9
Texas            12.7     201       80 25.5
Utah              3.2     120       80 22.9
Vermont           2.2      48       32 11.2
Virginia          8.5     156       63 20.7
Washington        4.0     145       73 26.2
West Virginia     5.7      81       39  9.3
Wisconsin         2.6      53       66 10.8
Wyoming           6.8     161       60 15.6
library(visdat)
library(stargazer)

Please cite as: 
 Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
 R package version 5.2.3. https://CRAN.R-project.org/package=stargazer 
vis_dat(USArrests)

stargazer(USArrests, type = "text")

==========================================
Statistic N   Mean   St. Dev.  Min   Max  
------------------------------------------
Murder    50  7.788   4.356   0.800 17.400
Assault   50 170.760  83.338   45    337  
UrbanPop  50 65.540   14.475   32     91  
Rape      50 21.232   9.366   7.300 46.000
------------------------------------------
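These figures can be cross-checked directly in base R; a minimal sketch (the sapply wrapper is just for illustration, output omitted):

sapply(USArrests, function(v)
  c(N = length(v), Mean = mean(v), St.Dev. = sd(v), Min = min(v), Max = max(v)))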

Bivariate Regression

reg1 <- 
  lm(data = USArrests,
     formula = Assault ~ UrbanPop )
summary(reg1)

Call:
lm(formula = Assault ~ UrbanPop, data = USArrests)

Residuals:
    Min      1Q  Median      3Q     Max 
-150.78  -61.85  -18.68   58.05  196.85 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)  
(Intercept)  73.0766    53.8508   1.357   0.1811  
UrbanPop      1.4904     0.8027   1.857   0.0695 .
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 81.33 on 48 degrees of freedom
Multiple R-squared:  0.06701,   Adjusted R-squared:  0.04758 
F-statistic: 3.448 on 1 and 48 DF,  p-value: 0.06948
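The weak, marginally significant relationship is easier to see in a scatter plot with the fitted line overlaid. A minimal sketch using ggplot2 (loaded explicitly here, since fpp3 is only attached later in this document):

library(ggplot2)
ggplot(USArrests, aes(x = UrbanPop, y = Assault)) +
  geom_point() +                          # one point per state
  geom_smooth(method = "lm", se = TRUE)   # same linear fit as reg1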

Slope

x <- USArrests$UrbanPop       # predictor: percent urban population
y <- USArrests$Assault        # response: assault arrests per 100,000

# OLS slope formula: cov(x, y) / var(x)
cov(x, y) / var(x)
[1] 1.49044

Slope = 1.49044, which matches the UrbanPop coefficient (1.4904) reported by reg1 above.
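The intercept can be recovered by hand as well, and both values compared against the fitted model; a small verification sketch (b0 and b1 are names introduced here):

b1 <- cov(x, y) / var(x)       # slope, 1.49044 as above
b0 <- mean(y) - b1 * mean(x)   # intercept = mean(y) - slope * mean(x)
b0                             # roughly 73.08, matching (Intercept) in summary(reg1)
coef(reg1)                     # coefficients straight from the fitted model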

NVIDIA Stock

remove(list = ls())
library(tidyquant)
Registered S3 method overwritten by 'quantmod':
  method            from
  as.zoo.data.frame zoo 
── Attaching core tidyquant packages ─────────────────────── tidyquant 1.0.11 ──
✔ PerformanceAnalytics 2.0.8      ✔ TTR                  0.24.4
✔ quantmod             0.4.28     ✔ xts                  0.14.1
── Conflicts ────────────────────────────────────────── tidyquant_conflicts() ──
✖ zoo::as.Date()                 masks base::as.Date()
✖ zoo::as.Date.numeric()         masks base::as.Date.numeric()
✖ PerformanceAnalytics::legend() masks graphics::legend()
✖ quantmod::summary()            masks base::summary()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(fpp3)
Registered S3 method overwritten by 'tsibble':
  method               from 
  as_tibble.grouped_df dplyr
── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──
✔ tibble      3.3.0     ✔ tsibble     1.1.6
✔ dplyr       1.1.4     ✔ tsibbledata 0.4.1
✔ tidyr       1.3.1     ✔ feasts      0.4.1
✔ lubridate   1.9.4     ✔ fable       0.4.1
✔ ggplot2     3.5.2     
── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
✖ lubridate::date()    masks base::date()
✖ dplyr::filter()      masks stats::filter()
✖ dplyr::first()       masks xts::first()
✖ tsibble::index()     masks zoo::index()
✖ tsibble::intersect() masks base::intersect()
✖ tsibble::interval()  masks lubridate::interval()
✖ dplyr::lag()         masks stats::lag()
✖ dplyr::last()        masks xts::last()
✖ tsibble::setdiff()   masks base::setdiff()
✖ tsibble::union()     masks base::union()
✖ fable::VAR()         masks tidyquant::VAR()

Attaching package: 'fpp3'

The following object is masked from 'package:PerformanceAnalytics':

    prices
?tq_get
df_daily <- 
  tq_get(x = "NVDA", 
         get = "stock.prices", 
         from = "2000-01-01")

nvda_data_monthly <- df_daily %>%
  mutate(month = yearmonth(date)
         ) %>%
  group_by(month) %>%
  summarise(adjusted = mean(adjusted)
            ) %>%
  as_tsibble(index = month)

write.csv(x = nvda_data_monthly, 
          file = "nvda_monthly_data.csv"
          )
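Before splitting the series it is worth confirming that the monthly index is regular; a quick check, assuming the tsibble and fabletools helpers attached via fpp3:

has_gaps(nvda_data_monthly)              # one-row tibble; .gaps is TRUE if any months are missing
autoplot(nvda_data_monthly, adjusted)    # quick visual check of the full monthly series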
train <- nvda_data_monthly[1:323,]    # first 323 months (about 80% of the data)
test  <- nvda_data_monthly[324:404,]  # remaining 81 months (about 20% of the data)
train <- train %>% filter(!is.na(month))   # drop any rows with a missing month index
train <- as_tsibble(train, index = month)  # ensure train is still a tsibble indexed by month
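The hard-coded row numbers 323 and 404 only match this particular download; a sketch of the same 80/20 split computed from the data itself (n_obs and n_train are names introduced here):

n_obs   <- nrow(nvda_data_monthly)               # total number of months downloaded
n_train <- floor(0.8 * n_obs)                    # size of the 80% training window
train   <- nvda_data_monthly %>% slice(1:n_train)
test    <- nvda_data_monthly %>% slice((n_train + 1):n_obs)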

?fabletools::model
?fabletools
# Fit models
models_nvda <- model(
  .data = train,
#  ETS    = ETS(adjusted),
  Drift  = RW(adjusted ~ drift()),
  NAIVE  = NAIVE(adjusted),
  SNAIVE = SNAIVE(adjusted)
)
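The fitted models can be inspected before forecasting; a brief check with fabletools (output omitted here):

models_nvda            # the mable: one column per fitted model
augment(models_nvda)   # per-model fitted values (.fitted) and residuals (.resid, .innov)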

# Forecast
h <- nrow(test)
fc_nvda <- forecast(models_nvda, h = h)


autoplot(fc_nvda, train) + 
  labs(title = "My Forecast", x = "Time", y = "Adjusted Prices")
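To compare the three forecasts against the held-out test months, fabletools::accuracy() can be applied to the fable; a closing sketch:

fc_nvda %>%
  accuracy(nvda_data_monthly) %>%   # point-forecast accuracy over the test period
  arrange(RMSE)                     # rank models, lowest RMSE first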