#Assignment 1. Regression model estimation
# Load libraries
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(tseries)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(foreign)
library(timsac)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(mFilter)
library(nlme)
##
## Attaching package: 'nlme'
##
## The following object is masked from 'package:dplyr':
##
## collapse
library(lmtest)
library(broom)
library(kableExtra)
##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
library(knitr)
library(MASS)
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
library(parallel)
library(mlogit)
## Loading required package: dfidx
##
## Attaching package: 'dfidx'
##
## The following object is masked from 'package:MASS':
##
## select
##
## The following object is masked from 'package:stats':
##
## filter
library(dplyr)
library(tidyr)
library(forecast)
##
## Attaching package: 'forecast'
##
## The following object is masked from 'package:nlme':
##
## getResponse
library(stats)
library(quantmod)
## Loading required package: xts
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'xts'
##
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## Loading required package: TTR
library(foreach)
##
## Attaching package: 'foreach'
##
## The following objects are masked from 'package:purrr':
##
## accumulate, when
library(ISLR)
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
library(glmnet)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Loaded glmnet 4.1-7
library(pacman)
## Load the fish measurements from a CSV file (works in RStudio or Posit Cloud)
fd <- read_csv(file = "Fish.csv")
## Rows: 159 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Species
## dbl (6): Weight, Length1, Length2, Length3, Height, Width
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Scatter plot of Weight against Length1 to inspect their relationship visually
plot(
  x = fd$Length1,
  y = fd$Weight,
  main = "Weight vs Length1",
  xlab = "Length1",
  ylab = "Weight"
)
# Correlation between the two plotted columns, reported as a labelled string
correlation <- with(fd, cor(Length1, Weight))
print(paste("Correlation Coefficient:", correlation))
## [1] "Correlation Coefficient: 0.915711716031204"
# Simple linear regression: model Weight as a linear function of Length1.
lr <- lm(Weight ~ Length1, data = fd)
summary(lr)
##
## Call:
## lm(formula = Weight ~ Length1, data = fd)
##
## Residuals:
## Min 1Q Median 3Q Max
## -414.90 -71.26 6.08 98.31 363.84
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -462.375 32.243 -14.34 <2e-16 ***
## Length1 32.792 1.148 28.55 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 144.3 on 157 degrees of freedom
## Multiple R-squared: 0.8385, Adjusted R-squared: 0.8375
## F-statistic: 815.3 on 1 and 157 DF, p-value: < 2.2e-16
# Scatter plot of the data with the fitted regression line drawn on top.
# Without the abline() call this figure would just repeat the raw
# "Weight vs Length1" scatter plot and would not show the model at all.
plot(fd$Length1, fd$Weight,
     xlab = "Length1", ylab = "Weight",
     main = " Simple Linear Regression")
# abline() reads the intercept and slope from the fitted lm object.
abline(lr, col = "red", lwd = 2)
## Multiple linear regression: Weight explained by the five size measurements
mlr <- lm(
  formula = Weight ~ Length1 + Length2 + Length3 + Height + Width,
  data = fd
)
# Coefficient table and goodness-of-fit statistics for the fitted model
summary(mlr)
##
## Call:
## lm(formula = Weight ~ Length1 + Length2 + Length3 + Height +
## Width, data = fd)
##
## Residuals:
## Min 1Q Median 3Q Max
## -243.69 -65.10 -25.52 57.98 447.25
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -499.587 29.572 -16.894 < 2e-16 ***
## Length1 62.355 40.209 1.551 0.12302
## Length2 -6.527 41.759 -0.156 0.87601
## Length3 -29.026 17.353 -1.673 0.09643 .
## Height 28.297 8.729 3.242 0.00146 **
## Width 22.473 20.372 1.103 0.27169
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 123.2 on 153 degrees of freedom
## Multiple R-squared: 0.8853, Adjusted R-squared: 0.8815
## F-statistic: 236.2 on 5 and 153 DF, p-value: < 2.2e-16
## Polynomial regression of Weight on Length1
# Order of the polynomial; referenced by name inside the model formula below
degree <- 2
pr <- lm(formula = Weight ~ poly(Length1, degree), data = fd)
summary(pr)
##
## Call:
## lm(formula = Weight ~ poly(Length1, degree), data = fd)
##
## Residuals:
## Min 1Q Median 3Q Max
## -428.27 -62.07 -2.54 75.17 384.55
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 398.33 11.17 35.661 < 2e-16 ***
## poly(Length1, degree)1 4120.44 140.85 29.255 < 2e-16 ***
## poly(Length1, degree)2 417.99 140.85 2.968 0.00347 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 140.8 on 156 degrees of freedom
## Multiple R-squared: 0.8472, Adjusted R-squared: 0.8452
## F-statistic: 432.3 on 2 and 156 DF, p-value: < 2.2e-16
# Observed weights and the model's predictions for the same rows of fd
value <- fd$Weight
pred <- predict(object = pr, newdata = fd)
# Observed (x axis) against predicted (y axis) weight
plot(
  x = value,
  y = pred,
  main = "Weight vs. Predicted Weight Polynomial Regression",
  xlab = "Weight",
  ylab = "Weight Prediction"
)
# Insights
# - We used the two variables Length1 and Weight because we wanted to examine
#   whether the weight of the animal, in this case a fish, is related to its
#   length; the models above predict Weight from Length1.
# - The correlation coefficient (about 0.92) is close to 1, which means there
#   is a strong positive linear relationship between the two variables.
# - The difference between simple and multiple regression is the number of
#   independent variables used to predict the dependent variable.
# - In simple regression only one independent variable is used to predict the
#   dependent variable, and the relationship between the two variables is
#   modeled as a straight line.
# - In multiple regression two or more independent variables are used to
#   predict the dependent variable, and the relationship is modeled as a
#   linear combination of the independent variables.