library(tidyverse)
# Load the price-vs-age data without changing the session's working
# directory: setwd() is a global side effect that silently redirects every
# later relative path, so the full file path is built explicitly instead.
# `data_dir` is the single place to edit when running on another machine.
data_dir <- "C:/Users/ngsook/Desktop/NUS EBA/Semester 2/Statistical BootCamp/WK3"
data_path <- file.path(data_dir, "Price_vs_Age.csv")
if (!file.exists(data_path)) {
  # Report the resolved path so a missing file is easy to diagnose.
  stop("Data file not found: ", data_path, call. = FALSE)
}
datatable <- read.csv(data_path)
# Quick look at the loaded data: leading rows, then its size, then its
# column names.
head(datatable, n = 6L)
## Age Price
## 1 6 125
## 2 6 115
## 3 6 130
## 4 4 160
## 5 2 219
## 6 5 150
# Number of rows followed by number of columns.
c(nrow(datatable), ncol(datatable))
## [1] 10 2
colnames(datatable)
## [1] "Age" "Price"
# Scatter plot of Price against Age, followed by the correlation between the
# two columns and the corresponding significance test.
plot(x = datatable$Age, y = datatable$Price)
cor(x = datatable$Age, y = datatable$Price)
## [1] -0.9678716
cor.test(x = datatable$Age, y = datatable$Price)
##
## Pearson's product-moment correlation
##
## data: datatable$Age and datatable$Price
## t = -10.887, df = 8, p-value = 4.484e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.9926062 -0.8659576
## sample estimates:
## cor
## -0.9678716
# Fit a simple linear regression with Price as the response and Age as the
# only predictor, then print the fitted coefficients.
fit_model <- lm(formula = Price ~ Age, data = datatable)
fit_model
##
## Call:
## lm(formula = Price ~ Age, data = datatable)
##
## Coefficients:
## (Intercept) Age
## 291.6 -27.9
# Redraw the scatter plot and overlay the fitted regression line on it.
plot(x = datatable$Age, y = datatable$Price)
abline(reg = fit_model)
# Draw the lm diagnostic plots on a single 2 x 2 page.
# par() returns the settings it replaces; they are restored afterwards so
# that plots drawn later are not squeezed into the 2 x 2 grid.
old_par <- par(mfrow = c(2, 2))
plot(fit_model)
par(old_par)
### Outliers can be identified from the Cook's distance plot; there are no outliers in this case.
### The Normal Q-Q plot shows that the residuals follow a normal distribution.
### The residual plot is also consistent with a normal distribution.
# Full regression summary: residual quantiles, coefficient table with
# standard errors and p-values, residual standard error, R-squared and the
# overall F-test.
summary(object = fit_model)
##
## Call:
## lm(formula = Price ~ Age, data = datatable)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.9903 -7.8131 -0.6359 8.9612 24.2039
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 291.602 11.433 25.51 5.98e-09 ***
## Age -27.903 2.563 -10.89 4.48e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.25 on 8 degrees of freedom
## Multiple R-squared: 0.9368, Adjusted R-squared: 0.9289
## F-statistic: 118.5 on 1 and 8 DF, p-value: 4.484e-06
# Diagnostic plots and regression summary for fit_model, shown on a 2 x 2
# page. The previous graphics settings returned by par() are restored once
# the plots are drawn, so the 2 x 2 layout does not leak into later plots.
old_par <- par(mfrow = c(2, 2))
plot(fit_model)
par(old_par)
summary(fit_model)
##
## Call:
## lm(formula = Price ~ Age, data = datatable)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.9903 -7.8131 -0.6359 8.9612 24.2039
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 291.602 11.433 25.51 5.98e-09 ***
## Age -27.903 2.563 -10.89 4.48e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.25 on 8 degrees of freedom
## Multiple R-squared: 0.9368, Adjusted R-squared: 0.9289
## F-statistic: 118.5 on 1 and 8 DF, p-value: 4.484e-06
# Build a one-row data frame holding the predictor value to score (Age = 4),
# show it, and compute the fitted Price for it.
newdata <- data.frame(Age = 4)
head(newdata, n = 6L)
## Age
## 1 4
predict(object = fit_model, newdata = newdata)
## 1
## 179.9903
# Print the fitted model once more for reference next to the interval below.
fit_model
##
## Call:
## lm(formula = Price ~ Age, data = datatable)
##
## Coefficients:
## (Intercept) Age
## 291.6 -27.9
# Point prediction for `newdata` together with its lower and upper
# prediction-interval bounds. The interval type is spelled out in full
# ("prediction") instead of relying on partial matching of "predict".
predict(fit_model, newdata = newdata, interval = "prediction")
## fit lwr upr
## 1 179.9903 145.5292 214.4514