# -----------------------------------------------------------------------
# Clear the workspace: remove every object that ls() lists in the current
# environment.
# NOTE(review): this also discards anything the caller had loaded; running
# the script in a fresh R session is the safer way to get a clean start.
rm(list=ls())
#Load Packages
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(ggfortify)
# Switch to the course folder so the relative file name below resolves
setwd("D:\\R Course")
# Read the plant growth data (plant.growth.rate.csv) into a data frame
plant <- read.csv(file = "plant.growth.rate.csv")
# Compact overview of the columns: names, types and first few values
dplyr::glimpse(plant)
## Rows: 50
## Columns: 2
## $ soil.moisture.content <dbl> 0.4696876, 0.5413106, 1.6979915, 0.8255799, 0.85…
## $ plant.growth.rate <dbl> 21.31695, 27.03072, 38.98937, 30.19529, 37.06547…
# Scatter plot of growth rate against soil moisture, with a fitted straight
# line (method = lm, no confidence band) drawn over the points
ggplot(
  data = plant,
  mapping = aes(x = soil.moisture.content, y = plant.growth.rate)
) +
  geom_point(size = 2) +
  theme_bw() +
  labs(title = "Moisture Vs. Growth", x = "Moisture", y = "Growth") +
  geom_smooth(method = lm, se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'

# Fit a simple linear regression of plant growth rate on soil moisture
model_pg <- lm(
  formula = plant.growth.rate ~ soil.moisture.content,
  data = plant
)
# Plot the fitted model to check the regression assumptions.
# smooth.colour = NA is passed so the smoother line is not drawn;
# NOTE(review): this is presumably what triggers the
# "Removed 50 rows ... (`geom_line()`)" warnings that follow.
autoplot(model_pg, smooth.colour = NA)
## Warning: Removed 50 rows containing missing values (`geom_line()`).
## Removed 50 rows containing missing values (`geom_line()`).
## Removed 50 rows containing missing values (`geom_line()`).

# ANOVA table for the fitted regression model
anova(model_pg)
## Analysis of Variance Table
##
## Response: plant.growth.rate
## Df Sum Sq Mean Sq F value Pr(>F)
## soil.moisture.content 1 2521.15 2521.15 156.08 < 2.2e-16 ***
## Residuals 48 775.35 16.15
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Regression summary: residual quantiles, coefficient estimates with their
# standard errors and t-tests, residual standard error, R-squared and the
# overall F-test
summary(model_pg)
##
## Call:
## lm(formula = plant.growth.rate ~ soil.moisture.content, data = plant)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.9089 -3.0747 0.2261 2.6567 8.9406
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 19.348 1.283 15.08 <2e-16 ***
## soil.moisture.content 12.750 1.021 12.49 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.019 on 48 degrees of freedom
## Multiple R-squared: 0.7648, Adjusted R-squared: 0.7599
## F-statistic: 156.1 on 1 and 48 DF, p-value: < 2.2e-16