Statistics using R

Ph.D. Course Work - Computer Application

Part 4

———————————————————————–

# Remove every object from the current workspace so the script starts
# from a clean slate.
# NOTE(review): this does not detach packages or reset options, and it
# wipes the caller's workspace when the script is source()d; running in a
# fresh R session is the safer alternative.
rm(list=ls())
#Load Packages
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(ggfortify)
# Set the working directory to the course folder that holds the data file.
# NOTE(review): a hard-coded absolute path ties the script to one machine;
# consider an RStudio project or a path relative to the script instead.
setwd("D:\\R Course")
# Load the plant growth data (soil moisture content and plant growth rate)
plant <- read.csv("plant.growth.rate.csv")
# Inspect the column names, types and first few values
glimpse(plant)
## Rows: 50
## Columns: 2
## $ soil.moisture.content <dbl> 0.4696876, 0.5413106, 1.6979915, 0.8255799, 0.85…
## $ plant.growth.rate     <dbl> 21.31695, 27.03072, 38.98937, 30.19529, 37.06547…
# Scatter plot of growth rate against soil moisture, overlaid with a fitted
# straight line (no confidence band) to show the trend
ggplot(
  data = plant,
  mapping = aes(x = soil.moisture.content, y = plant.growth.rate)
) +
  geom_point(size = 2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(title = "Moisture Vs. Growth", x = "Moisture", y = "Growth") +
  theme_bw()
## `geom_smooth()` using formula = 'y ~ x'

# Fit a simple linear regression of growth rate on soil moisture content
model_pg <- lm(
  formula = plant.growth.rate ~ soil.moisture.content,
  data = plant
)
# Diagnostic plots for validating the model assumptions.
# NOTE(review): smooth.colour = NA presumably hides the smoother line on
# each panel - confirm against the ggfortify documentation.
autoplot(model_pg, smooth.colour = NA)
## Warning: Removed 50 rows containing missing values (`geom_line()`).
## Removed 50 rows containing missing values (`geom_line()`).
## Removed 50 rows containing missing values (`geom_line()`).

# ANOVA table for the regression: F-test of whether soil moisture content
# explains a significant share of the variation in plant growth rate
anova(model_pg)
## Analysis of Variance Table
## 
## Response: plant.growth.rate
##                       Df  Sum Sq Mean Sq F value    Pr(>F)    
## soil.moisture.content  1 2521.15 2521.15  156.08 < 2.2e-16 ***
## Residuals             48  775.35   16.15                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Regression summary: residual quantiles, coefficient estimates with their
# standard errors and t-tests, R-squared and the overall F-test
summary(model_pg)
## 
## Call:
## lm(formula = plant.growth.rate ~ soil.moisture.content, data = plant)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.9089 -3.0747  0.2261  2.6567  8.9406 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             19.348      1.283   15.08   <2e-16 ***
## soil.moisture.content   12.750      1.021   12.49   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.019 on 48 degrees of freedom
## Multiple R-squared:  0.7648, Adjusted R-squared:  0.7599 
## F-statistic: 156.1 on 1 and 48 DF,  p-value: < 2.2e-16