Load necessary packages:
library(pander)
library(ggpubr)
## Loading required package: ggplot2
library(outliers)
\(~\)
CORRELATION EXAMPLE: Create the data vectors and the data frame manually in RStudio:
# Assemble the paired observations directly as a data frame, then expose the
# two columns as stand-alone vectors under the same names.
cor.data <- data.frame(
  experience = c(10, 12, 8, 15, 6, 11, 14, 16, 15, 12),
  salaries = c(98, 95, 97, 110, 88, 102, 120, 128, 105, 104)
)
experience <- cor.data$experience
salaries <- cor.data$salaries
\(~\)
Check for Outliers:
# Draw the boxplot of the experience column (any outliers would be drawn in
# red) and keep the summary statistics that boxplot() returns.
plot1 <- boxplot(cor.data$experience, outcol = "red", cex = 1.5)
# Show the five-number summary and the (empty) list of outliers.
print(plot1)
## $stats
## [,1]
## [1,] 6
## [2,] 10
## [3,] 12
## [4,] 15
## [5,] 16
##
## $n
## [1] 10
##
## $conf
## [,1]
## [1,] 9.501801
## [2,] 14.498199
##
## $out
## numeric(0)
##
## $group
## numeric(0)
##
## $names
## [1] ""
# Same outlier check for the salaries column: draw the boxplot and keep the
# statistics that boxplot() returns.
plot2 <- boxplot(cor.data$salaries, outcol = "red", cex = 1.5)
# Show the five-number summary and the (empty) list of outliers.
print(plot2)
## $stats
## [,1]
## [1,] 88
## [2,] 97
## [3,] 103
## [4,] 110
## [5,] 128
##
## $n
## [1] 10
##
## $conf
## [,1]
## [1,] 96.50468
## [2,] 109.49532
##
## $out
## numeric(0)
##
## $group
## numeric(0)
##
## $names
## [1] ""
\(~\)
Test for normality of data:
# Shapiro-Wilk normality test on the experience column. The call is left
# exactly as written because the printed `data:` label echoes this expression.
shapiro.test(cor.data$experience)
##
## Shapiro-Wilk normality test
##
## data: cor.data$experience
## W = 0.94701, p-value = 0.6333
# Shapiro-Wilk normality test on the salaries column. The call is left
# exactly as written because the printed `data:` label echoes this expression.
shapiro.test(cor.data$salaries)
##
## Shapiro-Wilk normality test
##
## data: cor.data$salaries
## W = 0.94407, p-value = 0.5991
\(~\)
Perform Correlation Analysis:
# Pearson correlation test between experience and salaries. The argument
# expressions are kept verbatim so the data label stored in the result is
# unchanged.
pearson.cor <- cor.test(
  x = cor.data$experience,
  y = cor.data$salaries,
  method = "pearson"
)
# Render the test result as a markdown table.
pander(pearson.cor)
Test statistic | df | P value | Alternative hypothesis | cor |
---|---|---|---|---|
4.086 | 8 | 0.003504 * * | two.sided | 0.8222 |
\(~\)
SIMPLE LINEAR REGRESSION ANALYSIS EXAMPLE: Create the data vectors and the data frame manually in RStudio:
# Assemble the paired observations directly as a data frame, then expose the
# two columns as stand-alone vectors under the same names (the base-graphics
# scatterplot further down reads them).
reg.data <- data.frame(
  noise = c(28, 33, 21, 35, 29, 26, 22, 30, 34, 27, 31, 34, 29),
  hypertension = c(73, 68, 69, 88, 80, 74, 74, 69, 89, 68, 76, 87, 73)
)
noise <- reg.data$noise
hypertension <- reg.data$hypertension
# Display the data frame for a visual check.
print(reg.data)
## noise hypertension
## 1 28 73
## 2 33 68
## 3 21 69
## 4 35 88
## 5 29 80
## 6 26 74
## 7 22 74
## 8 30 69
## 9 34 89
## 10 27 68
## 11 31 76
## 12 34 87
## 13 29 73
\(~\)
Check for outliers:
# Draw the boxplot of the noise column (any outliers would be drawn in red)
# and keep the statistics that boxplot() returns. This reuses the name plot1.
plot1 <- boxplot(reg.data$noise, outcol = "red", cex = 1.5)
# Show the five-number summary and the (empty) list of outliers.
print(plot1)
## $stats
## [,1]
## [1,] 21
## [2,] 27
## [3,] 29
## [4,] 33
## [5,] 35
##
## $n
## [1] 13
##
## $conf
## [,1]
## [1,] 26.37072
## [2,] 31.62928
##
## $out
## numeric(0)
##
## $group
## numeric(0)
##
## $names
## [1] ""
# Same outlier check for the hypertension column: draw the boxplot and keep
# the statistics that boxplot() returns. This reuses the name plot2.
plot2 <- boxplot(reg.data$hypertension, outcol = "red", cex = 1.5)
# Show the five-number summary and the (empty) list of outliers.
print(plot2)
## $stats
## [,1]
## [1,] 68
## [2,] 69
## [3,] 74
## [4,] 80
## [5,] 89
##
## $n
## [1] 13
##
## $conf
## [,1]
## [1,] 69.17966
## [2,] 78.82034
##
## $out
## numeric(0)
##
## $group
## numeric(0)
##
## $names
## [1] ""
\(~\)
Create a scatterplot of the data:
# Base-graphics scatterplot of hypertension against noise level.
plot(
  noise, hypertension,
  main = "Scatterplot of Data",
  xlab = "Noise Level",
  ylab = "Hypertension"
)
# ggpubr scatterplot with the fitted regression line. ggscatter() has no
# `method` argument: it was passed through, ignored, and raised an
# "unknown parameters" warning. The Pearson coefficient is requested through
# the documented `cor.coef` / `cor.method` arguments instead.
ggscatter(
  reg.data,
  x = "noise", y = "hypertension",
  xlab = "Noise Level", ylab = "Hypertension",
  add = "reg.line",
  cor.coef = TRUE, cor.method = "pearson"
)
## `geom_smooth()` using formula 'y ~ x'
\(~\)
Perform the Simple Linear Regression analysis:
# Fit hypertension as a linear function of noise level.
# The formula uses bare column names so that `data = reg.data` actually
# supplies the variables. Writing `reg.data$...` inside the formula bypasses
# `data`, labels the coefficient `reg.data$noise`, and stops
# predict(reg.model, newdata = ...) from using the new data.
reg.model <- lm(hypertension ~ noise, data = reg.data)
# Coefficient table, residual summary, R-squared and overall F-test.
summary(reg.model)
##
## Call:
## lm(formula = hypertension ~ noise, data = reg.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.082 -2.837 1.347 5.592 7.857
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 45.0612 12.1720 3.702 0.00349 **
## noise 1.0612 0.4132 2.569 0.02612 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.316 on 11 degrees of freedom
## Multiple R-squared: 0.3749, Adjusted R-squared: 0.3181
## F-statistic: 6.597 on 1 and 11 DF, p-value: 0.02612