library(readxl)
library(ggpubr)
## Loading required package: ggplot2
# Load the height/weight workbook into `DatasetC`.
# The workbook location can be overridden through the DATASETC_PATH
# environment variable so the script is not tied to a single machine; when the
# variable is unset or empty, the original hard-coded location is used.
dataset_path <- Sys.getenv("DATASETC_PATH", unset = "")
if (!nzchar(dataset_path)) {
  dataset_path <- "C:/Users/varun/Downloads/DatasetC.xlsx"
}
DatasetC <- read_excel(dataset_path)

# Fail early with a clear message if the columns used by the rest of the
# script are missing, rather than letting `DatasetC$...` yield NULL later on.
missing_columns <- setdiff(c("inches", "pounds"), names(DatasetC))
if (length(missing_columns) > 0) {
  stop(
    "DatasetC is missing required column(s): ",
    paste(missing_columns, collapse = ", "),
    call. = FALSE
  )
}
# Descriptive statistics (mean and standard deviation) for both variables.
# Each bare expression auto-prints at top level; the values recorded from an
# earlier run are noted beside each call.
inches_values <- DatasetC$inches
pounds_values <- DatasetC$pounds

mean(inches_values) # recorded output: [1] 69.27122
sd(inches_values)   # recorded output: [1] 2.738448
mean(pounds_values) # recorded output: [1] 195.7736
sd(pounds_values)   # recorded output: [1] 29.0096
# Histograms of both variables, drawn with identical settings apart from the
# title, the x-axis label, and the fill colour.
fill_by_column <- c(inches = "lightblue", pounds = "lightcoral")
for (column_name in names(fill_by_column)) {
  hist(
    DatasetC[[column_name]],
    main = column_name,
    # Spelled out so the axis keeps the label that hist() derives by default
    # from a `DatasetC$<column>` argument expression.
    xlab = paste0("DatasetC$", column_name),
    col = fill_by_column[[column_name]],
    breaks = 20,
    border = "white",
    cex.main = 1,
    cex.axis = 1,
    cex.lab = 1
  )
}
# The variable "inches" appears normally distributed. The data look symmetrical (most values are in the middle), and the histogram has a bell-shaped curve.
# The variable "pounds" does not appear normally distributed. The data are positively skewed (most values are on the left, with a longer tail to the right).
# Shapiro-Wilk normality test for inches. Keep the argument written as
# `DatasetC$inches`: the "data:" line of the printed result echoes the
# argument expression (see the recorded output below).
shapiro.test(DatasetC$inches)
## Recorded output from an earlier run:
##
## Shapiro-Wilk normality test
##
## data: DatasetC$inches
## W = 0.99388, p-value = 0.9349
# Shapiro-Wilk normality test for pounds, followed by its recorded output.
shapiro.test(DatasetC$pounds)
##
## Shapiro-Wilk normality test
##
## data: DatasetC$pounds
## W = 0.97289, p-value = 0.03691
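# The Shapiro-Wilk p-value for the inches normality test is greater than .05 (p = .935), so we fail to reject normality; the data are consistent with a normal distribution.
# The Shapiro-Wilk p-value for the pounds normality test is less than .05 (p = .037), so normality is rejected; the data are NOT normally distributed.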
# Spearman rank correlation test between inches and pounds
# (method = "spearman"). The arguments are kept as `DatasetC$...` expressions
# because the "data:" line of the printed result echoes them, as the recorded
# output below shows. The recorded run reports a two-sided alternative
# ("true rho is not equal to 0").
cor.test(DatasetC$inches, DatasetC$pounds, method = "spearman")
## Recorded output from an earlier run:
##
## Spearman's rank correlation rho
##
## data: DatasetC$inches and DatasetC$pounds
## S = 166000, p-value = 0.9693
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.00390039
# The Spearman correlation test was selected because pounds failed the Shapiro-Wilk normality test.
# The p-value (probability value) is 0.9693, which is above .05. This means the result is not statistically significant, so we fail to reject the null hypothesis (rho = 0).
# The rho value is 0.00390039.
# The sign of rho is positive, which would mean that as Height (inches) increases, Weight (pounds) increases; however, the value is so close to zero that the direction is negligible and there is effectively no relationship between inches and pounds.
# The correlation value falls within ±0.00 to 0.09, which means there is no relationship.
# Scatter plot of pounds (y axis) against inches (x axis) with a regression
# line added (add = "reg.line"). The plot object is built first and then
# printed explicitly, which is what top-level auto-printing does for the bare
# call.
scatter_plot <- ggscatter(
  data = DatasetC,
  x = "inches",
  y = "pounds",
  xlab = "inches",
  ylab = "pounds",
  add = "reg.line"
)
print(scatter_plot)
# The line of best fit is horizontal. This means there is no relationship between inches and pounds.
# The dots are scattered randomly around the line, which indicates no relationship between the two variables.
# The dots do not follow a straight-line pattern. A Spearman correlation, which does not assume a linear relationship, is therefore appropriate (and is also needed because pounds is not normally distributed).
# There is possibly one outlier.
library(rmarkdown)