# Load required packages
library(readxl)
library(psych)
library(ggplot2)
library(ggpubr)
# Import the A5RQ1 workbook and preview its first six rows
dataset <- read_excel(path = "C:/Users/konifade/Downloads/A5RQ1.xlsx")
head(dataset, n = 6L)
## # A tibble: 6 × 3
## Customer Minutes Drinks
## <dbl> <dbl> <dbl>
## 1 1 26.9 3
## 2 2 21.5 2
## 3 3 36.6 3
## 4 4 10.6 1
## 5 5 11.1 1
## 6 6 16.3 1
# Descriptive statistics (n, mean, sd, skew, kurtosis, ...) for the two
# analysis variables, Minutes and Drinks
describe(dataset[c("Minutes", "Drinks")])
## vars n mean sd median trimmed mad min max range skew kurtosis
## Minutes 1 461 29.89 18.63 24.4 26.99 15.12 10 154.2 144.2 1.79 5.20
## Drinks 2 461 3.00 1.95 3.0 2.75 1.48 0 17.0 17.0 1.78 6.46
## se
## Minutes 0.87
## Drinks 0.09
# Distribution check: one histogram per variable, 20 suggested breaks each
hist(
  x = dataset$Minutes,
  breaks = 20,
  col = "lightblue",
  border = "black",
  main = "Histogram of Minutes",
  xlab = "Value",
  ylab = "Frequency"
)
hist(
  x = dataset$Drinks,
  breaks = 20,
  col = "lightgreen",
  border = "black",
  main = "Histogram of Drinks",
  xlab = "Value",
  ylab = "Frequency"
)
# Normality checks (Shapiro-Wilk), one per variable.
# The recorded p-values are both below 2.2e-16, i.e. normality is rejected
# for Minutes and for Drinks.
shapiro.test(x = dataset$Minutes)
##
## Shapiro-Wilk normality test
##
## data: dataset$Minutes
## W = 0.84706, p-value < 2.2e-16
shapiro.test(x = dataset$Drinks)
##
## Shapiro-Wilk normality test
##
## data: dataset$Drinks
## W = 0.85487, p-value < 2.2e-16
# Scatterplot of Drinks against Minutes with a fitted regression line, its
# confidence band, and the Spearman correlation coefficient annotated
ggscatter(
  data = dataset,
  x = "Minutes",
  y = "Drinks",
  xlab = "Variable Minutes",
  ylab = "Variable Drinks",
  add = "reg.line",
  conf.int = TRUE,
  cor.coef = TRUE,
  cor.method = "spearman"
)
# Correlation tests
# Primary test: Spearman rank correlation. The Shapiro-Wilk tests in this
# script rejected normality for both variables, and the scatterplot annotates
# the Spearman coefficient, so the formal test uses the same method.
# exact = FALSE requests the asymptotic p-value: Drinks is an integer count
# with many tied values, and an exact p-value cannot be computed with ties.
# NOTE(review): re-run/knit the script to record this test's output here.
cor.test(dataset$Minutes, dataset$Drinks, method = "spearman", exact = FALSE)
# Pearson correlation, kept for comparison only: its p-value and confidence
# interval assume approximately normal data, so interpret them with caution.
cor.test(dataset$Minutes, dataset$Drinks, method = "pearson")
##
## Pearson's product-moment correlation
##
## data: dataset$Minutes and dataset$Drinks
## t = 68.326, df = 459, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9452363 0.9617123
## sample estimates:
## cor
## 0.9541922
# End of analysis