# Setup: CRAN mirror, Excel reader, and data import ----

# Pin a CRAN mirror so install.packages() can run without an interactive
# mirror prompt.
options(repos = c(CRAN = "https://cloud.r-project.org"))

# Install readxl only when it is missing. The previous unconditional
# install.packages("readxl") tried to overwrite an existing installation on
# every run; on this Windows machine that produced
#   "cannot remove prior installation of package 'readxl'"
#   "problem copying ... readxl.dll ...: Permission denied"
# and R restored the old copy, so the reinstall achieved nothing.
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
library(readxl)

# Import the Excel workbook into A5RQ1.
# NOTE(review): this is an absolute, machine-specific path -- adjust it when
# running the script on another computer.
A5RQ1 <- read_excel("D:/Ms Analytics 2025/Fall 1/Applied Analytics &Methods 1/Week 5/A5RQ1.xlsx")
# Descriptive statistics ----

# Install psych only if it is not already available, then attach it.
# (An unconditional install.packages() re-downloads the package on every run.)
if (!requireNamespace("psych", quietly = TRUE)) {
  install.packages("psych")
}
library(psych)

# Summarise both study variables in a single table. This call previously
# appeared twice in a row with identical arguments and identical output, so
# only one copy is kept.
describe(A5RQ1[, c("Minutes", "Drinks")])
## vars n mean sd median trimmed mad min max range skew kurtosis
## Minutes 1 461 29.89 18.63 24.4 26.99 15.12 10 154.2 144.2 1.79 5.20
## Drinks 2 461 3.00 1.95 3.0 2.75 1.48 0 17.0 17.0 1.78 6.46
## se
## Minutes 0.87
## Drinks 0.09
# Scatter plot of Minutes vs. Drinks ----

# Install the plotting packages only when they are missing, instead of
# re-downloading them on every run.
for (pkg in c("ggplot2", "ggpubr")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(ggpubr)

# Scatter plot with a fitted regression line and its confidence band; the
# correlation coefficient printed on the plot uses Spearman's method.
ggscatter(
  A5RQ1,
  x = "Minutes",
  y = "Drinks",
  add = "reg.line",
  conf.int = TRUE,
  cor.coef = TRUE,
  cor.method = "spearman",
  xlab = "Minutes",
  ylab = "Drinks"
)
# Histograms of the two study variables ----
# Draw one histogram per column, Minutes first and then Drinks, each with its
# own title and fill colour. Both use 20 requested breaks, a black border, and
# the same axis labels. invisible() keeps the list returned by Map() from
# being printed at top level.
invisible(Map(
  function(column, title, fill) {
    hist(
      A5RQ1[[column]],
      breaks = 20,
      col = fill,
      border = "black",
      main = title,
      xlab = "Value",
      ylab = "Frequency"
    )
  },
  c("Minutes", "Drinks"),
  c("Histogram of Minutes", "Histogram of Drinks"),
  c("lightblue", "lightgreen")
))
# Observations:
# We use a statistical test to check the normality of the continuous variables.
# The Shapiro-Wilk test checks skewness and kurtosis at the same time.
# The test asks: "Is this variable the SAME as normal data (null hypothesis)
# or DIFFERENT from normal data (alternative hypothesis)?"
# For this test, if p is GREATER than .05 (p > .05), the data are NORMAL.
# If p is LESS than .05 (p < .05), the data are NOT normal.
# Shapiro-Wilk normality test, run separately on each variable.
# The argument is written as A5RQ1$<column> on purpose: that expression is
# echoed verbatim on the "data:" line of the printed result below.
shapiro.test(A5RQ1$Minutes)
##
## Shapiro-Wilk normality test
##
## data: A5RQ1$Minutes
## W = 0.84706, p-value < 2.2e-16
# Same test for the second variable.
shapiro.test(A5RQ1$Drinks)
##
## Shapiro-Wilk normality test
##
## data: A5RQ1$Drinks
## W = 0.85487, p-value < 2.2e-16
# Observations:
# The data were not normally distributed for either Minutes or Drinks.
# Spearman rank correlation between Minutes and Drinks ----
# exact = FALSE asks for the asymptotic p-value directly. The data contain
# tied values, so an exact p-value cannot be computed: with the default
# setting R emitted "Cannot compute exact p-value with ties" and then fell
# back to this same approximation. The statistic, p-value and rho are
# unchanged; only the warning is avoided.
cor.test(A5RQ1$Minutes, A5RQ1$Drinks, method = "spearman", exact = FALSE)
##
## Spearman's rank correlation rho
##
## data: A5RQ1$Minutes and A5RQ1$Drinks
## S = 1305608, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.9200417
# Our results indicate that p < 2.2e-16, which is statistically significant.
# There is a correlation of 0.9200417 (Spearman's rho). As minutes increase, the number of drinks bought also increases.
# The relationship between minutes spent and drinks bought is also strong.