Two-Proportion Test
# Read the dataset from Excel
library(readxl)
sales_data <- read_excel("C:\\Users\\samy_\\Desktop\\R_Python_Machine Learning DataSets\\Adults_Children_Sales.xlsx")
# str(sales_data) # Structure of Data
# Columns are read through sales_data$... rather than attach(), so no column
# names are placed on the search path where they could mask other objects.
# sales_data_adult <- factor(sales_data$Adults, levels = c("Purchased", "Did not Purchase"))
# sales_data_children <- factor(sales_data$Children, levels = c("Purchased", "Did not Purchase"))
# Find frequencies of purchased and did not purchase
sales_adult_tb <- table(sales_data$Adults)
sales_children_tb <- table(sales_data$Children)
# View(sales_adult_tb)
# View(sales_children_tb)
# 2 Proportion Test
# Look the success count up by its label instead of by position: position 2
# is "Purchased" only because of alphabetical ordering, and `[[` fails loudly
# if the label is absent instead of silently testing the wrong category.
purchased_counts <- c(
  Adults = sales_adult_tb[["Purchased"]],
  Children = sales_children_tb[["Purchased"]]
)
# table() leaves out NA, so each total is the number of recorded responses
# in that group rather than the raw column length.
group_sizes <- c(
  Adults = sum(sales_adult_tb),
  Children = sum(sales_children_tb)
)
prop.test(purchased_counts, group_sizes)
##
## 2-sample test for equality of proportions with continuity
## correction
##
## data: purchased_counts out of group_sizes
## X-squared = 1.0064, df = 1, p-value = 0.3158
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.33394732 0.09394732
## sample estimates:
## prop 1 prop 2
## 0.40 0.52
# Method 2: 2 Proportion Test
# stack() reshapes the group columns into long form: `values` holds the
# response and `ind` the column (group) it came from.
stacked_sales <- stack(sales_data)
sales_freq <- table(stacked_sales)
sales_freq
## ind
## values Adults Children
## Did not Purchase 30 24
## Purchased 20 26
# Proportion table
prop.table(sales_freq, 2)
## ind
## values Adults Children
## Did not Purchase 0.60 0.48
## Purchased 0.40 0.52
# Test result
# prop.test() reads a two-column matrix as one row per group, with successes
# in column 1 and failures in column 2. sales_freq has outcomes in rows and
# groups in columns, so passing it unchanged compares the share of Adults
# within each outcome (30/54 vs 20/46) instead of the purchase rate of
# Adults vs Children. Transpose it and put "Purchased" first so the
# estimates and confidence interval agree with Method 1.
purchase_by_group <- t(sales_freq)[, c("Purchased", "Did not Purchase")]
prop.test(purchase_by_group, correct = TRUE)
##
## 2-sample test for equality of proportions with continuity
## correction
##
## data: purchase_by_group
## X-squared = 1.0064, df = 1, p-value = 0.3158
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.33394732 0.09394732
## sample estimates:
## prop 1 prop 2
## 0.40 0.52
Chi-Square Test
# Read the dataset from Excel
imrb_research <- read_excel("C:\\Users\\samy_\\Desktop\\R_Python_Machine Learning DataSets\\imrb_research.xlsx")
# Keep only the first four columns (the country indicators shown by str()).
imrb_research <- imrb_research[, 1:4]
str(imrb_research)
## Classes 'tbl_df', 'tbl' and 'data.frame': 49 obs. of 4 variables:
## $ India : num 1 0 0 0 0 0 1 1 1 1 ...
## $ China : num 0 1 0 1 0 0 1 1 1 1 ...
## $ Srilanka : num 0 1 0 0 0 0 0 0 0 0 ...
## $ Bangladesh: num 0 1 1 1 1 1 1 1 1 1 ...
# No attach(): nothing in this section refers to a bare column name, and
# attaching would only add names to the search path that can mask other
# objects. Use imrb_research$India etc. if a single column is needed.
# Long form: `values` is the 0/1 response, `ind` the column it came from.
stack_imrb <- stack(imrb_research)
# Cross-tabulate response (rows) by column of origin (columns).
imrb_freq <- table(stack_imrb)
imrb_freq
## ind
## values India China Srilanka Bangladesh
## 0 28 30 25 17
## 1 21 19 24 32
# Column-wise proportions: share of 0s and 1s within each column.
prop.table(imrb_freq, 2)
## ind
## values India China Srilanka Bangladesh
## 0 0.5714286 0.6122449 0.5102041 0.3469388
## 1 0.4285714 0.3877551 0.4897959 0.6530612
# Pearson chi-squared test on the 2 x 4 frequency table.
chisq.test(imrb_freq)
##
## Pearson's Chi-squared test
##
## data: imrb_freq
## X-squared = 8.0033, df = 3, p-value = 0.04594