Sample of Hypothesis Test

Load the library function

library(tidyverse)

Hypothesis 1 analysis

# Read the file through its full path rather than calling setwd(), so that
# loading the data does not change the session's working directory.
data_dir <- "C:/Users/ngsook/Desktop/NUS EBA/Semester 2/Statistical BootCamp/WK3"
hypo1 <- read.csv(file.path(data_dir, "hypothesis1.csv"))
# Number of rows and columns in the loaded data frame.
dim(hypo1)

## [1] 500   2

# Column names of the data frame.
names(hypo1)

## [1] "ID"    "Price"

# Confirm that read.csv() returned a plain data.frame.
class(hypo1)

## [1] "data.frame"

# Preview the first six rows.
head(hypo1)

##   ID Price
## 1  1   8.7
## 2  2  29.2
## 3  3  30.7
## 4  4   4.1
## 5  5  23.1
## 6  6  20.1

To test the mean = 20

Null Hypothesis: mean = 20,

Alternate Hypothesis: mean != 20

Plot the graph

# Use a single plotting panel (1 row x 1 column), then draw the histogram
# of Price to look at its distribution before running the test.
par(mfrow = c(1,1))
hist(hypo1$Price)

## Perform t-test

# Test H0: mean = 20 against H1: mean != 20. `mu` has to be passed explicitly:
# t.test() defaults to mu = 0, and the recorded output that follows was
# produced with that default (its alternative line reads "not equal to 0"),
# so it needs to be regenerated after this change.
t.test(hypo1$Price, mu = 20)

## 
##  One Sample t-test
## 
## data:  hypo1$Price
## t = 47.925, df = 499, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  19.40979 21.06925
## sample estimates:
## mean of x 
##  20.23952

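The p-value printed above is for the null hypothesis mean = 0 (the alternative shown is "true mean is not equal to 0"), so it does not test mean = 20. The 95% confidence interval (19.41, 21.07) contains 20, so at the 5% level we fail to reject the null hypothesis mean = 20.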

Hypothesis mean = 15, mean != 15

Hypothesis mean = 15, mean > 15

# Sample mean of Price, for comparison with the hypothesised value of 15.
mean(hypo1$Price)

## [1] 20.23952

# Two-sided one-sample t-test of H0: mean = 15 (the default alternative,
# written out explicitly).
t.test(x = hypo1$Price, mu = 15, alternative = "two.sided")

## 
##  One Sample t-test
## 
## data:  hypo1$Price
## t = 12.407, df = 499, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 15
## 95 percent confidence interval:
##  19.40979 21.06925
## sample estimates:
## mean of x 
##  20.23952

# One-sided test of H0: mean = 15 against H1: mean > 15.
t.test(x = hypo1$Price, mu = 15, alternative = "greater")

## 
##  One Sample t-test
## 
## data:  hypo1$Price
## t = 12.407, df = 499, p-value < 2.2e-16
## alternative hypothesis: true mean is greater than 15
## 95 percent confidence interval:
##  19.54358      Inf
## sample estimates:
## mean of x 
##  20.23952

Hypothesis 2 analysis

# Read the file through its full path rather than calling setwd(), so that
# loading the data does not change the session's working directory.
data_dir <- "C:/Users/ngsook/Desktop/NUS EBA/Semester 2/Statistical BootCamp/WK3"
hypo2 <- read.csv(file.path(data_dir, "hypothesis2.csv"))
# Number of rows and columns in the loaded data frame.
dim(hypo2)

## [1] 500   3

# Column names: one ID column plus one price column per market.
names(hypo2)

## [1] "ID"       "Price_sg" "Price_us"

# Preview the first six rows.
head(hypo2)

##   ID Price_sg Price_us
## 1  1      8.7 22.42744
## 2  2     29.2 37.45265
## 3  3     30.7 41.16520
## 4  4      4.1 35.24449
## 5  5     23.1 35.86244
## 6  6     20.1 43.95911

To test the mean of SG is equal to mean of US

Null Hypothesis: mean_SG = mean_US,

Alternate hypothesis: mean_SG != mean_US

Plot the graph

# Overlay the two price distributions. A shared x-axis range is set on the
# first call so that bars of the second histogram are not cut off at the
# axis limits chosen for the first one alone.
price_range <- range(hypo2$Price_sg, hypo2$Price_us, na.rm = TRUE)
hist(hypo2$Price_sg, col = "green", xlim = price_range)
# add = TRUE draws on top of the existing plot; TRUE is spelled out because
# the shorthand T is an ordinary variable that can be reassigned.
hist(hypo2$Price_us, col = "yellow", add = TRUE)
# Redraw the frame around the plotting region.
box()

## Perform t-test on both SG price and US price

# Two-sample, two-sided t-test comparing the SG and US mean prices. With
# these arguments R runs the Welch test, as the output header shows.
# NOTE(review): both columns sit on the same ID rows; if each row is the same
# item priced in both markets, a paired test (paired = TRUE) may be the
# appropriate one - confirm how the data were collected.
t.test(hypo2$Price_sg, hypo2$Price_us)

## 
##  Welch Two Sample t-test
## 
## data:  hypo2$Price_sg and hypo2$Price_us
## t = -16.576, df = 996.32, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -10.85135  -8.55406
## sample estimates:
## mean of x mean of y 
##  20.23952  29.94223

P-value < 0.05 ==> null hypothesis rejected,

The mean of Price_sg is not equal to the mean of Price_us

Answer

# Sample mean of the SG prices.
mean(hypo2$Price_sg)

## [1] 20.23952

# Sample mean of the US prices, for comparison with the SG mean above.
mean(hypo2$Price_us)

## [1] 29.94223

Price Singapore = Price US or Price Singapore < Price US

# Two-sided Welch test of H0: mean SG price = mean US price (the default
# alternative, written out explicitly).
t.test(x = hypo2$Price_sg, y = hypo2$Price_us, alternative = "two.sided")

## 
##  Welch Two Sample t-test
## 
## data:  hypo2$Price_sg and hypo2$Price_us
## t = -16.576, df = 996.32, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -10.85135  -8.55406
## sample estimates:
## mean of x mean of y 
##  20.23952  29.94223

# One-sided test of H0: mean SG price = mean US price against
# H1: mean SG price < mean US price.
t.test(x = hypo2$Price_sg, y = hypo2$Price_us, alternative = "less")

## 
##  Welch Two Sample t-test
## 
## data:  hypo2$Price_sg and hypo2$Price_us
## t = -16.576, df = 996.32, p-value < 2.2e-16
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##       -Inf -8.739007
## sample estimates:
## mean of x mean of y 
##  20.23952  29.94223

Hypothesis3 analysis

# Read the file through its full path rather than calling setwd(), so that
# loading the data does not change the session's working directory.
data_dir <- "C:/Users/ngsook/Desktop/NUS EBA/Semester 2/Statistical BootCamp/WK3"
hypo3 <- read.csv(file.path(data_dir, "hypothesis3.csv"))
# Number of rows and columns in the loaded data frame.
dim(hypo3)

## [1] 500   3

# Column names: one ID column plus one SG price column per year.
names(hypo3)

## [1] "ID"            "Price_sg_2018" "Price_sg_2019"

# Preview the first six rows.
head(hypo3)

##   ID Price_sg_2018 Price_sg_2019
## 1  1      8.695019      19.19876
## 2  2     29.157653      14.23790
## 3  3     30.748636      21.78142
## 4  4      4.072906      24.25106
## 5  5     23.134995      19.42378
## 6  6     20.140968      12.82232

To test the mean of 2018 is equal to mean of 2019

Null Hypothesis: mean_2018 = mean_2019,

Alternate hypothesis: mean_2018 != mean_2019

Plot the graph

# Overlay the 2018 and 2019 price distributions. A shared x-axis range is set
# on the first call so that bars of the second histogram are not cut off at
# the axis limits chosen for the first one alone.
price_range <- range(hypo3$Price_sg_2018, hypo3$Price_sg_2019, na.rm = TRUE)
hist(hypo3$Price_sg_2018, col = "green", xlim = price_range)
# add = TRUE draws on top of the existing plot; TRUE is spelled out because
# the shorthand T is an ordinary variable that can be reassigned.
hist(hypo3$Price_sg_2019, col = "yellow", add = TRUE)
# Redraw the frame around the plotting region.
box()

Perform t-test on both the 2018 SG price and the 2019 SG price

# Two-sample, two-sided t-test comparing the 2018 and 2019 mean prices. With
# these arguments R runs the Welch test, as the output header shows.
# NOTE(review): both columns sit on the same ID rows; if each row is the same
# item observed in both years, a paired test (paired = TRUE) may be the
# appropriate one - confirm how the data were collected.
t.test(hypo3$Price_sg_2018, hypo3$Price_sg_2019)

## 
##  Welch Two Sample t-test
## 
## data:  hypo3$Price_sg_2018 and hypo3$Price_sg_2019
## t = 0.79654, df = 774.29, p-value = 0.426
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.5618904  1.3292627
## sample estimates:
## mean of x mean of y 
##  20.23838  19.85469

# Mean of every column, returned as a list. This includes ID, whose mean is
# only a by-product of applying mean() to all columns; the two price means
# are the values of interest.
lapply(hypo3, mean)

## $ID
## [1] 250.5
## 
## $Price_sg_2018
## [1] 20.23838
## 
## $Price_sg_2019
## [1] 19.85469

P-value > 0.05 ==> fail to reject null hypothesis,

there is no significant evidence that the mean of Price_sg_2018 differs from the mean of Price_sg_2019

Answer

# Sample mean of the 2018 prices.
mean(hypo3$Price_sg_2018)

## [1] 20.23838

# Sample mean of the 2019 prices.
mean(hypo3$Price_sg_2019)

## [1] 19.85469

# Two-sided Welch test of H0: mean 2018 price = mean 2019 price (the default
# alternative, written out explicitly).
t.test(
  x = hypo3$Price_sg_2018,
  y = hypo3$Price_sg_2019,
  alternative = "two.sided"
)

## 
##  Welch Two Sample t-test
## 
## data:  hypo3$Price_sg_2018 and hypo3$Price_sg_2019
## t = 0.79654, df = 774.29, p-value = 0.426
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.5618904  1.3292627
## sample estimates:
## mean of x mean of y 
##  20.23838  19.85469