Hypothesis testing

We perform a complete hypothesis testing experiment to compare the Linear Vs. Non-Linear Models. This comparison is performed under two sets of results (datasets):

1. Data generated when z = 0 and alpha = 0.01
2. Data generated when z = 80 and alpha = 0.01

Hypothesis testing when z=0

Null hypothesis: The mean number of items offered under the linear model is equal to the mean number offered under the non-linear model. $H_0: \mu_{\text{linear}} - \mu_{\text{nonlinear}} = 0$

# Load the results of the first model pair for z = 0. Judging by the file
# names these are presumably the linear ("l") and non-linear ("nl") model
# outputs -- confirm against the data source.
Datamodel1znotest <- read.csv(
  "/Users/uzma/Downloads/ml-latest-small/MNLCE1lznotest.csv",
  header = TRUE
)
Datamodel1nlznotest <- read.csv(
  "/Users/uzma/Downloads/ml-latest-small/MNLCE1nlznotest.csv",
  header = TRUE
)

# The comparison below pairs the two files row by row, so they must have the
# same number of rows; stop here rather than letting data.frame() recycle a
# shorter column or fail with an unrelated message.
stopifnot(nrow(Datamodel1znotest) == nrow(Datamodel1nlznotest))

# Build the paired table. The user index is derived from the data instead of
# being hard-coded to 671 rows.
x <- data.frame(
  Users = seq_len(nrow(Datamodel1znotest)),
  Datamodel1znotest$Number,
  Datamodel1nlznotest$Number
)
y <- c("Users", "Model1", "Model2")
colnames(x) <- y

Plots

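Because the paired t-test assumes that the paired differences are normally distributed, we plot the differences between the two models. The plots indicate that the differences are normally distributed to a large extent, although the Shapiro-Wilk test below rejects exact normality (p-value = 1.327e-07), so the t-test relies on the large sample size (671 pairs) rather than on exact normality.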

# Per-user difference between the two models (Model1 minus Model2)
x$diff <- with(x, Model1 - Model2)

# Visual checks of the distribution of the differences
boxplot(x$diff)

qqnorm(x$diff)
qqline(x$diff)

# Shapiro-Wilk test: the null hypothesis is that the differences are normally
# distributed, so a small p-value is evidence against normality
shapiro.test(x$diff)

## 
##  Shapiro-Wilk normality test
## 
## data:  x$diff
## W = 0.98114, p-value = 1.327e-07

# Two-sided paired t-test of H0: mean(Model1 - Model2) = 0, with the
# defaults of t.test() written out explicitly
t.test(x$Model1, x$Model2,
       alternative = "two.sided", mu = 0,
       paired = TRUE, conf.level = 0.95)

## 
##  Paired t-test
## 
## data:  x$Model1 and x$Model2
## t = 88.831, df = 670, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  50.86371 53.16311
## sample estimates:
## mean of the differences 
##                52.01341

Hypothesis testing when z=80 (large)

Null hypothesis: The mean number of items offered under the non-linear model is equal to the mean number offered under the linear model. $H_0: \mu_{\text{nonlinear}} - \mu_{\text{linear}} = 0$

# Load the results of the first model pair for z = 80. Judging by the file
# names these are presumably the linear ("l") and non-linear ("nl") model
# outputs -- confirm against the data source.
Datamodel1ztest <- read.csv(
  "/Users/uzma/Downloads/ml-latest-small/MNLCE1lztest.csv",
  header = TRUE
)
Datamodel1nlztest <- read.csv(
  "/Users/uzma/Downloads/ml-latest-small/MNLCE1nlztest.csv",
  header = TRUE
)

# The comparison below pairs the two files row by row, so they must have the
# same number of rows; stop here rather than letting data.frame() recycle a
# shorter column or fail with an unrelated message.
stopifnot(nrow(Datamodel1ztest) == nrow(Datamodel1nlztest))

# Build the paired table. The user index is derived from the data instead of
# being hard-coded to 671 rows.
x1 <- data.frame(
  Users = seq_len(nrow(Datamodel1ztest)),
  Datamodel1ztest$Number,
  Datamodel1nlztest$Number
)
y1 <- c("Users", "Model1", "Model2")
colnames(x1) <- y1

Plots

Because the paired t-test is based on the underlying assumption of normality we plot the difference in data.

# Per-user difference between the two models (Model1 minus Model2)
x1$diff <- with(x1, Model1 - Model2)

# Visual checks of the distribution of the differences
boxplot(x1$diff)

qqnorm(x1$diff)
qqline(x1$diff)

# Shapiro-Wilk test: the null hypothesis is that the differences are normally
# distributed, so a small p-value is evidence against normality
shapiro.test(x1$diff)

## 
##  Shapiro-Wilk normality test
## 
## data:  x1$diff
## W = 0.43548, p-value < 2.2e-16

# Two-sided paired t-test of H0: mean(Model1 - Model2) = 0, with the
# defaults of t.test() written out explicitly
t.test(x1$Model1, x1$Model2,
       alternative = "two.sided", mu = 0,
       paired = TRUE, conf.level = 0.95)

## 
##  Paired t-test
## 
## data:  x1$Model1 and x1$Model2
## t = 9.82, df = 670, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  1.298442 1.947460
## sample estimates:
## mean of the differences 
##                1.622951

The Second set of models

Experiment 1: When z is zero

# Load the results of the second model pair for z = 0. Judging by the file
# names these are presumably the linear ("l") and non-linear ("nl") model
# outputs -- confirm against the data source.
Datamodel2znotest <- read.csv(
  "/Users/uzma/Downloads/ml-latest-small/MNLCE2lznotestnew.csv",
  header = TRUE
)
Datamodel2nlznotest <- read.csv(
  "/Users/uzma/Downloads/ml-latest-small/MNLCE2nlznotestnew.csv",
  header = TRUE
)

# The comparison below pairs the two files row by row, so they must have the
# same number of rows; stop here rather than letting data.frame() recycle a
# shorter column or fail with an unrelated message.
stopifnot(nrow(Datamodel2znotest) == nrow(Datamodel2nlznotest))

# Build the paired table. The user index is derived from the data instead of
# being hard-coded to 671 rows.
x2 <- data.frame(
  Users = seq_len(nrow(Datamodel2znotest)),
  Datamodel2znotest$Number,
  Datamodel2nlznotest$Number
)
y2 <- c("Users", "Model1", "Model2")
colnames(x2) <- y2
# Per-user difference between the two models (Model1 minus Model2)
x2$diff <- with(x2, Model1 - Model2)

# Visual checks of the distribution of the differences
boxplot(x2$diff)

qqnorm(x2$diff)
qqline(x2$diff)

# Shapiro-Wilk test: the null hypothesis is that the differences are normally
# distributed, so a small p-value is evidence against normality
shapiro.test(x2$diff)

## 
##  Shapiro-Wilk normality test
## 
## data:  x2$diff
## W = 0.70572, p-value < 2.2e-16

# Two-sided paired t-test of H0: mean(Model1 - Model2) = 0, with the
# defaults of t.test() written out explicitly
t.test(x2$Model1, x2$Model2,
       alternative = "two.sided", mu = 0,
       paired = TRUE, conf.level = 0.95)

## 
##  Paired t-test
## 
## data:  x2$Model1 and x2$Model2
## t = 87.054, df = 670, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  34.90251 36.51329
## sample estimates:
## mean of the differences 
##                 35.7079

Experiment 2: When z=80

# Load the results of the second model pair for z = 80. Judging by the file
# names these are presumably the linear ("l") and non-linear ("nl") model
# outputs -- confirm against the data source.
Datamodel2ztest <- read.csv(
  "/Users/uzma/Downloads/ml-latest-small/MNLCE2lztestnew.csv",
  header = TRUE
)
Datamodel2nlztest <- read.csv(
  "/Users/uzma/Downloads/ml-latest-small/MNLCE2nlztestnew.csv",
  header = TRUE
)

# The comparison below pairs the two files row by row, so they must have the
# same number of rows; stop here rather than letting data.frame() recycle a
# shorter column or fail with an unrelated message.
stopifnot(nrow(Datamodel2ztest) == nrow(Datamodel2nlztest))

# Build the paired table. The user index is derived from the data instead of
# being hard-coded to 671 rows.
x3 <- data.frame(
  Users = seq_len(nrow(Datamodel2ztest)),
  Datamodel2ztest$Number,
  Datamodel2nlztest$Number
)
y3 <- c("Users", "Model1", "Model2")
colnames(x3) <- y3
# Per-user difference between the two models (Model1 minus Model2)
x3$diff <- with(x3, Model1 - Model2)

# Visual checks of the distribution of the differences
boxplot(x3$diff)

qqnorm(x3$diff)
qqline(x3$diff)

# Shapiro-Wilk test: the null hypothesis is that the differences are normally
# distributed, so a small p-value is evidence against normality
shapiro.test(x3$diff)

## 
##  Shapiro-Wilk normality test
## 
## data:  x3$diff
## W = 0.080248, p-value < 2.2e-16

# Two-sided paired t-test of H0: mean(Model1 - Model2) = 0, with the
# defaults of t.test() written out explicitly
t.test(x3$Model1, x3$Model2,
       alternative = "two.sided", mu = 0,
       paired = TRUE, conf.level = 0.95)

## 
##  Paired t-test
## 
## data:  x3$Model1 and x3$Model2
## t = 403.84, df = 670, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  39.53856 39.92492
## sample estimates:
## mean of the differences 
##                39.73174

Hypothesis testing

Uzma Mushtaque

April 11, 2017