Part 1: Read the data.

# Load the external CSV file into the data frame `data.df`.
data.df <- read.csv("SubAdvData.csv")

# Place the columns of `data.df` on the search path so they can be referenced
# by bare name.
attach(data.df)

# Cross-tabulate ad type (rows) against restaurant type (columns).
tab <- with(data.df, table(adType, restaurantType))
print(tab)

##           restaurantType
## adType     chain independent
##   Curr Ads  2023        2972
##   New Ads   1958        3010
##   No Ads    2003        3034

# Append a "Sum" column holding the total of each ad-type row.
addmargins(tab, margin = 2)

##           restaurantType
## adType     chain independent  Sum
##   Curr Ads  2023        2972 4995
##   New Ads   1958        3010 4968
##   No Ads    2003        3034 5037

# Share of all observations that falls in each ad-type / restaurant-type cell.
# The values are fractions of the grand total (they sum to 1), not percentages.
PercentProportion <- tab / sum(tab)
print(PercentProportion)

##           restaurantType
## adType         chain independent
##   Curr Ads 0.1348667   0.1981333
##   New Ads  0.1305333   0.2006667
##   No Ads   0.1335333   0.2022667

# Mean number of reservations for each restaurant type.
tab3 <- with(
  data.df,
  aggregate(x = reservations, by = list(restaurantType), FUN = mean)
)
print(tab3)

##       Group.1        x
## 1       chain 42.58205
## 2 independent 32.50688

# Mean number of reservations for each ad type.
tab4 <- with(
  data.df,
  aggregate(x = reservations, by = list(adType), FUN = mean)
)
print(tab4)

##    Group.1        x
## 1 Curr Ads 34.03283
## 2  New Ads 41.62762
## 3   No Ads 33.96724

# Correlation between reservations and phone calls, using cor()'s default
# method.
with(data.df, cor(reservations, phoneCalls))

## [1] 0.6516813

# The same correlation with the Pearson method requested explicitly.
with(data.df, cor(x = reservations, y = phoneCalls, method = "pearson"))

## [1] 0.6516813

# One-proportion test: is the share of "Curr Ads" observations equal to 0.4?
# The success count and the sample size are taken from the frequency table
# instead of being typed in by hand, so the test stays in step with the data
# if the CSV changes.
tab5 <- with(data.df, table(adType))
n_curr_ads <- tab5[["Curr Ads"]]  # number of "Curr Ads" observations
n_total <- sum(tab5)              # number of observations across all ad types
# correct = FALSE turns off the continuity correction.
prop.test(x = n_curr_ads, n = n_total, p = 0.4, correct = FALSE)

## 
##  1-sample proportions test without continuity correction
## 
## data:  4995 out of 15000, null probability 0.4
## X-squared = 280.56, df = 1, p-value < 2.2e-16
## alternative hypothesis: true p is not equal to 0.4
## 95 percent confidence interval:
##  0.3255016 0.3405839
## sample estimates:
##     p 
## 0.333

# Null hypothesis (p = 0.4) rejected: p-value < 2.2e-16.

# One-sample t-test of the null hypothesis that mean reservations equal 40.
res <- with(data.df, t.test(reservations, mu = 40))
print(res)

## 
##  One Sample t-test
## 
## data:  reservations
## t = -53.711, df = 14999, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 40
## 95 percent confidence interval:
##  36.39943 36.65297
## sample estimates:
## mean of x 
##   36.5262

# Null hypothesis (mean = 40) rejected: p-value < 2.2e-16.

 # using subset function
# Reservations of chain restaurants only (kept as a one-column data frame).
Avg_chain <- subset(data.df, restaurantType == "chain", select = reservations)
# Reservations of independent restaurants only (one-column data frame).
Avg_indep <- subset(
  data.df,
  restaurantType == "independent",
  select = reservations
)
# Two-sample t-test with pooled variance (var.equal = TRUE) comparing the mean
# reservations of the two restaurant types. t.test() is documented to take
# numeric vectors, so the column is extracted from each one-column data frame
# rather than passing the data frames themselves.
tst <- t.test(
  Avg_chain[["reservations"]],
  Avg_indep[["reservations"]],
  var.equal = TRUE
)
tst

## 
##  Two Sample t-test
## 
## data:  Avg_chain and Avg_indep
## t = 97.503, df = 14998, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   9.872633 10.277718
## sample estimates:
## mean of x mean of y 
##  42.58205  32.50688

Airline DATA- EDA

Baar Baar R

today

Part 1: Read the data.