## [1] 15000 6
## [1] "adType" "pageViews" "phoneCalls" "reservations"
## [5] "businessID" "restaurantType"
## 'data.frame': 15000 obs. of 6 variables:
## $ adType : Factor w/ 3 levels "Curr Ads","New Ads",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ pageViews : int 643 592 648 507 591 563 629 646 649 649 ...
## $ phoneCalls : int 44 35 45 40 42 43 46 39 49 41 ...
## $ reservations : int 39 31 46 30 37 38 38 39 41 40 ...
## $ businessID : int 1 4 5 9 12 17 20 21 23 24 ...
## $ restaurantType: Factor w/ 2 levels "chain","independent": 1 1 1 1 1 1 1 1 1 1 ...
## adType pageViews phoneCalls reservations
## Curr Ads:4925 Min. :145.0 Min. :17.00 Min. :17.00
## New Ads :5073 1st Qu.:328.0 1st Qu.:32.00 1st Qu.:31.00
## No Ads :5002 Median :392.0 Median :37.00 Median :36.00
## Mean :467.8 Mean :37.73 Mean :36.58
## 3rd Qu.:635.0 3rd Qu.:42.00 3rd Qu.:41.00
## Max. :913.0 Max. :75.00 Max. :78.00
## businessID restaurantType
## Min. : 1 chain :5990
## 1st Qu.: 7630 independent:9010
## Median :14954
## Mean :15029
## 3rd Qu.:22558
## Max. :30000
## vars n mean sd median trimmed mad min
## adType* 1 15000 2.01 0.81 2.0 2.01 1.48 1
## pageViews 2 15000 467.82 168.46 392.0 457.83 152.71 145
## phoneCalls 3 15000 37.73 8.02 37.0 37.19 7.41 17
## reservations 4 15000 36.58 8.05 36.0 35.97 7.41 17
## businessID 5 15000 15029.49 8647.64 14953.5 15031.39 11059.45 1
## restaurantType* 6 15000 1.60 0.49 2.0 1.63 0.00 1
## max range skew kurtosis se
## adType* 3 2 -0.01 -1.49 0.01
## pageViews 913 768 0.45 -1.29 1.38
## phoneCalls 75 58 0.69 0.61 0.07
## reservations 78 61 0.78 0.81 0.07
## businessID 30000 29999 0.00 -1.20 70.61
## restaurantType* 2 1 -0.41 -1.83 0.00
adType## [1] "Curr Ads" "New Ads" "No Ads"
adType##
## Curr Ads New Ads No Ads
## 4925 5073 5002
adType##
## Curr Ads New Ads No Ads
## 32.83 33.82 33.35
adType & restaurantType##
## chain independent Sum
## Curr Ads 1959 2966 4925
## New Ads 2060 3013 5073
## No Ads 1971 3031 5002
## Sum 5990 9010 15000
adType & restaurantType
# Cross-tabulate ad type (rows) against restaurant type (columns).
tab1 <- table(SubAdvData$adType, SubAdvData$restaurantType)
# Turn the counts into row-wise proportions (margin = 1), append a "Sum"
# column holding each row's total (margin = 2), and round to 2 decimals.
tab2 <- round(addmargins(prop.table(tab1, margin = 1), margin = 2), digits = 2)
tab2##
## chain independent Sum
## Curr Ads 0.40 0.60 1.00
## New Ads 0.41 0.59 1.00
## No Ads 0.39 0.61 1.00
# Descriptive statistics with aggregate():
# mean of `reservations` within each level of `restaurantType`.
avb <- aggregate(
  x = SubAdvData["reservations"],
  by = list(restaurantType = SubAdvData$restaurantType),
  FUN = mean
)
avb## restaurantType reservations
## 1 chain 42.77462
## 2 independent 32.45705
# Descriptive statistics with aggregate():
# mean of `reservations` within each level of `adType`.
avb <- aggregate(
  x = SubAdvData["reservations"],
  by = list(adType = SubAdvData$adType),
  FUN = mean
)
avb## adType reservations
## 1 Curr Ads 34.00528
## 2 New Ads 41.77252
## 3 No Ads 33.84046
reservations, with respect to variables restaurantType & adType, as shown in the following output
# summary statistics by groups
library(data.table)
# Convert to a data.table so the grouped `[i, j, by]` syntax can be used.
SubAdvData1 <- data.table(SubAdvData)
# One row per (restaurantType, adType) combination: group size plus the
# mean and standard deviation of reservations, each rounded to 2 decimals.
cellStats <- SubAdvData1[
  ,
  .(
    N = .N,
    Meanreservation = round(mean(reservations), 2),
    SDreservation = round(sd(reservations), 2)
  ),
  by = .(restaurantType, adType)
]
# Sort the summary rows by restaurant type.
# NOTE: this assignment reuses the name `tab1`.
tab1 <- cellStats[order(restaurantType)]
tab1## restaurantType adType N Meanreservation SDreservation
## 1: chain No Ads 1971 39.82 5.11
## 2: chain Curr Ads 1959 40.15 4.99
## 3: chain New Ads 2060 48.10 8.60
## 4: independent No Ads 3031 29.95 4.01
## 5: independent Curr Ads 2966 29.95 3.55
## 6: independent New Ads 3013 37.45 4.05
reservations & phoneCalls using cor()
## [1] 0.662667
reservations & phoneCalls as shown below.
##
## Pearson's product-moment correlation
##
## data: reservations and phoneCalls
## t = 108.36, df = 14998, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.6535950 0.6715486
## sample estimates:
## cor
## 0.662667
# Take the three continuous variables as their own data frame.
# NOTE(review): this reads `advData`, whereas the other chunks in this
# report use `SubAdvData` -- confirm which data set is intended here.
Subset.df <- advData[
  , c("reservations", "pageViews", "phoneCalls")]
# Pairwise Pearson correlation matrix of the selected columns; rows that
# contain a missing value are dropped before computing ("complete.obs").
corMat <- cor(Subset.df, use = "complete.obs")
# round off to 3 decimal places
round(corMat, 3)## reservations pageViews phoneCalls
## reservations 1.000 0.686 0.658
## pageViews 0.686 1.000 0.722
## phoneCalls 0.658 0.722 1.000
Curr Ads is equal to 40%?
##
## 1-sample proportions test without continuity correction
##
## data: 4989 out of 15000, null probability 0.4
## X-squared = 283.92, df = 1, p-value < 2.2e-16
## alternative hypothesis: true p is not equal to 0.4
## 95 percent confidence interval:
## 0.3251040 0.3401818
## sample estimates:
## p
## 0.3326
Curr Ads, New Ads & No Ads are equal?
## Warning in chisq.test(prop.table(tab1), p = c(1/3, 1/3, 1/3)): Chi-squared
## approximation may be incorrect
##
## Chi-squared test for given probabilities
##
## data: prop.table(tab1)
## X-squared = 0.00014611, df = 2, p-value = 0.9999
Curr Ads in chain restaurants is equal to the independent restaurants?
##
## 2-sample test for equality of proportions with continuity
## correction
##
## data: c(1993, 2996) out of c(5966, 9034)
## X-squared = 0.084477, df = 1, p-value = 0.7713
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.01312639 0.01797365
## sample estimates:
## prop 1 prop 2
## 0.3340597 0.3316360
restaurantType & adType?
##
## Pearson's Chi-squared test
##
## data: tab
## X-squared = 1.5944, df = 2, p-value = 0.4506
##
## One Sample t-test
##
## data: reservations
## t = -52.06, df = 14999, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 40
## 95 percent confidence interval:
## 36.44833 36.70607
## sample estimates:
## mean of x
## 36.5772
# Two-sample t-test comparing reservations between the two restaurant types.
# using subset function
# creating a subset of the dataset including only restaurantType = "chain";
# select = "reservations" keeps just that column (as a one-column data frame)
restaurantsOfChain <- subset(SubAdvData,
restaurantType == "chain", select = "reservations")
# creating a subset of the dataset including only restaurantType = "independent"
restaurantsOfindependent <- subset(SubAdvData,
restaurantType == "independent", select = "reservations")
# Computing the t-test; var.equal = TRUE requests the pooled-variance
# (classic two-sample) form rather than the Welch approximation
tst <- t.test(restaurantsOfChain, restaurantsOfindependent, var.equal = TRUE)
tst##
## Two Sample t-test
##
## data: restaurantsOfChain and restaurantsOfindependent
## t = 98.711, df = 14998, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 10.11270 10.52245
## sample estimates:
## mean of x mean of y
## 42.77462 32.45705