# Q.1(a) Write R code to read the data into a dataframe called "df"

# The data frame is deliberately NOT attach()ed. Every column in this script
# is referenced explicitly (df$col), and attaching both df and dt would put
# two copies of each column on the search path, with the later attach
# silently masking the earlier one.
df <- read.csv("SubAdvData.csv")

# Q.1(b) Also write R code to read the data into a data table called "dt"

library(data.table)
dt <- fread("SubAdvData.csv")

# Q.2 Report the dimensions (number of rows, number of columns) of "df"

dim(df)

## [1] 15000     7

# Cross-tabulate ad type (rows) against restaurant type (columns), then
# append the row and column totals.
tab1 <- table(df[["adType"]], df[["restaurantType"]])
addmargins(tab1, margin = c(1, 2))

##           
##            chain independent   Sum
##   Curr Ads  2023        2972  4995
##   New Ads   1958        3010  4968
##   No Ads    2003        3034  5037
##   Sum       5984        9016 15000

# Each cell as a share of the grand total, rounded to 2 decimal places.
proptable <- round(prop.table(tab1), digits = 2)
proptable

##           
##            chain independent
##   Curr Ads  0.13        0.20
##   New Ads   0.13        0.20
##   No Ads    0.13        0.20

# Mean number of reservations within each restaurant type.
aggregate(
  x = df$reservations,
  by = list(df$restaurantType),
  FUN = mean
)

##       Group.1        x
## 1       chain 42.58205
## 2 independent 32.50688

# Q.3 Write R code to list the column names of the dataframe "df"

names(df)

# Mean number of reservations within each ad type.
aggregate(df$reservations,
          by = list(df$adType), FUN = mean)

##    Group.1        x
## 1 Curr Ads 34.03283
## 2  New Ads 41.62762
## 3   No Ads 33.96724

# PART 2

# Pearson correlation between reservations and phone calls.
cor(df$reservations, df$phoneCalls, method = "pearson")

## [1] 0.6516813

# The same relationship as a correlation matrix. Rows with an NA in either
# column are dropped; "complete.obs" is written out in full instead of
# relying on partial matching of the abbreviation "complete".
subset.df <- df[, c("reservations", "phoneCalls")]
corMat <- cor(subset.df, use = "complete.obs")
# Round to 3 decimal places
round(corMat, 3)

##              reservations phoneCalls
## reservations        1.000      0.652
## phoneCalls          0.652      1.000

# One-sample, two-sided test of a proportion against a null value of 0.4,
# without continuity correction. The counts are typed in by hand: 4995 is the
# "Curr Ads" row total and 15000 the grand total of the adType by
# restaurantType table printed earlier in this script.
prop.test(
  x = 4995,
  n = 15000,
  p = 0.4,
  alternative = "two.sided",
  conf.level = 0.95,
  correct = FALSE
)

## 
##  1-sample proportions test without continuity correction
## 
## data:  4995 out of 15000, null probability 0.4
## X-squared = 280.56, df = 1, p-value < 2.2e-16
## alternative hypothesis: true p is not equal to 0.4
## 95 percent confidence interval:
##  0.3255016 0.3405839
## sample estimates:
##     p 
## 0.333

# One-sample, two-sided t-test of the mean number of reservations against a
# hypothesised mean of 40. The result is stored in `res` and then printed.
res <- t.test(
  df$reservations,
  mu = 40,
  alternative = "two.sided",
  conf.level = 0.95
)
res

## 
##  One Sample t-test
## 
## data:  df$reservations
## t = -53.711, df = 14999, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 40
## 95 percent confidence interval:
##  36.39943 36.65297
## sample estimates:
## mean of x 
##   36.5262

# Reservations for each restaurant type, each kept as a one-column data
# frame. which() discards rows where the comparison is NA, matching what
# subset() does with its condition.
chain <- df[which(df$restaurantType == "chain"), "reservations",
            drop = FALSE]
independent <- df[which(df$restaurantType == "independent"), "reservations",
                  drop = FALSE]

# Two-sample t-test (pooled variance) comparing mean reservations of chain
# and independent restaurants.
tst <- t.test(x = chain, y = independent, var.equal = TRUE)
tst

## 
##  Two Sample t-test
## 
## data:  chain and independent
## t = 97.503, df = 14998, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   9.872633 10.277718
## sample estimates:
## mean of x mean of y 
##  42.58205  32.50688

# DAM quiz

# Team 6

# 26 Sep

# PART 2