# IJEC_Final_Version.R

library(mapproj)  #  map

## Warning: package 'mapproj' was built under R version 3.4.2

## Loading required package: maps

## Warning: package 'maps' was built under R version 3.4.2

library(reshape2) #  melt
library(nparcomp) #  gao_cs

## Warning: package 'nparcomp' was built under R version 3.4.2

## Loading required package: multcomp

## Warning: package 'multcomp' was built under R version 3.4.2

## Loading required package: mvtnorm

## Loading required package: survival

## Loading required package: TH.data

## Warning: package 'TH.data' was built under R version 3.4.2

## Loading required package: MASS

## 
## Attaching package: 'TH.data'

## The following object is masked from 'package:MASS':
## 
##     geyser

library(car)  # leveneTest and Anova Type III

## Warning: package 'car' was built under R version 3.4.2

library(heplots) #  etasquared

## Warning: package 'heplots' was built under R version 3.4.2

library(MASS) #  lda
library(psy) #  cronbach
library(igraph) # network graphs

## Warning: package 'igraph' was built under R version 3.4.2

## 
## Attaching package: 'igraph'

## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum

## The following object is masked from 'package:base':
## 
##     union

library(lsr) # partial eta squared
library(psych) # KMO

## 
## Attaching package: 'psych'

## The following object is masked from 'package:psy':
## 
##     wkappa

## The following object is masked from 'package:car':
## 
##     logit

library(biotools) # M Box test

## Warning: package 'biotools' was built under R version 3.4.2

## Loading required package: rpanel

## Warning: package 'rpanel' was built under R version 3.4.2

## Loading required package: tcltk

## Package `rpanel', version 1.1-3: type help(rpanel) for summary information

## Loading required package: tkrplot

## Loading required package: lattice

## Loading required package: SpatialEpi

## Warning: package 'SpatialEpi' was built under R version 3.4.2

## Loading required package: sp

## Warning: package 'sp' was built under R version 3.4.2

## 
## Attaching package: 'SpatialEpi'

## The following object is masked from 'package:igraph':
## 
##     normalize

## ---
## biotools version 3.1

##

## 
## Attaching package: 'biotools'

## The following object is masked from 'package:heplots':
## 
##     boxM

library(vcd) # goodfit

## Warning: package 'vcd' was built under R version 3.4.2

## Loading required package: grid

library(agricolae)

## Warning: package 'agricolae' was built under R version 3.4.2

## 
## Attaching package: 'agricolae'

## The following object is masked from 'package:igraph':
## 
##     similarity

library(lavaan) # SEM4

## Warning: package 'lavaan' was built under R version 3.4.2

## This is lavaan 0.5-23.1097

## lavaan is BETA software! Please report any bugs.

## 
## Attaching package: 'lavaan'

## The following object is masked from 'package:psych':
## 
##     cor2cov

library(semPlot) # SEM graph

## Warning: package 'semPlot' was built under R version 3.4.2

library(Hmisc) # correlation matrix

## Warning: package 'Hmisc' was built under R version 3.4.2

## Loading required package: Formula

## Loading required package: ggplot2

## 
## Attaching package: 'ggplot2'

## The following objects are masked from 'package:psych':
## 
##     %+%, alpha

## 
## Attaching package: 'Hmisc'

## The following object is masked from 'package:psych':
## 
##     describe

## The following objects are masked from 'package:base':
## 
##     format.pval, round.POSIXt, trunc.POSIXt, units

library(MVN) # multivariate normality

## Warning: package 'MVN' was built under R version 3.4.2

## sROC 0.1-2 loaded

## 
## Attaching package: 'MVN'

## The following object is masked from 'package:psych':
## 
##     mardia

library(plyr) # count

## 
## Attaching package: 'plyr'

## The following objects are masked from 'package:Hmisc':
## 
##     is.discrete, summarize

## The following object is masked from 'package:maps':
## 
##     ozone

## PRETEST 1
cat("\014")  # cleans screen

rm(list = ls(all.names = TRUE))  # remove variables in working memory
setwd("C:/Users/Erik Ernesto Vazquez/Downloads/Results")  # sets working directory

# The export is read twice: once skipping the first two lines to get the data
# rows, and once with the default header handling to recover the column names
# from the first line.
Pretest <- read.csv("Main_Study/Main_study__3x3_United_States.csv", skip = 2, header = FALSE)  # reads raw data from Qualtrics
NamesandHeaders <- read.csv("Main_Study/Main_study__3x3_United_States.csv")  # read again only for its header names
names(Pretest) <- names(NamesandHeaders)
Pretest$V6 <- as.character(Pretest$V6)
# Keep the first row for each distinct V6 value (the original note refers to
# duplicate IPs, so V6 is presumably the IP address -- TODO confirm) and only
# rows with t2.frmwrk_3 > 0 and t12_3 > 0. which() drops rows where the
# condition evaluates to NA.
keep_rows <- which(!duplicated(Pretest$V6) & Pretest$t2.frmwrk_3 > 0 & Pretest$t12_3 > 0)
Pretest <- Pretest[keep_rows, ]
rm(keep_rows)
# Columns are picked by position: 1 = subject id, 34:36 = the three framework
# ratings, 596:598 and 603:604 = demographics (renamed on the next line).
framework.wide <- data.frame(Pretest[1], Pretest[34:36], Pretest[596:598], Pretest[603:604])
names(framework.wide) <- c("Subject", "Credence", "Experience", "Search", "Age", "Gender", "Income", "Education", "RE")
table(framework.wide$Gender)

## 
##   1   2 
## 539 506

# Balance the pretest sample: first 60 respondents of each Gender code
# (1 is stored as `women`, 2 as `men`).
women <- subset(framework.wide, Gender == 1)
men <- subset(framework.wide, Gender == 2)
# head() instead of [1:60, ]: indexing past the last row silently pads the
# result with all-NA rows, whereas head() just returns the rows that exist.
Pretest <- rbind(head(women, 60), head(men, 60))

table(Pretest$Gender)

## 
##  1  2 
## 60 60

# Age stores the birth year (the group means below are ~1983), so the mean age
# is the reference year minus the mean birth year. The original subtracted in
# the opposite order and therefore printed a negative age.
# NOTE(review): 2014 is presumably the data-collection year -- confirm.
2014-mean(Pretest$Age)

## [1] 31.25

sd(Pretest$Age)

## [1] 8.758429

# Mean and SD of birth year per Gender code.
aggregate(Pretest$Age,list(Pretest$Gender),mean)

##   Group.1        x
## 1       1 1983.067
## 2       2 1982.433

aggregate(Pretest$Age,list(Pretest$Gender),sd)

##   Group.1        x
## 1       1 8.938332
## 2       2 8.638437

# Re-split the balanced sample and compare birth year between the two groups
# (independent groups, Welch test).
women<-subset(Pretest,Pretest$Gender==1)
men<-subset(Pretest,Pretest$Gender==2)
t.test(women$Age,men$Age) ## groups are equivalent in Age

## 
##  Welch Two Sample t-test
## 
## data:  women$Age and men$Age
## t = 0.39466, df = 117.86, p-value = 0.6938
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2.544578  3.811245
## sample estimates:
## mean of x mean of y 
##  1983.067  1982.433

# Five-number summary of the three framework ratings, selected by name.
summary(Pretest[, c("Credence", "Experience", "Search")])

##     Credence       Experience        Search     
##  Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.750   1st Qu.:3.000   1st Qu.:3.000  
##  Median :7.000   Median :4.000   Median :3.000  
##  Mean   :6.125   Mean   :4.675   Mean   :4.117  
##  3rd Qu.:8.000   3rd Qu.:6.000   3rd Qu.:6.000  
##  Max.   :9.000   Max.   :9.000   Max.   :9.000

# Pairwise comparisons of the three framework ratings (Credence vs Experience,
# then Experience vs Search).
# NOTE(review): the three ratings are columns of the same Pretest rows, i.e.
# each subject appears to have rated all three, yet the default Welch
# two-sample test treats them as independent groups (df ~ 238 for 120
# subjects). If these are repeated measures, t.test(..., paired = TRUE) would
# be the matching test -- confirm against the study design.
t.test(Pretest$Credence,Pretest$Experience) ## Validation of SEC levels of ease to evaluate quality

## 
##  Welch Two Sample t-test
## 
## data:  Pretest$Credence and Pretest$Experience
## t = 5.0975, df = 237.96, p-value = 7.019e-07
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.8896311 2.0103689
## sample estimates:
## mean of x mean of y 
##     6.125     4.675

t.test(Pretest$Experience,Pretest$Search) ## The chance of error type 2 is very small according to Winter 2013

## 
##  Welch Two Sample t-test
## 
## data:  Pretest$Experience and Pretest$Search
## t = 2.0762, df = 235.43, p-value = 0.03896
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.02853859 1.08812808
## sample estimates:
## mean of x mean of y 
##  4.675000  4.116667

# Reshape to long format: one row per subject x framework rating, keeping the
# demographics as identifiers.
framework.long <- melt(
  Pretest,
  id.vars = c("Subject", "Age", "Gender", "Income", "Education", "RE"),
  measure.vars = c("Credence", "Experience", "Search"),
  variable.name = "Framework",
  value.name = "Measurement"
)
# Standard deviation of the rating within each framework level.
aggregate(
  x = framework.long$Measurement,
  by = list(framework.long$Framework),
  FUN = sd
)

##      Group.1        x
## 1   Credence 2.217435
## 2 Experience 2.189212
## 3     Search 1.971150

## PRETEST 2
cat("\014")  # cleans screen

rm(list = ls(all.names = TRUE))  # remove variables in working memory
setwd("C:/Users/Erik Ernesto Vazquez/Downloads/IJEC Data recollection")  # sets working directory

# Note: despite its name, MainStudy holds the second pretest (tie strength and
# media richness) from here on.
MainStudy <- read.csv("Pretest Analysis Tie Strength and Media Richness.csv", header = TRUE)  # reads raw data from Qualtrics
# Keep respondents with X3 (birth year, see the age computation below) before
# 1991 and a positive answer on the last item (_15) of each of the five
# question blocks. subset() also drops rows where the condition is NA.
MainStudy <- subset(
  MainStudy,
  X3 < 1991 & X1_15 > 0 & X2_15 > 0 & X3_15 > 0 & X4_15 > 0 & X5_15 > 0
)
table(MainStudy$V3)

## 
##  9 10 
## 25 26

# Balance the sample on V3 (codes 9 and 10): keep the first 25 rows of each.
MainStudyF <- subset(MainStudy, V3 == 9)
MainStudyM <- subset(MainStudy, V3 == 10)
# head() instead of [1:25, ]: indexing past the last row silently pads the
# result with all-NA rows, whereas head() just returns the rows that exist.
MainStudy <- rbind(head(MainStudyF, 25), head(MainStudyM, 25))
table(MainStudy$V3)

## 
##  9 10 
## 25 25

# X3 is a birth year (values 1949-1990 in the summary further down), so the
# mean age is the reference year minus the mean birth year. The original
# subtracted in the opposite order and printed a negative age.
# NOTE(review): 2014 is presumably the intended reference year -- confirm.
2014-mean(MainStudy$X3)

## [1] 31.88

sd(MainStudy$X3)

## [1] 8.66294

## Reliability of the content-vividness (media richness) items
# Each question block X1/X2/X3 has one column per platform, with suffixes
# 1..7 and 15. Every block is stacked into one long column so the three
# blocks can be treated as three items of one scale.
mr_suffixes <- c(1:7, 15)
mr_keys <- c("ResponseId", "X3", "V3")
mr_block1 <- paste0("X1_", mr_suffixes)
mr_block2 <- paste0("X2_", mr_suffixes)
mr_block3 <- paste0("X3_", mr_suffixes)

MainStudyMelt1 <- melt(
  MainStudy,
  id.vars = c(mr_keys, mr_block1),
  measure.vars = mr_block1,
  variable.name = "MediaRichness1",
  value.name = "MRItem1"
)
MainStudyMelt2 <- melt(
  MainStudy,
  id.vars = c(mr_keys, mr_block2),
  measure.vars = mr_block2,
  variable.name = "MediaRichness2",
  value.name = "MRItem2"
)
MainStudyMelt3 <- melt(
  MainStudy,
  id.vars = c(mr_keys, mr_block3),
  measure.vars = mr_block3,
  variable.name = "MediaRichness3",
  value.name = "MRItem3"
)
# Drop the helper vectors so the workspace holds the same objects as before.
rm(mr_suffixes, mr_keys, mr_block1, mr_block2, mr_block3)

# Cronbach's alpha over the three stacked items (0.81 in the output below).
cronbach(cbind(
  MainStudyMelt1$MRItem1,
  MainStudyMelt2$MRItem2,
  MainStudyMelt3$MRItem3
))

## $sample.size
## [1] 400
## 
## $number.of.items
## [1] 3
## 
## $alpha
## [1] 0.8172619

## Reliability of the tie-strength items
# Question blocks X4 and X5 have one column per platform (suffixes 1..7 and
# 15). Each block is stacked into one long column so the two blocks act as
# two items of one scale.
ts_suffixes <- c(1:7, 15)
ts_keys <- c("ResponseId", "X3", "V3")
ts_block1 <- paste0("X4_", ts_suffixes)
ts_block2 <- paste0("X5_", ts_suffixes)

MainStudyMelt4 <- melt(
  MainStudy,
  id.vars = c(ts_keys, ts_block1),
  measure.vars = ts_block1,
  variable.name = "TieStr1",
  value.name = "TieStrItem1"
)
MainStudyMelt5 <- melt(
  MainStudy,
  id.vars = c(ts_keys, ts_block2),
  measure.vars = ts_block2,
  variable.name = "TieStr2",
  value.name = "TieStrItem2"
)
# Drop the helper vectors so the workspace holds the same objects as before.
rm(ts_suffixes, ts_keys, ts_block1, ts_block2)

# Cronbach's alpha over the two stacked items (0.89 in the output below).
cronbach(cbind(
  MainStudyMelt4$TieStrItem1,
  MainStudyMelt5$TieStrItem2
))

## $sample.size
## [1] 400
## 
## $number.of.items
## [1] 2
## 
## $alpha
## [1] 0.8946946

# Bind the five stacked item vectors (three media-richness, two tie-strength)
# into one data frame. The matrix has no column names, so data.frame() labels
# the columns X1..X5.
validity <- data.frame(
  cbind(
    MainStudyMelt1[["MRItem1"]],
    MainStudyMelt2[["MRItem2"]],
    MainStudyMelt3[["MRItem3"]],
    MainStudyMelt4[["TieStrItem1"]],
    MainStudyMelt5[["TieStrItem2"]]
  )
)
# Mardia's test of multivariate normality on the five items.
mardiaTest(validity)

##    Mardia's Multivariate Normality Test 
## --------------------------------------- 
##    data : validity 
## 
##    g1p            : 2.383954 
##    chi.skew       : 158.9302 
##    p.value.skew   : 1.019388e-17 
## 
##    g2p            : 36.6415 
##    z.kurtosis     : 1.961972 
##    p.value.kurt   : 0.04976572 
## 
##    chi.small.skew : 160.5225 
##    p.value.small  : 5.406671e-18 
## 
##    Result          : Data are not multivariate normal. 
## ---------------------------------------

# Kaiser-Meyer-Olkin sampling adequacy for the five items.
KMO(r = validity)

## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = validity)
## Overall MSA =  0.79
## MSA for each item = 
##   X1   X2   X3   X4   X5 
## 0.88 0.82 0.83 0.75 0.73

# Maximum-likelihood factor analysis with two varimax-rotated factors.
factanal(x = validity, factors = 2, rotation = "varimax")

## 
## Call:
## factanal(x = validity, factors = 2, rotation = "varimax")
## 
## Uniquenesses:
##    X1    X2    X3    X4    X5 
## 0.533 0.295 0.309 0.322 0.005 
## 
## Loadings:
##    Factor1 Factor2
## X1 0.636   0.249  
## X2 0.800   0.254  
## X3 0.694   0.457  
## X4 0.384   0.729  
## X5 0.309   0.948  
## 
##                Factor1 Factor2
## SS loadings      1.769   1.766
## Proportion Var   0.354   0.353
## Cumulative Var   0.354   0.707
## 
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 0.23 on 1 degree of freedom.
## The p-value is 0.633

# PCA on the items as-is (no scale. argument is passed to prcomp here).
# NOTE(review): the original note on this line said two components explain 69%
# of the variance, but the recorded output below gives a cumulative proportion
# of 0.8228 for PC1-PC2 -- confirm which figure is the one reported.
summary(prcomp(validity)) ## First two components: 82% of the variance per the output below

## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5
## Standard deviation     4.7071 2.2192 1.57373 1.41028 1.16880
## Proportion of Variance 0.6732 0.1496 0.07525 0.06043 0.04151
## Cumulative Proportion  0.6732 0.8228 0.89807 0.95849 1.00000

# Scree plot of the PCA on the items as-is (same call as the summary above).
screeplot(prcomp(validity),type="lines")

# Biplot with each observation drawn as "." to keep the plot readable.
# NOTE(review): this PCA is run with scale. = TRUE, whereas the summary and
# scree plot use the unscaled items -- confirm the difference is intended.
biplot(prcomp(validity,scale.=TRUE),cex=0.5,xlabs=rep(".",nrow(validity)))

# Pairwise correlations with their p-values.
rcorr(as.matrix(validity))

##      X1   X2   X3   X4   X5
## X1 1.00 0.57 0.56 0.43 0.43
## X2 0.57 1.00 0.67 0.50 0.49
## X3 0.56 0.67 1.00 0.59 0.65
## X4 0.43 0.50 0.59 1.00 0.81
## X5 0.43 0.49 0.65 0.81 1.00
## 
## n= 400 
## 
## 
## P
##    X1 X2 X3 X4 X5
## X1     0  0  0  0
## X2  0     0  0  0
## X3  0  0     0  0
## X4  0  0  0     0
## X5  0  0  0  0

# Composite scores per platform.
# Raw item codes run from 39 to 47 (see the summary output further down), so
# subtracting 38 after averaging puts the scores on a 1-9 scale.
score_offset <- 38
# Platform name -> column suffix used in the X1_..X5_ item blocks.
platform_suffix <- c(
  Facebook = "1", Twitter = "2", YouTube = "3", Instagram = "4",
  Pinterest = "5", SnapChat = "6", LinkedIn = "7", SecondLife = "15"
)

# Media richness: mean of the X3, X1 and X2 items for the platform.
for (platform in names(platform_suffix)) {
  sfx <- platform_suffix[[platform]]
  MainStudy[[paste0("MR", platform)]] <-
    (MainStudy[[paste0("X3_", sfx)]] +
       MainStudy[[paste0("X1_", sfx)]] +
       MainStudy[[paste0("X2_", sfx)]]) / 3 - score_offset
}

# Tie strength: mean of the X4 and X5 items for the platform.
for (platform in names(platform_suffix)) {
  sfx <- platform_suffix[[platform]]
  MainStudy[[paste0("TS", platform)]] <-
    (MainStudy[[paste0("X4_", sfx)]] +
       MainStudy[[paste0("X5_", sfx)]]) / 2 - score_offset
}

# Drop the helpers so the workspace holds the same objects as before.
rm(score_offset, platform_suffix, platform, sfx)

# Column-by-column summary of the whole data frame, including the MR* and TS*
# composites added above. In the recorded output the raw items span 39-47 and
# the composites span 1-9.
summary(MainStudy)

##           StartDate            EndDate       Status           IPAddress 
##  9/28/2017 5:29: 3   9/28/2017 2:26: 2   Min.   :0   37.187.147.158: 2  
##  9/28/2017 1:44: 2   9/28/2017 3:43: 2   1st Qu.:0   1.22.132.15   : 1  
##  9/28/2017 3:20: 2   9/28/2017 5:02: 2   Median :0   103.204.47.33 : 1  
##  9/28/2017 4:51: 2   9/28/2017 5:06: 2   Mean   :0   103.25.47.134 : 1  
##  9/28/2017 5:52: 2   9/28/2017 5:36: 2   3rd Qu.:0   103.88.77.3   : 1  
##  9/28/2017 8:30: 2   9/28/2017 1:21: 1   Max.   :0   106.51.152.46 : 1  
##  (Other)       :37   (Other)       :39               (Other)       :43  
##     Progress   Duration..in.seconds.    Finished         RecordedDate
##  Min.   :100   Min.   : 374.0        Min.   :1   9/28/2017 2:26: 2   
##  1st Qu.:100   1st Qu.: 424.2        1st Qu.:1   9/28/2017 3:43: 2   
##  Median :100   Median : 490.0        Median :1   9/28/2017 5:02: 2   
##  Mean   :100   Mean   : 582.4        Mean   :1   9/28/2017 5:06: 2   
##  3rd Qu.:100   3rd Qu.: 634.2        3rd Qu.:1   9/28/2017 5:36: 2   
##  Max.   :100   Max.   :1753.0        Max.   :1   9/28/2017 1:21: 1   
##                                                  (Other)       :39   
##              ResponseId RecipientLastName RecipientFirstName
##  R_10T8rIxyUdDqUvY: 1   Mode:logical      Mode:logical      
##  R_1BoX1ncuNMXu7TD: 1   NA's:50           NA's:50           
##  R_1CazBZ3AMwO2Xwb: 1                                       
##  R_1f2xJMMytF6btHR: 1                                       
##  R_1hALgNb68Qa7d9i: 1                                       
##  R_1i2LOTcQ6vqrYPd: 1                                       
##  (Other)          :44                                       
##  RecipientEmail ExternalReference LocationLatitude LocationLongitude
##  Mode:logical   Mode:logical      Min.   : 8.00    Min.   :-122.68  
##  NA's:50        NA's:50           1st Qu.:13.08    1st Qu.: -71.70  
##                                   Median :15.92    Median :  77.09  
##                                   Mean   :23.26    Mean   :  29.41  
##                                   3rd Qu.:39.12    3rd Qu.:  80.28  
##                                   Max.   :53.75    Max.   : 121.02  
##                                                                     
##  DistributionChannel UserLanguage t0_First.Click   t0_Last.Click   
##  anonymous:50          : 2        Min.   : 0.000   Min.   : 0.000  
##                      EN:48        1st Qu.: 0.000   1st Qu.: 0.000  
##                                   Median : 0.000   Median : 0.000  
##                                   Mean   : 2.289   Mean   : 4.524  
##                                   3rd Qu.: 0.000   3rd Qu.: 0.000  
##                                   Max.   :36.752   Max.   :59.745  
##                                                                    
##  t0_Page.Submit   t0_Click.Count      X0B_Browser        X0B_Version
##  Min.   : 15.97   Min.   :0.00   Chrome     :35   61.0.3163.100:20  
##  1st Qu.: 17.50   1st Qu.:0.00   Firefox    :12   55           : 8  
##  Median : 20.49   Median :0.00   Safari     : 2   60.0.3112.113: 3  
##  Mean   : 43.52   Mean   :0.66   Safari iPad: 1   49.0.2623.112: 2  
##  3rd Qu.: 27.98   3rd Qu.:0.00   Chrome iPad: 0   5.1.7        : 2  
##  Max.   :416.32   Max.   :9.00   Edge       : 0   60.0.3112.90 : 2  
##                                  (Other)    : 0   (Other)      :13  
##       X0B_Operating.System   X0B_Resolution       V1           V2   
##  Windows NT 6.1 :19        1366x768 :19     Min.   :10   Min.   :9  
##  Windows NT 10.0:11        1280x800 : 6     1st Qu.:10   1st Qu.:9  
##  Android 6.0.1  : 3        1280x1024: 5     Median :10   Median :9  
##  Macintosh      : 3        1024x768 : 3     Mean   :10   Mean   :9  
##  Windows NT 5.1 : 3        360x640  : 3     3rd Qu.:10   3rd Qu.:9  
##  Windows NT 6.3 : 3        1440x900 : 2     Max.   :10   Max.   :9  
##  (Other)        : 8        (Other)  :12                             
##        V3                       V4     tV_First.Click   tV_Last.Click   
##  Min.   : 9.0   4,5,6,7,8,9,10   :13   Min.   : 1.250   Min.   : 9.257  
##  1st Qu.: 9.0   4,5,6,7,8,10     : 6   1st Qu.: 2.507   1st Qu.:17.028  
##  Median : 9.5   4,5,6            : 4   Median : 3.454   Median :21.335  
##  Mean   : 9.5   4,5,6,7,8,9,10,11: 3   Mean   : 4.995   Mean   :23.431  
##  3rd Qu.:10.0   4                : 2   3rd Qu.: 4.787   3rd Qu.:27.987  
##  Max.   :10.0   4,5,6,10         : 2   Max.   :52.247   Max.   :69.337  
##                 (Other)          :20                                    
##  tV_Page.Submit  tV_Click.Count       X1_1            X1_2      
##  Min.   :10.17   Min.   : 4.00   Min.   :39.00   Min.   :39.00  
##  1st Qu.:18.41   1st Qu.: 8.00   1st Qu.:44.00   1st Qu.:43.00  
##  Median :23.34   Median :10.00   Median :45.00   Median :45.00  
##  Mean   :25.40   Mean   :11.18   Mean   :44.76   Mean   :44.12  
##  3rd Qu.:30.00   3rd Qu.:11.00   3rd Qu.:46.00   3rd Qu.:45.75  
##  Max.   :75.09   Max.   :32.00   Max.   :47.00   Max.   :47.00  
##                                                                 
##       X1_3            X1_4            X1_5           X1_6      
##  Min.   :40.00   Min.   :39.00   Min.   :39.0   Min.   :39.00  
##  1st Qu.:44.00   1st Qu.:43.00   1st Qu.:43.0   1st Qu.:41.25  
##  Median :45.00   Median :45.00   Median :44.5   Median :43.00  
##  Mean   :45.08   Mean   :44.42   Mean   :44.1   Mean   :43.16  
##  3rd Qu.:47.00   3rd Qu.:46.00   3rd Qu.:46.0   3rd Qu.:44.75  
##  Max.   :47.00   Max.   :47.00   Max.   :47.0   Max.   :47.00  
##                                                                
##       X1_7           X1_15       t1_First.Click   t1_Last.Click    
##  Min.   :39.00   Min.   :39.00   Min.   : 0.836   Min.   :  4.256  
##  1st Qu.:41.00   1st Qu.:41.00   1st Qu.: 4.304   1st Qu.: 21.102  
##  Median :43.00   Median :43.00   Median : 6.212   Median : 39.944  
##  Mean   :43.14   Mean   :42.74   Mean   : 7.830   Mean   : 43.273  
##  3rd Qu.:45.00   3rd Qu.:44.75   3rd Qu.: 8.868   3rd Qu.: 60.008  
##  Max.   :47.00   Max.   :47.00   Max.   :33.204   Max.   :122.232  
##                                                                    
##  t1_Page.Submit   t1_Click.Count       X2_1            X2_2      
##  Min.   : 61.11   Min.   : 8.00   Min.   :41.00   Min.   :39.00  
##  1st Qu.: 62.59   1st Qu.: 9.00   1st Qu.:44.00   1st Qu.:42.25  
##  Median : 65.43   Median :11.50   Median :45.00   Median :44.50  
##  Mean   :103.60   Mean   :13.38   Mean   :44.82   Mean   :43.82  
##  3rd Qu.: 84.50   3rd Qu.:17.00   3rd Qu.:46.00   3rd Qu.:45.00  
##  Max.   :981.92   Max.   :31.00   Max.   :47.00   Max.   :47.00  
##                                                                  
##       X2_3           X2_4            X2_5            X2_6      
##  Min.   :41.0   Min.   :39.00   Min.   :39.00   Min.   :39.00  
##  1st Qu.:45.0   1st Qu.:42.25   1st Qu.:43.00   1st Qu.:40.25  
##  Median :46.0   Median :44.00   Median :44.50   Median :43.00  
##  Mean   :45.5   Mean   :43.80   Mean   :43.88   Mean   :42.30  
##  3rd Qu.:47.0   3rd Qu.:45.00   3rd Qu.:45.75   3rd Qu.:44.00  
##  Max.   :47.0   Max.   :47.00   Max.   :47.00   Max.   :47.00  
##                                                                
##       X2_7           X2_15       Q774_First.Click Q774_Last.Click  
##  Min.   :39.00   Min.   :39.00   Min.   : 0.692   Min.   :  3.404  
##  1st Qu.:41.25   1st Qu.:40.25   1st Qu.: 4.062   1st Qu.: 18.695  
##  Median :43.00   Median :43.00   Median : 7.470   Median : 33.911  
##  Mean   :42.96   Mean   :42.34   Mean   : 9.889   Mean   : 50.221  
##  3rd Qu.:45.00   3rd Qu.:44.00   3rd Qu.:12.105   3rd Qu.: 60.494  
##  Max.   :47.00   Max.   :47.00   Max.   :64.105   Max.   :555.504  
##                                                                    
##  Q774_Page.Submit Q774_Click.Count      X3_1            X3_2      
##  Min.   : 61.16   Min.   : 8.00    Min.   :39.00   Min.   :39.00  
##  1st Qu.: 62.66   1st Qu.: 8.00    1st Qu.:43.00   1st Qu.:40.25  
##  Median : 69.47   Median :11.00    Median :45.00   Median :43.00  
##  Mean   :102.26   Mean   :13.22    Mean   :44.34   Mean   :42.70  
##  3rd Qu.:102.92   3rd Qu.:14.75    3rd Qu.:46.00   3rd Qu.:45.00  
##  Max.   :555.58   Max.   :50.00    Max.   :47.00   Max.   :47.00  
##                                                                   
##       X3_3            X3_4            X3_5            X3_6      
##  Min.   :39.00   Min.   :39.00   Min.   :39.00   Min.   :39.00  
##  1st Qu.:43.00   1st Qu.:41.00   1st Qu.:42.00   1st Qu.:39.00  
##  Median :45.00   Median :43.00   Median :43.00   Median :41.00  
##  Mean   :44.46   Mean   :42.66   Mean   :43.18   Mean   :41.64  
##  3rd Qu.:46.00   3rd Qu.:44.75   3rd Qu.:45.00   3rd Qu.:43.75  
##  Max.   :47.00   Max.   :47.00   Max.   :47.00   Max.   :47.00  
##                                                                 
##       X3_7           X3_15       Q776_First.Click Q776_Last.Click  
##  Min.   :39.00   Min.   :39.00   Min.   : 0.849   Min.   :  3.421  
##  1st Qu.:40.25   1st Qu.:39.00   1st Qu.: 2.640   1st Qu.: 19.424  
##  Median :43.00   Median :39.00   Median : 5.031   Median : 30.837  
##  Mean   :42.46   Mean   :40.98   Mean   : 8.607   Mean   : 38.878  
##  3rd Qu.:44.00   3rd Qu.:42.75   3rd Qu.: 8.897   3rd Qu.: 59.546  
##  Max.   :47.00   Max.   :46.00   Max.   :72.620   Max.   :116.506  
##                                                                    
##  Q776_Page.Submit Q776_Click.Count      X4_1            X4_2      
##  Min.   : 61.13   Min.   : 8.00    Min.   :40.00   Min.   :39.00  
##  1st Qu.: 62.72   1st Qu.: 8.00    1st Qu.:44.25   1st Qu.:43.00  
##  Median : 71.14   Median :10.00    Median :46.00   Median :44.00  
##  Mean   : 96.13   Mean   :13.22    Mean   :45.40   Mean   :43.96  
##  3rd Qu.: 88.52   3rd Qu.:15.75    3rd Qu.:47.00   3rd Qu.:46.00  
##  Max.   :683.66   Max.   :42.00    Max.   :47.00   Max.   :47.00  
##                                                                   
##       X4_3            X4_4            X4_5            X4_6      
##  Min.   :39.00   Min.   :39.00   Min.   :39.00   Min.   :39.00  
##  1st Qu.:41.25   1st Qu.:42.25   1st Qu.:40.00   1st Qu.:39.00  
##  Median :44.00   Median :44.00   Median :42.00   Median :42.00  
##  Mean   :43.46   Mean   :43.66   Mean   :42.32   Mean   :42.12  
##  3rd Qu.:45.75   3rd Qu.:46.00   3rd Qu.:44.00   3rd Qu.:44.75  
##  Max.   :47.00   Max.   :47.00   Max.   :47.00   Max.   :47.00  
##                                                                 
##       X4_7           X4_15      Q778_First.Click Q778_Last.Click  
##  Min.   :39.00   Min.   :39.0   Min.   : 0.885   Min.   :  4.476  
##  1st Qu.:42.25   1st Qu.:39.0   1st Qu.: 4.983   1st Qu.: 27.854  
##  Median :44.00   Median :40.0   Median : 7.396   Median : 44.758  
##  Mean   :43.80   Mean   :41.4   Mean   : 9.577   Mean   : 46.593  
##  3rd Qu.:45.75   3rd Qu.:44.0   3rd Qu.:10.875   3rd Qu.: 60.206  
##  Max.   :47.00   Max.   :47.0   Max.   :65.250   Max.   :121.689  
##                                                                   
##  Q778_Page.Submit Q778_Click.Count      X5_1            X5_2      
##  Min.   : 61.08   Min.   : 8.00    Min.   :40.00   Min.   :39.00  
##  1st Qu.: 61.93   1st Qu.: 9.00    1st Qu.:45.00   1st Qu.:41.25  
##  Median : 62.95   Median :10.50    Median :46.00   Median :44.00  
##  Mean   : 73.21   Mean   :15.24    Mean   :45.36   Mean   :43.46  
##  3rd Qu.: 76.19   3rd Qu.:16.75    3rd Qu.:47.00   3rd Qu.:46.00  
##  Max.   :157.36   Max.   :66.00    Max.   :47.00   Max.   :47.00  
##                                                                   
##       X5_3            X5_4            X5_5            X5_6      
##  Min.   :39.00   Min.   :39.00   Min.   :39.00   Min.   :39.00  
##  1st Qu.:41.00   1st Qu.:41.00   1st Qu.:39.00   1st Qu.:39.00  
##  Median :44.00   Median :43.00   Median :42.00   Median :41.50  
##  Mean   :43.18   Mean   :42.96   Mean   :42.12   Mean   :41.96  
##  3rd Qu.:45.00   3rd Qu.:45.00   3rd Qu.:44.00   3rd Qu.:44.00  
##  Max.   :47.00   Max.   :47.00   Max.   :47.00   Max.   :47.00  
##                                                                 
##       X5_7           X5_15       Q780_First.Click Q780_Last.Click  
##  Min.   :39.00   Min.   :39.00   Min.   : 0.882   Min.   :  5.832  
##  1st Qu.:42.00   1st Qu.:39.00   1st Qu.: 4.543   1st Qu.: 32.826  
##  Median :43.00   Median :40.00   Median : 8.931   Median : 49.100  
##  Mean   :43.34   Mean   :40.94   Mean   : 9.971   Mean   : 52.914  
##  3rd Qu.:45.75   3rd Qu.:42.75   3rd Qu.:12.747   3rd Qu.: 62.276  
##  Max.   :47.00   Max.   :47.00   Max.   :40.273   Max.   :196.035  
##                                                                    
##  Q780_Page.Submit Q780_Click.Count       X3             X4      
##  Min.   : 60.93   Min.   : 8.00    Min.   :1949   Min.   :1.00  
##  1st Qu.: 62.53   1st Qu.: 9.00    1st Qu.:1979   1st Qu.:1.00  
##  Median : 64.97   Median :11.00    Median :1986   Median :3.50  
##  Mean   : 86.12   Mean   :15.42    Mean   :1982   Mean   :3.18  
##  3rd Qu.: 75.39   3rd Qu.:16.50    3rd Qu.:1988   3rd Qu.:5.00  
##  Max.   :505.77   Max.   :70.00    Max.   :1990   Max.   :7.00  
##                                                                 
##  t3_First.Click    t3_Last.Click     t3_Page.Submit    t3_Click.Count 
##  Min.   :  1.404   Min.   :  4.553   Min.   :  5.587   Min.   : 2.00  
##  1st Qu.:  2.312   1st Qu.:  7.345   1st Qu.:  8.748   1st Qu.: 2.00  
##  Median :  3.377   Median :  8.812   Median : 11.464   Median : 2.00  
##  Mean   :  9.071   Mean   : 16.427   Mean   : 18.759   Mean   : 2.94  
##  3rd Qu.:  5.055   3rd Qu.: 16.309   3rd Qu.: 18.027   3rd Qu.: 3.00  
##  Max.   :203.935   Max.   :205.335   Max.   :208.775   Max.   :13.00  
##                                                                       
##        X5              X6              X6_6_TEXT  t4_First.Click  
##  Min.   : 9.00   Min.   :1.00               :38   Min.   : 1.254  
##  1st Qu.:12.00   1st Qu.:2.00   Indian      : 4   1st Qu.: 2.792  
##  Median :12.00   Median :2.00   asian       : 2   Median : 3.466  
##  Mean   :12.06   Mean   :3.22   Asian       : 2   Mean   : 5.989  
##  3rd Qu.:13.00   3rd Qu.:4.75   Asian/Indian: 1   3rd Qu.: 4.179  
##  Max.   :15.00   Max.   :6.00   Mixed       : 1   Max.   :71.134  
##                                 (Other)     : 2                   
##  t4_Last.Click    t4_Page.Submit   t4_Click.Count    mTurkCode    
##  Min.   : 3.004   Min.   : 3.834   Min.   : 2.00   Min.   : 0.00  
##  1st Qu.: 5.530   1st Qu.: 7.016   1st Qu.: 2.00   1st Qu.:21.25  
##  Median : 7.705   Median : 9.520   Median : 2.50   Median :44.00  
##  Mean   :11.190   Mean   :16.241   Mean   : 3.78   Mean   :47.90  
##  3rd Qu.:12.505   3rd Qu.:16.907   3rd Qu.: 4.00   3rd Qu.:78.50  
##  Max.   :75.028   Max.   :85.504   Max.   :16.00   Max.   :99.00  
##                                                                   
##    MRFacebook      MRTwitter       MRYouTube      MRInstagram   
##  Min.   :2.333   Min.   :1.000   Min.   :3.667   Min.   :2.000  
##  1st Qu.:6.000   1st Qu.:4.333   1st Qu.:6.333   1st Qu.:4.333  
##  Median :6.667   Median :6.000   Median :7.333   Median :6.000  
##  Mean   :6.640   Mean   :5.547   Mean   :7.013   Mean   :5.627  
##  3rd Qu.:7.667   3rd Qu.:6.667   3rd Qu.:8.000   3rd Qu.:7.000  
##  Max.   :9.000   Max.   :8.333   Max.   :9.000   Max.   :9.000  
##                                                                 
##   MRPinterest      MRSnapChat      MRLinkedIn     MRSecondLife  
##  Min.   :1.667   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:2.417   1st Qu.:3.667   1st Qu.:2.417  
##  Median :6.000   Median :4.333   Median :5.000   Median :3.667  
##  Mean   :5.720   Mean   :4.367   Mean   :4.853   Mean   :4.020  
##  3rd Qu.:7.333   3rd Qu.:5.833   3rd Qu.:6.333   3rd Qu.:5.500  
##  Max.   :8.333   Max.   :8.667   Max.   :8.667   Max.   :8.667  
##                                                                 
##    TSFacebook      TSTwitter       TSYouTube      TSInstagram   
##  Min.   :2.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:6.625   1st Qu.:4.125   1st Qu.:3.125   1st Qu.:3.625  
##  Median :7.750   Median :6.000   Median :5.500   Median :5.500  
##  Mean   :7.380   Mean   :5.710   Mean   :5.320   Mean   :5.310  
##  3rd Qu.:9.000   3rd Qu.:7.500   3rd Qu.:7.500   3rd Qu.:7.000  
##  Max.   :9.000   Max.   :9.000   Max.   :9.000   Max.   :9.000  
##                                                                 
##   TSPinterest     TSSnapChat      TSLinkedIn    TSSecondLife  
##  Min.   :1.00   Min.   :1.000   Min.   :1.00   Min.   :1.000  
##  1st Qu.:2.00   1st Qu.:1.625   1st Qu.:4.50   1st Qu.:1.000  
##  Median :4.00   Median :3.500   Median :5.50   Median :2.250  
##  Mean   :4.22   Mean   :4.040   Mean   :5.57   Mean   :3.170  
##  3rd Qu.:6.00   3rd Qu.:6.500   3rd Qu.:7.50   3rd Qu.:4.875  
##  Max.   :9.00   Max.   :9.000   Max.   :9.00   Max.   :8.500  
##

t.test(MainStudy$MRYouTube,MainStudy$MRFacebook)

## 
##  Welch Two Sample t-test
## 
## data:  MainStudy$MRYouTube and MainStudy$MRFacebook
## t = 1.2293, df = 96.807, p-value = 0.2219
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.2294196  0.9760863
## sample estimates:
## mean of x mean of y 
##  7.013333  6.640000

# Compare mean media-richness (MR) scores for YouTube vs Twitter
# (default unpaired Welch test).
# NOTE(review): if rows are respondents who rated both platforms, consider
# paired = TRUE -- confirm against the study design.
t.test(MainStudy$MRYouTube,MainStudy$MRTwitter)

## 
##  Welch Two Sample t-test
## 
## data:  MainStudy$MRYouTube and MainStudy$MRTwitter
## t = 4.5105, df = 93.294, p-value = 1.88e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.8209743 2.1123590
## sample estimates:
## mean of x mean of y 
##  7.013333  5.546667

# Compare mean media-richness (MR) scores for Facebook vs Twitter
# (default unpaired Welch test).
# NOTE(review): if rows are respondents who rated both platforms, consider
# paired = TRUE -- confirm against the study design.
t.test(MainStudy$MRFacebook,MainStudy$MRTwitter) ## Two levels of Content Vividness

## 
##  Welch Two Sample t-test
## 
## data:  MainStudy$MRFacebook and MainStudy$MRTwitter
## t = 3.2105, df = 96.688, p-value = 0.001799
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.4174128 1.7692539
## sample estimates:
## mean of x mean of y 
##  6.640000  5.546667

# Descriptive statistics (mean, then standard deviation) of the
# media-richness (MR) scores for YouTube, Facebook and Twitter.
mean(MainStudy[["MRYouTube"]])

## [1] 7.013333

sd(MainStudy[["MRYouTube"]])

## [1] 1.431679

mean(MainStudy[["MRFacebook"]])

## [1] 6.64

sd(MainStudy[["MRFacebook"]])

## [1] 1.60051

mean(MainStudy[["MRTwitter"]])

## [1] 5.546667

sd(MainStudy[["MRTwitter"]])

## [1] 1.799168

# Compare mean tie-strength (TS) scores for YouTube vs Facebook.
# t.test() defaults are used, i.e. an unpaired Welch test with unequal variances.
# NOTE(review): both columns come from the same data frame; if each row is one
# respondent rating every platform, a paired test (paired = TRUE) would be the
# appropriate choice -- confirm against the study design.
t.test(MainStudy$TSYouTube,MainStudy$TSFacebook)

## 
##  Welch Two Sample t-test
## 
## data:  MainStudy$TSYouTube and MainStudy$TSFacebook
## t = -4.743, df = 83.499, p-value = 8.585e-06
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2.923775 -1.196225
## sample estimates:
## mean of x mean of y 
##      5.32      7.38

# Compare mean tie-strength (TS) scores for YouTube vs Twitter
# (default unpaired Welch test).
# NOTE(review): if rows are respondents who rated both platforms, consider
# paired = TRUE -- confirm against the study design.
t.test(MainStudy$TSYouTube,MainStudy$TSTwitter)

## 
##  Welch Two Sample t-test
## 
## data:  MainStudy$TSYouTube and MainStudy$TSTwitter
## t = -0.78715, df = 97.235, p-value = 0.4331
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.3733189  0.5933189
## sample estimates:
## mean of x mean of y 
##      5.32      5.71

# Compare mean tie-strength (TS) scores for Facebook vs Twitter
# (default unpaired Welch test).
# NOTE(review): if rows are respondents who rated both platforms, consider
# paired = TRUE -- confirm against the study design.
t.test(MainStudy$TSFacebook,MainStudy$TSTwitter) ## Two levels of Tie Strength

## 
##  Welch Two Sample t-test
## 
## data:  MainStudy$TSFacebook and MainStudy$TSTwitter
## t = 4.0882, df = 87.81, p-value = 9.599e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.8581796 2.4818204
## sample estimates:
## mean of x mean of y 
##      7.38      5.71

# Descriptive statistics (mean, then standard deviation) of the
# tie-strength (TS) scores for YouTube, Facebook and Twitter.
mean(MainStudy[["TSYouTube"]])

## [1] 5.32

sd(MainStudy[["TSYouTube"]])

## [1] 2.584806

mean(MainStudy[["TSFacebook"]])

## [1] 7.38

sd(MainStudy[["TSFacebook"]])

## [1] 1.658497

mean(MainStudy[["TSTwitter"]])

## [1] 5.71

sd(MainStudy[["TSTwitter"]])

## [1] 2.364901

# Reshape MainStudy from wide (one TS*/MR* column per platform) to long format
# (one row per respondent x platform). SMP holds the platform, TieStrength and
# MediaRichness the corresponding scores.
#
# Changes from the previous version:
#  * the measure columns are no longer also listed as id.vars, so the long
#    data no longer carries a redundant copy of every wide column;
#  * only the MediaRichness column of the second melt is appended, so
#    MainStudyMelt no longer contains duplicated column names
#    (ResponseId, X3, V3 and SMP used to appear twice after cbind()).
smp_names <- c("Facebook", "Twitter", "YouTube", "Instagram", "Pinterest",
               "SnapChat", "LinkedIn", "SecondLife")
id_cols <- c("ResponseId", "X3", "V3")
MainStudyMelt1 <- melt(MainStudy, id.vars = id_cols,
                       measure.vars = paste0("TS", smp_names),
                       variable.name = "SMP", value.name = "TieStrength")
MainStudyMelt2 <- melt(MainStudy, id.vars = id_cols,
                       measure.vars = paste0("MR", smp_names),
                       variable.name = "SMP", value.name = "MediaRichness")
# Both melts list the platforms in the same order, so their rows should line
# up one-to-one; fail loudly instead of silently pairing the wrong rows.
stopifnot(
  identical(MainStudyMelt1$ResponseId, MainStudyMelt2$ResponseId),
  identical(sub("^TS", "", MainStudyMelt1$SMP),
            sub("^MR", "", MainStudyMelt2$SMP))
)
# SMP keeps the levels of the first melt (TSFacebook, TSTwitter, ...), which is
# what MainStudyMelt$SMP resolved to before as well.
MainStudyMelt <- cbind(MainStudyMelt1,
                       MediaRichness = MainStudyMelt2$MediaRichness)

# Distribution of the long-format tie-strength scores.
hist(MainStudyMelt$TieStrength)

plot(density(MainStudyMelt$TieStrength))

# Scree plot of a PCA on the platform (SMP) and the tie-strength score.
# type = "lines" is an argument of screeplot(), not prcomp(); it used to sit
# inside the prcomp() call, where it was discarded with a warning and the
# default bar plot was drawn instead.
screeplot(prcomp(cbind(MainStudyMelt$SMP,MainStudyMelt$TieStrength)),type="lines")

summary(prcomp(cbind(MainStudyMelt$SMP,MainStudyMelt$TieStrength)))

## Importance of components:
##                           PC1    PC2
## Standard deviation     2.9096 1.9372
## Proportion of Variance 0.6928 0.3071
## Cumulative Proportion  0.6928 1.0000

## At least two components
# Elbow analysis for k-means on the tie-strength scores, k = 1..15.
mydata <- data.frame(MainStudyMelt$TieStrength)
k_values <- seq_len(15L)
# kmeans() draws random starting centres; fix the seed so the curve (and the
# printed percentages) can be reproduced. Figures from an unseeded run will
# differ slightly.
set.seed(20180215)
wss <- vapply(k_values, function(k) {
  if (k == 1L) {
    # One cluster: the within-group sum of squares is the total sum of squares.
    (nrow(mydata) - 1) * sum(vapply(mydata, var, numeric(1)))
  } else {
    # tot.withinss is the sum of the per-cluster withinss values.
    kmeans(mydata, centers = k)$tot.withinss
  }
}, numeric(1))
plot(k_values, wss, type = "b", xlab = "Number of Clusters",
     ylab = "Within groups sum of squares")

# Express each k's WSS as a percentage of the WSS summed over all k, then
# accumulate.
# NOTE(review): this running total is what gets plotted as "% Var explained";
# it is not the usual 1 - WSS_k / WSS_1 definition -- confirm the label and the
# 80% reading taken from it.
wss <- cumsum(wss / sum(wss) * 100)
plot(k_values, wss, type = "b", xlab = "Number of Clusters",
     ylab = "% Var explained")
wss

##  [1]  64.92153  81.23422  87.64483  91.53786  93.91399  95.40564  96.63822
##  [8]  97.32955  98.07574  98.64732  99.18572  99.46976  99.69873  99.87468
## [15] 100.00000

## 3 clusters explain more than 80% of the variance
abline(v=3,lty=2) # mark k = 3 on the current plot

# Hierarchical clustering of the individual tie-strength scores.
d <- dist(mydata,method="euclidean") # distance matrix
# "ward" is the deprecated spelling of "ward.D"; hclust() maps it to "ward.D"
# and prints a message on every call. Naming "ward.D" keeps the same algorithm.
# NOTE(review): on unsquared Euclidean distances, "ward.D2" is the variant that
# implements Ward's criterion -- confirm which one is intended.
fit <- hclust(d, method="ward.D")

plot(fit) # display dendrogram of all raw data
groups <- cutree(fit,k=3) # cut tree into 3 clusters
rect.hclust(fit,k=3,border="red") # draw red borders around the 3 clusters

# Hierarchical clustering of the platforms on their mean tie strength.
mydata2 <- aggregate(MainStudyMelt$TieStrength, list(MainStudyMelt$SMP), mean)
# Label rows with the platform name taken from the grouping column itself
# (TSFacebook -> Facebook, ...), so labels cannot drift from the group order.
rownames(mydata2) <- sub("^(TS|MR)", "", as.character(mydata2$Group.1))
# Use only the numeric mean column ("x"). Passing the whole data frame sent the
# factor column Group.1 to dist() as well: it was coerced to NA (with a
# warning) and dist() rescaled the remaining distances to compensate for the
# missing column. Distances and dendrogram heights are now the plain
# differences between platform means; the merge order is unaffected by that
# constant rescaling.
d <- dist(mydata2["x"], method = "euclidean") # distance matrix

# "ward" is the deprecated spelling of "ward.D" (same algorithm, no message).
fit <- hclust(d, method = "ward.D")

plot(fit, ylab = "Tie strength") # display dendrogram of means by SMP
groups <- cutree(fit, k = 3) # cut tree into 3 clusters
rect.hclust(fit, k = 3, border = "red") # draw red borders around the 3 clusters

# Distribution of the long-format media-richness scores.
hist(MainStudyMelt$MediaRichness)

plot(density(MainStudyMelt$MediaRichness))

# Scree plot of a PCA on the platform (SMP) and the media-richness score.
# type = "lines" is an argument of screeplot(), not prcomp(); it used to sit
# inside the prcomp() call, where it was discarded with a warning and the
# default bar plot was drawn instead.
screeplot(prcomp(cbind(MainStudyMelt$SMP,MainStudyMelt$MediaRichness)),type="lines")

summary(prcomp(cbind(MainStudyMelt$SMP,MainStudyMelt$MediaRichness)))

## Importance of components:
##                           PC1    PC2
## Standard deviation     2.5873 1.6883
## Proportion of Variance 0.7014 0.2987
## Cumulative Proportion  0.7014 1.0000

## At least two components
# Elbow analysis for k-means on the media-richness scores, k = 1..15.
mydata <- data.frame(MainStudyMelt$MediaRichness)
k_values <- seq_len(15L)
# kmeans() draws random starting centres; fix the seed so the curve (and the
# printed percentages) can be reproduced. Figures from an unseeded run will
# differ slightly.
set.seed(20180215)
wss <- vapply(k_values, function(k) {
  if (k == 1L) {
    # One cluster: the within-group sum of squares is the total sum of squares.
    (nrow(mydata) - 1) * sum(vapply(mydata, var, numeric(1)))
  } else {
    # tot.withinss is the sum of the per-cluster withinss values.
    kmeans(mydata, centers = k)$tot.withinss
  }
}, numeric(1))
plot(k_values, wss, type = "b", xlab = "Number of Clusters",
     ylab = "Within groups sum of squares")

# Express each k's WSS as a percentage of the WSS summed over all k, then
# accumulate.
# NOTE(review): this running total is what gets plotted as "% Var explained";
# it is not the usual 1 - WSS_k / WSS_1 definition -- confirm the label and the
# 80% reading taken from it.
wss <- cumsum(wss / sum(wss) * 100)
plot(k_values, wss, type = "b", xlab = "Number of Clusters",
     ylab = "% Var explained")
wss

##  [1]  57.56473  72.88376  81.37180  86.12562  89.26672  91.71882  93.25541
##  [8]  94.94939  96.17952  96.84776  97.50254  98.42568  99.24339  99.69277
## [15] 100.00000

## 3 clusters explain more than 80% of the variance
abline(v=3,lty=2) # mark k = 3 on the current plot

# Hierarchical clustering of the individual media-richness scores.
d <- dist(mydata,method="euclidean") # distance matrix
# "ward" is the deprecated spelling of "ward.D"; hclust() maps it to "ward.D"
# and prints a message on every call. Naming "ward.D" keeps the same algorithm.
# NOTE(review): on unsquared Euclidean distances, "ward.D2" is the variant that
# implements Ward's criterion -- confirm which one is intended.
fit <- hclust(d, method="ward.D")

plot(fit) # display dendrogram of all raw data
groups <- cutree(fit,k=3) # cut tree into 3 clusters
rect.hclust(fit,k=3,border="red") # draw red borders around the 3 clusters

# Hierarchical clustering of the platforms on their mean media richness.
mydata2 <- aggregate(MainStudyMelt$MediaRichness, list(MainStudyMelt$SMP), mean)
# Label rows with the platform name taken from the grouping column itself
# (prefix TS/MR stripped), so labels cannot drift from the group order.
rownames(mydata2) <- sub("^(TS|MR)", "", as.character(mydata2$Group.1))
# Use only the numeric mean column ("x"). Passing the whole data frame sent the
# factor column Group.1 to dist() as well: it was coerced to NA (with a
# warning) and dist() rescaled the remaining distances to compensate for the
# missing column. Distances and dendrogram heights are now the plain
# differences between platform means; the merge order is unaffected by that
# constant rescaling.
d <- dist(mydata2["x"], method = "euclidean") # distance matrix

# "ward" is the deprecated spelling of "ward.D" (same algorithm, no message).
fit <- hclust(d, method = "ward.D")

plot(fit, ylab = "Content vividness") # display dendrogram of means by SMP
groups <- cutree(fit, k = 3) # cut tree into 3 clusters
rect.hclust(fit, k = 3, border = "red") # draw red borders around the 3 clusters

## STUDY 1 (SECONDARY DATA)
cat("\014")  # emits a form feed; intended to clear the console

# Start Study 1 from an empty workspace (hidden objects included).
# all.names is spelled out instead of relying on partial matching of `all`.
rm(list = ls(all.names = TRUE))

# Directory holding the input file. The file is read through its full path, so
# the session's working directory is no longer changed as a side effect.
# NOTE(review): machine-specific path -- adjust when running elsewhere.
data_dir <- "C:/Users/Erik Ernesto Vazquez/Documents"

# Read the e-retailer data set.
# NOTE(review): the previous comment said "raw data from Qualtrics", which
# looks copied from another section of the script -- confirm the data source.
X <- read.csv(file.path(data_dir, "E-Retailer.csv"), header = TRUE)
# Keep only the three merchandise categories analysed in Study 1.
X <- subset(X, Merchandise.Category %in% c("Apparel/Accessories",
                                            "Computers/Electronics",
                                            "Health/Beauty"))

# Product type by merchandise category:
#   Computers/Electronics -> Search (3), Apparel/Accessories -> Experience (2),
#   remaining category (Health/Beauty) -> Credence (1).
X$ProdCat <- ifelse(X$Merchandise.Category == "Computers/Electronics", "Search",
                    ifelse(X$Merchandise.Category == "Apparel/Accessories",
                           "Experience", "Credence"))
X$ProdCatLvl <- ifelse(X$Merchandise.Category == "Computers/Electronics", 3,
                       ifelse(X$Merchandise.Category == "Apparel/Accessories",
                              2, 1))

# 0/1 dummies for merchant type.
X$WebOnly <- ifelse(X$Merchant.Type == "Web Only", 1, 0)
X$RetailChain <- ifelse(X$Merchant.Type == "Retail Chain", 1, 0)
X$ConsumerBM <- ifelse(X$Merchant.Type == "Consumer Brand Manufacturer", 1, 0)
# Fixed: the level is spelled "Catalog/Call Center" in the data; the previous
# "Catallog/Call Center" never matched, so this dummy was 0 for every row.
X$CatalogCallCenter <- ifelse(X$Merchant.Type == "Catalog/Call Center", 1, 0)

# Ordinal consistency rating: Poor = 1, Fair = 2, Good = 3, anything else = 4.
# (This was assigned twice with identical code; the duplicate is removed.)
X$ConsistencyLvl <- ifelse(X$Consistency == "Poor", 1,
                           ifelse(X$Consistency == "Fair", 2,
                                  ifelse(X$Consistency == "Good", 3, 4)))

# 1 when the field is non-empty, 0 when it is the empty string.
X$PersonalizationBinary <- ifelse(X$Personalization == "", 0, 1)
X$Mobile.Commerce.PlatformBinary <- ifelse(X$Mobile.Commerce.Platform == "", 0, 1)

# Scale monthly visits by 12 (presumably to annualise them). The column keeps
# its "Monthly" name, and re-running this line multiplies the values again.
X$X2011.Monthly.Visits <- X$X2011.Monthly.Visits * 12

# Number of retailers in each merchandise category.
table(X[["Merchandise.Category"]])

## 
##          Apparel/Accessories Automotive Parts/Accessories 
##                          139                            0 
##            Books/Music/Video        Computers/Electronics 
##                            0                           48 
##                Flowers/Gifts                    Food/Drug 
##                            0                            0 
##    Hardware/Home Improvement                Health/Beauty 
##                            0                           29 
##  Housewares/Home Furnishings                      Jewelry 
##                            0                            0 
##                Mass Merchant              Office Supplies 
##                            0                            0 
##        Specialty/Non-Apparel               Sporting Goods 
##                            0                            0 
##                 Toys/Hobbies 
##                            0

# Frequency of each Merchandise.Category x Merchant.Type combination.
# NOTE(review): a character vector of column names and a `freq` output column
# look like plyr::count(); no library(plyr) call is visible in this part of the
# script -- confirm which package provides count() here.
count(X,c("Merchandise.Category","Merchant.Type"))

##     Merchandise.Category               Merchant.Type freq
## 1    Apparel/Accessories         Catalog/Call Center   17
## 2    Apparel/Accessories Consumer Brand Manufacturer   34
## 3    Apparel/Accessories                Retail Chain   60
## 4    Apparel/Accessories                    Web Only   28
## 5  Computers/Electronics         Catalog/Call Center    7
## 6  Computers/Electronics Consumer Brand Manufacturer   12
## 7  Computers/Electronics                Retail Chain    9
## 8  Computers/Electronics                    Web Only   20
## 9          Health/Beauty         Catalog/Call Center    3
## 10         Health/Beauty Consumer Brand Manufacturer    3
## 11         Health/Beauty                Retail Chain    7
## 12         Health/Beauty                    Web Only   16

# Frequency of each Merchandise.Category x Mobile.Commerce.PlatformBinary
# combination.
# NOTE(review): presumably plyr::count() (output has a `freq` column); confirm
# the package is loaded earlier in the script.
count(X,c("Merchandise.Category","Mobile.Commerce.PlatformBinary"))

##    Merchandise.Category Mobile.Commerce.PlatformBinary freq
## 1   Apparel/Accessories                              0   95
## 2   Apparel/Accessories                              1   44
## 3 Computers/Electronics                              0   39
## 4 Computers/Electronics                              1    9
## 5         Health/Beauty                              0   15
## 6         Health/Beauty                              1   14

# Mean of the (x12-scaled) visits column per merchandise category.
aggregate(X[["X2011.Monthly.Visits"]],
          by = list(X[["Merchandise.Category"]]),
          FUN = mean)

##                 Group.1         x
## 1   Apparel/Accessories  43070925
## 2 Computers/Electronics 183258367
## 3         Health/Beauty  27888090

# Standard deviation of the (x12-scaled) visits column per merchandise category.
aggregate(X[["X2011.Monthly.Visits"]],
          by = list(X[["Merchandise.Category"]]),
          FUN = sd)

##                 Group.1         x
## 1   Apparel/Accessories  59335237
## 2 Computers/Electronics 868808208
## 3         Health/Beauty  64033836

# H1a: linear model of log visits on product-category level, merchant-type
# dummies, consistency level, personalization and mobile-platform indicators.
fit <- lm(
  log(X2011.Monthly.Visits) ~ ProdCatLvl + WebOnly + RetailChain +
    ConsumerBM + ConsistencyLvl + PersonalizationBinary +
    Mobile.Commerce.PlatformBinary,
  data = X
)
summary(fit) ## H1a Not supported

## 
## Call:
## lm(formula = log(X2011.Monthly.Visits) ~ ProdCatLvl + WebOnly + 
##     RetailChain + ConsumerBM + ConsistencyLvl + PersonalizationBinary + 
##     Mobile.Commerce.PlatformBinary, data = X)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.6626 -0.7814  0.0634  0.6329  5.3404 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    15.11536    0.47769  31.643  < 2e-16 ***
## ProdCatLvl                      0.27987    0.14933   1.874 0.062307 .  
## WebOnly                        -0.23732    0.29320  -0.809 0.419202    
## RetailChain                     0.72994    0.28390   2.571 0.010835 *  
## ConsumerBM                      0.47927    0.30414   1.576 0.116585    
## ConsistencyLvl                  0.18510    0.07134   2.594 0.010149 *  
## PersonalizationBinary           0.14626    0.17718   0.825 0.410034    
## Mobile.Commerce.PlatformBinary  0.72733    0.19239   3.780 0.000205 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.264 on 208 degrees of freedom
## Multiple R-squared:  0.1904, Adjusted R-squared:  0.1632 
## F-statistic: 6.988 on 7 and 208 DF,  p-value: 1.749e-07

# Mean conversion rate per merchandise category.
aggregate(X[["X2011.Conversion.Rate"]],
          by = list(X[["Merchandise.Category"]]),
          FUN = mean)

##                 Group.1          x
## 1   Apparel/Accessories 0.02956835
## 2 Computers/Electronics 0.02250000
## 3         Health/Beauty 0.05655172

# Standard deviation of the conversion rate per merchandise category.
aggregate(X[["X2011.Conversion.Rate"]],
          by = list(X[["Merchandise.Category"]]),
          FUN = sd)

##                 Group.1          x
## 1   Apparel/Accessories 0.01735689
## 2 Computers/Electronics 0.01344809
## 3         Health/Beauty 0.03819834

# H1b: linear model of the conversion rate on the same predictors used for
# the visits model.
fit <- lm(
  X2011.Conversion.Rate ~ ProdCatLvl + WebOnly + RetailChain +
    ConsumerBM + ConsistencyLvl + PersonalizationBinary +
    Mobile.Commerce.PlatformBinary,
  data = X
)
summary(fit) ## H1b supported

## 
## Call:
## lm(formula = X2011.Conversion.Rate ~ ProdCatLvl + WebOnly + RetailChain + 
##     ConsumerBM + ConsistencyLvl + PersonalizationBinary + Mobile.Commerce.PlatformBinary, 
##     data = X)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.045984 -0.009485 -0.003161  0.007115  0.103892 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     0.089544   0.007047  12.708  < 2e-16 ***
## ProdCatLvl                     -0.015505   0.002203  -7.039 2.77e-11 ***
## WebOnly                        -0.017721   0.004325  -4.097 5.99e-05 ***
## RetailChain                    -0.029782   0.004188  -7.111 1.81e-11 ***
## ConsumerBM                     -0.028788   0.004486  -6.417 9.24e-10 ***
## ConsistencyLvl                 -0.001202   0.001052  -1.142    0.255    
## PersonalizationBinary          -0.003177   0.002614  -1.215    0.226    
## Mobile.Commerce.PlatformBinary  0.004473   0.002838   1.576    0.117    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01865 on 208 degrees of freedom
## Multiple R-squared:  0.3628, Adjusted R-squared:  0.3413 
## F-statistic: 16.92 on 7 and 208 DF,  p-value: < 2.2e-16

IJEC_Final_Version.R

Erik Ernesto Vazquez

Thu Feb 15 14:53:14 2018