#COLLEGE / COMPANY: Delhi Technological University

#Date: December 29, 2017

#EMAIL: deepankvarshney14@gmail.com

#NAME: Deepank Varshney

#Project Title: Email Marketing Campaign management

SYNOPSIS: Analysis of the effect of various factors on the effectiveness of an email marketing campaign.

# Load the campaign data from the project folder.
setwd("~/winter internship")
eml <- read.csv("email_campaign.csv")
# View() opens a spreadsheet-style viewer, so only call it in interactive use.
if (interactive()) {
  View(eml)
}

#Dimensions

# Size check: prints the number of rows followed by the number of columns.
dim(eml)
## [1] 68353    12

#Head

# Preview the first rows to check column names and value formats.
head(eml)
##            Email_ID Email_Type Subject_Hotness_Score Email_Source_Type
## 1 EMA00081000034500          1                   2.2                 2
## 2 EMA00081000045360          2                   2.1                 1
## 3 EMA00081000066290          2                   0.1                 1
## 4 EMA00081000076560          1                   3.0                 2
## 5 EMA00081000109720          1                   0.0                 2
## 6 EMA00081000131660          1                   1.5                 1
##   Customer_Location Email_Campaign_Type Total_Past_Communications
## 1                 E                   2                        33
## 2                                     2                        15
## 3                 B                   3                        36
## 4                 E                   2                        25
## 5                 C                   3                        18
## 6                 G                   2                        NA
##   Time_Email_sent_Category Word_Count Total_Links Total_Images
## 1                        1        440           8            0
## 2                        2        504           5            0
## 3                        2        962           5            0
## 4                        2        610          16            0
## 5                        2        947           4            0
## 6                        2        416          11            0
##   Email_Status
## 1            0
## 2            0
## 3            1
## 4            0
## 5            0
## 6            0

#Summary Statistics

# describe() from psych prints per-column summary statistics (n, mean, sd, median, skew, ...).
library(psych)
# NOTE(review): columns marked '*' in the output (Email_ID, Customer_Location) are
# non-numeric and appear to be summarised via numeric codes, so their statistics
# are not meaningful.
describe(eml)
##                           vars     n     mean       sd  median  trimmed
## Email_ID*                    1 68353 34177.00 19731.96 34177.0 34177.00
## Email_Type                   2 68353     1.29     0.45     1.0     1.23
## Subject_Hotness_Score        3 68353     1.10     1.00     0.8     0.97
## Email_Source_Type            4 68353     1.46     0.50     1.0     1.45
## Customer_Location*           5 68353     5.34     2.57     6.0     5.55
## Email_Campaign_Type          6 68353     2.27     0.47     2.0     2.23
## Total_Past_Communications    7 61528    28.93    12.54    28.0    28.60
## Time_Email_sent_Category     8 68353     2.00     0.63     2.0     2.00
## Word_Count                   9 68353   699.93   271.72   694.0   699.05
## Total_Links                 10 66152    10.43     6.38     9.0     9.59
## Total_Images                11 66676     3.55     5.60     0.0     2.31
## Email_Status                12 68353     0.23     0.50     0.0     0.12
##                                mad min   max range  skew kurtosis    se
## Email_ID*                 25334.67   1 68353 68352  0.00    -1.20 75.47
## Email_Type                    0.00   1     2     1  0.95    -1.09  0.00
## Subject_Hotness_Score         1.04   0     5     5  0.90     0.08  0.00
## Email_Source_Type             0.00   1     2     1  0.17    -1.97  0.00
## Customer_Location*            2.97   1     8     7 -0.53    -1.10  0.01
## Email_Campaign_Type           0.00   1     3     2  0.71    -0.69  0.00
## Total_Past_Communications    13.34   0    67    67  0.21    -0.44  0.05
## Time_Email_sent_Category      0.00   1     3     2  0.00    -0.49  0.00
## Word_Count                  265.39  40  1316  1276  0.01    -0.35  1.04
## Total_Links                   4.45   1    49    48  1.39     2.50  0.02
## Total_Images                  0.00   0    45    45  2.09     5.03  0.02
## Email_Status                  0.00   0     2     2  2.08     3.53  0.00

#1Way Contingency Tables

# Count of emails for each Email_Type code.
table(eml$Email_Type)
## 
##     1     2 
## 48866 19487

Type 1 email is used more by the company

# Count of emails for each Email_Source_Type code.
table(eml$Email_Source_Type)
## 
##     1     2 
## 37149 31204

Type 1 email source is used more by the company

# Count of emails for each Email_Campaign_Type code.
table(eml$Email_Campaign_Type)
## 
##     1     2     3 
##   736 48273 19344

Type 2 email campaign is used more by the company

# Count of emails for each Email_Status code (the outcome variable of the analysis).
table(eml$Email_Status)
## 
##     0     1     2 
## 54941 11039  2373

Status codes: 0 = mails that were not read; 1 = mails that were read; 2 = mails that were read and replied to. Clearly, most recipients do not open their mails.

# Count of emails for each customer location. The unnamed first column of the
# output counts the rows whose location is an empty string.
table(eml$Customer_Location)
## 
##           A     B     C     D     E     F     G 
## 11595  1454  4341  5758  7406 10193  4433 23173

the company has maximum customers in G area

# Count of emails for each Time_Email_sent_Category code.
table(eml$Time_Email_sent_Category)
## 
##     1     2     3 
## 13636 41129 13588

Time categories: 1 = morning; 2 = afternoon; 3 = night.

#2 way contingency tables

# Cross-tabulate email status against time-of-day category, with row/column totals.
mytable1 <- xtabs(~ Email_Status + Time_Email_sent_Category, data = eml)
addmargins(mytable1)
##             Time_Email_sent_Category
## Email_Status     1     2     3   Sum
##          0   10970 33062 10909 54941
##          1    2186  6631  2222 11039
##          2     480  1436   457  2373
##          Sum 13636 41129 13588 68353

Maximum mails are sent in afternoon and email status doesn’t much depend on time

# Cross-tabulate email status against customer location, with row/column totals.
mytable1 <- xtabs(~ Email_Status + Customer_Location, data = eml)
addmargins(mytable1)
##             Customer_Location
## Email_Status           A     B     C     D     E     F     G   Sum
##          0    9351  1160  3502  4654  5950  8136  3579 18609 54941
##          1    1848   245   714   890  1206  1693   698  3745 11039
##          2     396    49   125   214   250   364   156   819  2373
##          Sum 11595  1454  4341  5758  7406 10193  4433 23173 68353

Maximum mails are sent in G area

# Cross-tabulate email status against campaign type, with row/column totals.
mytable1 <- xtabs(~ Email_Status + Email_Campaign_Type, data = eml)
addmargins(mytable1)
##             Email_Campaign_Type
## Email_Status     1     2     3   Sum
##          0      77 42115 12749 54941
##          1     486  5446  5107 11039
##          2     173   712  1488  2373
##          Sum   736 48273 19344 68353

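Campaign 3 seems to be the most promising in volume: it produces the largest number of replies (1,488) and about 34% of its mails are read or replied to, against about 13% for campaign 2. Campaign 1 has by far the highest engagement rate (about 90% of its mails are read or replied to), but it covers only 736 mails.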

# Cross-tabulate email status against email source type, with row/column totals.
mytable1 <- xtabs(~ Email_Status + Email_Source_Type, data = eml)
addmargins(mytable1)
##             Email_Source_Type
## Email_Status     1     2   Sum
##          0   29577 25364 54941
##          1    6150  4889 11039
##          2    1422   951  2373
##          Sum 37149 31204 68353
# Cross-tabulate email status against email type, with row/column totals.
mytable1 <- xtabs(~ Email_Status + Email_Type, data = eml)
addmargins(mytable1)
##             Email_Type
## Email_Status     1     2   Sum
##          0   39004 15937 54941
##          1    8208  2831 11039
##          2    1654   719  2373
##          Sum 48866 19487 68353

Type 1 seems more promising

#Comparative averages of various factors by email status

# Mean word count per email status (0 = unread, 1 = read, 2 = read and replied).
mean1 <- aggregate(eml$Word_Count, by = list(eml$Email_Status), FUN = mean)
mean1
##   Group.1        x
## 1       0 725.2568
## 2       1 590.9396
## 3       2 620.6153
library(lattice)
# The title spells out the status codes actually used in the data (0/1/2).
barchart(Group.1 ~ x, data = mean1,
         main = "averages of Word Count by email Status (0.UnRead; 1.Read; 2.Read And Replied)")

Mails that are read and replied to contain about 620 words on average. Unread mails are noticeably longer (about 725 words), while read-only mails are slightly shorter (about 591 words), so long mails appear less attractive.

# Mean image count per email status (0 = unread, 1 = read, 2 = read and replied);
# rows with a missing image count are ignored.
mean2 <- aggregate(eml$Total_Images, by = list(eml$Email_Status), FUN = mean, na.rm = TRUE)
mean2
##   Group.1        x
## 1       0 3.617575
## 2       1 3.187361
## 3       2 3.690415
library(lattice)
# The title spells out the status codes actually used in the data (0/1/2).
barchart(Group.1 ~ x, data = mean2,
         main = "averages of Images by email Status (0.UnRead; 1.Read; 2.Read And Replied)")

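Mails that are read and replied to contain about 3.7 images on average. Read-only mails average fewer (about 3.2), but unread mails average nearly the same (about 3.6), so the image count alone separates the groups only weakly.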

# Mean link count per email status (0 = unread, 1 = read, 2 = read and replied);
# rows with a missing link count are ignored.
mean3 <- aggregate(eml$Total_Links, by = list(eml$Email_Status), FUN = mean, na.rm = TRUE)
mean3
##   Group.1         x
## 1       0 10.543199
## 2       1  9.854375
## 3       2 10.473638
library(lattice)
# The title spells out the status codes actually used in the data (0/1/2).
barchart(Group.1 ~ x, data = mean3,
         main = "averages of links by email Status (0.UnRead; 1.Read; 2.Read And Replied)")

Mails that are read and replied to contain about 10.47 links on average, almost the same as unread mails (10.54); read-only mails contain slightly fewer (9.85). The link count therefore differs little between statuses.

# Mean number of past communications per email status
# (0 = unread, 1 = read, 2 = read and replied); missing values are ignored.
mean4 <- aggregate(eml$Total_Past_Communications, by = list(eml$Email_Status), FUN = mean, na.rm = TRUE)
mean4
##   Group.1        x
## 1       0 27.40013
## 2       1 34.70782
## 3       2 37.59680
library(lattice)
# The title spells out the status codes actually used in the data (0/1/2).
barchart(Group.1 ~ x, data = mean4,
         main = "averages of past comm. by email Status (0.UnRead; 1.Read; 2.Read And Replied)")

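Mails that are read and replied to go to recipients with about 37.6 past communications on average (34.7 for read-only mails and 27.4 for unread mails). Fewer past communications are associated with a less attractive email.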

# Mean subject hotness score per email status
# (0 = unread, 1 = read, 2 = read and replied).
mean5 <- aggregate(eml$Subject_Hotness_Score, by = list(eml$Email_Status), FUN = mean)
mean5
##   Group.1         x
## 1       0 1.1595584
## 2       1 0.9075188
## 3       2 0.4863043
library(lattice)
# The title spells out the status codes actually used in the data (0/1/2).
barchart(Group.1 ~ x, data = mean5,
         main = "averages of subject score by email Status (0.UnRead; 1.Read; 2.Read And Replied)")

Mails that are read and replied to have an average subject hotness score of 0.486, against 0.908 for read-only mails and 1.160 for unread mails. Higher scores are associated with a less attractive email.

#Boxplots

# Horizontal boxplots of the image count, one box per email status.
# The argument list ends at `las`; no dangling comma (empty argument) is left behind.
boxplot(eml$Total_Images ~ eml$Email_Status,
        horizontal = TRUE,
        xlab = "Number of images in the mail",
        ylab = "Email Status",
        las = 1)

# Horizontal boxplots of past communications, one box per email status.
# The argument list ends at `las`; no dangling comma (empty argument) is left behind.
boxplot(eml$Total_Past_Communications ~ eml$Email_Status,
        horizontal = TRUE,
        xlab = "Number of past communications with the recipient in the mail",
        ylab = "Email Status",
        las = 1)

For people to read and reply, past communications should be higher.

# Horizontal boxplots of the link count, one box per email status.
# The argument list ends at `las`; no dangling comma (empty argument) is left behind.
boxplot(eml$Total_Links ~ eml$Email_Status,
        horizontal = TRUE,
        xlab = "Number of links in the mail",
        ylab = "Email Status",
        las = 1)

# Horizontal boxplots of the word count, one box per email status.
# The argument list ends at `las`; no dangling comma (empty argument) is left behind.
boxplot(eml$Word_Count ~ eml$Email_Status,
        horizontal = TRUE,
        xlab = "Number of words in the mail",
        ylab = "Email Status",
        las = 1)

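For people to read and reply, the number of words should be lower: unread mails average about 725 words, against about 591 to 621 words for mails that were read or replied to.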

# Horizontal boxplots of the subject hotness score, one box per email status.
# The argument list ends at `las`; no dangling comma (empty argument) is left behind.
boxplot(eml$Subject_Hotness_Score ~ eml$Email_Status,
        horizontal = TRUE,
        xlab = "Subject hotness score",
        ylab = "Email Status",
        las = 1)

For people to read and reply, subject hotness should be lower

#Histograms

# Frequency histograms of the numeric email attributes, one plot per column.
hist(eml$Total_Past_Communications,
     main = "Total Past Communications frequency", col = "lightblue")

hist(eml$Total_Images, main = "Total Images frequency", col = "lightblue")

hist(eml$Total_Links, main = "Total links frequency", col = "lightblue")

hist(eml$Subject_Hotness_Score,
     main = "subject hotness score frequency", col = "lightblue")

hist(eml$Word_Count, main = "word count frequency", col = "lightblue")

#histogram using library lattice

library(lattice)
# Distribution of customer locations, one panel per Email_Status value.
histogram(~ Customer_Location | Email_Status, data = eml, col = "brown")

percent of total emails by location is nearly same in all the 3 cases of email status(read;unread;read and replied)

library(lattice)
# Distribution of email type codes, one panel per Email_Status value.
histogram(~ Email_Type | Email_Status, data = eml, col = "brown")

The percentage of emails by email type is not the same across the three email statuses (unread; read; read and replied). Type 1 is the more common type in every status, but the share of type 2 is slightly higher (about 30%) among mails that were read and replied to.

library(lattice)
# Distribution of campaign type codes, one panel per Email_Status value.
histogram(~ Email_Campaign_Type | Email_Status, data = eml, col = "brown")

percent of total emails by Email Campaign type is not same in all the 3 cases of email status(read;unread;read and replied).For status read and replied, type 3 email campaign is used more

library(lattice)
# Distribution of email source type codes, one panel per Email_Status value.
histogram(~ Email_Source_Type | Email_Status, data = eml, col = "brown")

percent of total emails by email source type is not same in all the 3 cases of email status(read;unread;read and replied).For status read and replied, type 1 email source type is used more

library(lattice)
# Distribution of send-time category codes, one panel per Email_Status value.
histogram(~ Time_Email_sent_Category | Email_Status, data = eml, col = "brown")

percent of total emails by time at which it is sent is nearly same in all the 3 cases of email status(read;unread;read and replied)

#plots

# Scatter plots of each numeric attribute against email status. Both axes are
# jittered so that points sharing the same discrete values do not overlap exactly.
plot(jitter(eml$Email_Status), jitter(eml$Subject_Hotness_Score))

plot(jitter(eml$Email_Status), jitter(eml$Word_Count))

plot(jitter(eml$Email_Status), jitter(eml$Total_Past_Communications))

plot(jitter(eml$Email_Status), jitter(eml$Total_Links))

plot(jitter(eml$Email_Status), jitter(eml$Total_Images))

# Correlation matrix of the numeric columns; columns 1 (Email_ID) and 5
# (Customer_Location) of the 12 are left out. cor() is called with its default
# NA handling, so every pair involving Total_Past_Communications, Total_Links or
# Total_Images (the columns with missing values) is reported as NA.
round(cor(eml[, -c(1, 5)]), digits = 2)
##                           Email_Type Subject_Hotness_Score
## Email_Type                      1.00                 -0.24
## Subject_Hotness_Score          -0.24                  1.00
## Email_Source_Type              -0.28                  0.01
## Email_Campaign_Type             0.23                 -0.55
## Total_Past_Communications         NA                    NA
## Time_Email_sent_Category        0.00                  0.00
## Word_Count                      0.08                 -0.24
## Total_Links                       NA                    NA
## Total_Images                      NA                    NA
## Email_Status                   -0.02                 -0.15
##                           Email_Source_Type Email_Campaign_Type
## Email_Type                            -0.28                0.23
## Subject_Hotness_Score                  0.01               -0.55
## Email_Source_Type                      1.00                0.04
## Email_Campaign_Type                    0.04                1.00
## Total_Past_Communications                NA                  NA
## Time_Email_sent_Category               0.00                0.00
## Word_Count                             0.05                0.06
## Total_Links                              NA                  NA
## Total_Images                             NA                  NA
## Email_Status                          -0.02                0.19
##                           Total_Past_Communications
## Email_Type                                       NA
## Subject_Hotness_Score                            NA
## Email_Source_Type                                NA
## Email_Campaign_Type                              NA
## Total_Past_Communications                         1
## Time_Email_sent_Category                         NA
## Word_Count                                       NA
## Total_Links                                      NA
## Total_Images                                     NA
## Email_Status                                     NA
##                           Time_Email_sent_Category Word_Count Total_Links
## Email_Type                                       0       0.08          NA
## Subject_Hotness_Score                            0      -0.24          NA
## Email_Source_Type                                0       0.05          NA
## Email_Campaign_Type                              0       0.06          NA
## Total_Past_Communications                       NA         NA          NA
## Time_Email_sent_Category                         1       0.00          NA
## Word_Count                                       0       1.00          NA
## Total_Links                                     NA         NA           1
## Total_Images                                    NA         NA          NA
## Email_Status                                     0      -0.17          NA
##                           Total_Images Email_Status
## Email_Type                          NA        -0.02
## Subject_Hotness_Score               NA        -0.15
## Email_Source_Type                   NA        -0.02
## Email_Campaign_Type                 NA         0.19
## Total_Past_Communications           NA           NA
## Time_Email_sent_Category            NA         0.00
## Word_Count                          NA        -0.17
## Total_Links                         NA           NA
## Total_Images                         1           NA
## Email_Status                        NA         1.00
library(corrgram)
# Correlogram of eml: shaded cells below the diagonal, pie glyphs above it,
# variable names on the diagonal; variables are reordered (order = TRUE).
corrgram(
  eml,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt
)

#Hypothesis: various factors affect how effective an email marketing campaign is, i.e. whether the mails sent are read, or read and replied to.

#Chi Square Tests

# Pearson chi-squared test of independence between email status and email type.
chisq.test(eml$Email_Status, eml$Email_Type)
## 
##  Pearson's Chi-squared test
## 
## data:  eml$Email_Status and eml$Email_Type
## X-squared = 54.842, df = 2, p-value = 1.234e-12

The null hypothesis of independence is rejected (p < 0.05): email status and email type are associated.

# Pearson chi-squared test of independence between email status and campaign type.
chisq.test(eml$Email_Status, eml$Email_Campaign_Type)
## 
##  Pearson's Chi-squared test
## 
## data:  eml$Email_Status and eml$Email_Campaign_Type
## X-squared = 6733.5, df = 4, p-value < 2.2e-16

The null hypothesis of independence is rejected (p < 0.05): email status and email campaign type are associated.

# Pearson chi-squared test of independence between email status and email source type.
chisq.test(eml$Email_Status, eml$Email_Source_Type)
## 
##  Pearson's Chi-squared test
## 
## data:  eml$Email_Status and eml$Email_Source_Type
## X-squared = 43.859, df = 2, p-value = 2.993e-10

The null hypothesis of independence is rejected (p < 0.05): email status and email source type are associated.

# Chi-squared test of email status against the number of past communications.
# NOTE(review): the count takes many distinct values (df = 126 in the recorded
# output), so the table is sparse and R warns that the approximation may be
# incorrect - consider binning the counts or using a rank-based test.
chisq.test(eml$Email_Status, eml$Total_Past_Communications)
## Warning in chisq.test(eml$Email_Status, eml$Total_Past_Communications):
## Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  eml$Email_Status and eml$Total_Past_Communications
## X-squared = 8778.7, df = 126, p-value < 2.2e-16

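The null hypothesis of independence is rejected (p < 0.05): email status and the number of past communications are associated, although the warning above means the p-value is only approximate.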

# Chi-squared test of email status against the number of links.
# NOTE(review): the count takes many distinct values (df = 72 in the recorded
# output), so the table is sparse and R warns that the approximation may be
# incorrect - consider binning the counts or using a rank-based test.
chisq.test(eml$Email_Status, eml$Total_Links)
## Warning in chisq.test(eml$Email_Status, eml$Total_Links): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  eml$Email_Status and eml$Total_Links
## X-squared = 3092.2, df = 72, p-value < 2.2e-16

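The null hypothesis of independence is rejected (p < 0.05): email status and the number of links are associated, although the warning above means the p-value is only approximate.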

# Chi-squared test of email status against the number of images.
# NOTE(review): the count takes many distinct values (df = 88 in the recorded
# output), so the table is sparse and R warns that the approximation may be
# incorrect - consider binning the counts or using a rank-based test.
chisq.test(eml$Email_Status, eml$Total_Images)
## Warning in chisq.test(eml$Email_Status, eml$Total_Images): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  eml$Email_Status and eml$Total_Images
## X-squared = 134.77, df = 88, p-value = 0.0009949

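The null hypothesis of independence is rejected (p < 0.05): email status and the number of images are associated, although the warning above means the p-value is only approximate.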

# Chi-squared test of email status against the word count.
# NOTE(review): the word count takes many distinct values (df = 300 in the
# recorded output), so the table is sparse and R warns that the approximation
# may be incorrect - consider binning the counts or using a rank-based test.
chisq.test(eml$Email_Status, eml$Word_Count)
## Warning in chisq.test(eml$Email_Status, eml$Word_Count): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  eml$Email_Status and eml$Word_Count
## X-squared = 3960.9, df = 300, p-value < 2.2e-16

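The null hypothesis of independence is rejected (p < 0.05): email status and word count are associated, although the warning above means the p-value is only approximate.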

# Chi-squared test of email status against the subject hotness score.
# NOTE(review): the score takes many distinct values (df = 100 in the recorded
# output), so the table is sparse and R warns that the approximation may be
# incorrect - consider binning the scores or using a rank-based test.
chisq.test(eml$Email_Status, eml$Subject_Hotness_Score)
## Warning in chisq.test(eml$Email_Status, eml$Subject_Hotness_Score): Chi-
## squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  eml$Email_Status and eml$Subject_Hotness_Score
## X-squared = 3579.8, df = 100, p-value < 2.2e-16

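The null hypothesis of independence is rejected (p < 0.05): email status and subject hotness score are associated, although the warning above means the p-value is only approximate.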

# Pearson chi-squared test of independence between email status and send-time category.
chisq.test(eml$Email_Status, eml$Time_Email_sent_Category)
## 
##  Pearson's Chi-squared test
## 
## data:  eml$Email_Status and eml$Time_Email_sent_Category
## X-squared = 1.1065, df = 4, p-value = 0.8932

The null hypothesis of independence cannot be rejected (p = 0.89 > 0.05): there is no evidence that email status depends on the time at which the email was sent.

# Pearson chi-squared test of independence between email status and customer location.
chisq.test(eml$Email_Status, eml$Customer_Location)
## 
##  Pearson's Chi-squared test
## 
## data:  eml$Email_Status and eml$Customer_Location
## X-squared = 11.73, df = 14, p-value = 0.6279

The null hypothesis of independence cannot be rejected (p = 0.63 > 0.05): there is no evidence that email status depends on customer location.

#T tests

# Welch two-sample t-test comparing the mean of the status codes with the mean
# of the Email_Type codes.
# NOTE(review): this compares the means of two different variables rather than
# testing whether status depends on Email_Type - confirm this is the intended analysis.
t.test(eml$Email_Status, eml$Email_Type)
## 
##  Welch Two Sample t-test
## 
## data:  eml$Email_Status and eml$Email_Type
## t = -410.45, df = 135460, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.059194 -1.049126
## sample estimates:
## mean of x mean of y 
## 0.2309335 1.2850936

Null hypothesis is rejected, there is significant difference in their means

# Welch two-sample t-test comparing the mean of the status codes with the mean
# of the Email_Campaign_Type codes.
# NOTE(review): this compares the means of two different variables rather than
# testing whether status depends on campaign type - confirm this is intended.
t.test(eml$Email_Status, eml$Email_Campaign_Type)
## 
##  Welch Two Sample t-test
## 
## data:  eml$Email_Status and eml$Email_Campaign_Type
## t = -781.21, df = 136240, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2.046422 -2.036179
## sample estimates:
## mean of x mean of y 
## 0.2309335 2.2722338

Null hypothesis is rejected, there is significant difference in their means

# Welch two-sample t-test comparing the mean of the status codes with the mean
# of the Email_Source_Type codes.
# NOTE(review): this compares the means of two different variables rather than
# testing whether status depends on source type - confirm this is intended.
t.test(eml$Email_Status, eml$Email_Source_Type)
## 
##  Welch Two Sample t-test
## 
## data:  eml$Email_Status and eml$Email_Source_Type
## t = -455.36, df = 136700, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.230854 -1.220304
## sample estimates:
## mean of x mean of y 
## 0.2309335 1.4565125

Null hypothesis is rejected, there is significant difference in their means

# Welch two-sample t-test comparing the mean of the status codes with the mean
# number of past communications.
# NOTE(review): this compares the means of two different variables rather than
# comparing past communications between status groups - confirm this is intended.
t.test(eml$Email_Status, eml$Total_Past_Communications)
## 
##  Welch Two Sample t-test
## 
## data:  eml$Email_Status and eml$Total_Past_Communications
## t = -567.5, df = 61701, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -28.80145 -28.60319
## sample estimates:
##  mean of x  mean of y 
##  0.2309335 28.9332499

Null hypothesis is rejected, there is significant difference in their means

# Welch two-sample t-test comparing the mean of the status codes with the mean
# number of links.
# NOTE(review): this compares the means of two different variables rather than
# comparing link counts between status groups - confirm this is intended.
t.test(eml$Email_Status, eml$Total_Links)
## 
##  Welch Two Sample t-test
## 
## data:  eml$Email_Status and eml$Total_Links
## t = -409.73, df = 66927, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -10.24738 -10.14981
## sample estimates:
##  mean of x  mean of y 
##  0.2309335 10.4295259

Null hypothesis is rejected, there is significant difference in their means

# Welch two-sample t-test comparing the mean of the status codes with the mean
# number of images.
# NOTE(review): this compares the means of two different variables rather than
# comparing image counts between status groups - confirm this is intended.
t.test(eml$Email_Status, eml$Total_Images)
## 
##  Welch Two Sample t-test
## 
## data:  eml$Email_Status and eml$Total_Images
## t = -152.57, df = 67701, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -3.362391 -3.277097
## sample estimates:
## mean of x mean of y 
## 0.2309335 3.5506779

Null hypothesis is rejected, there is significant difference in their means

# Welch two-sample t-test comparing the mean of the status codes with the mean
# word count.
# NOTE(review): this compares the means of two different variables rather than
# comparing word counts between status groups - confirm this is intended.
t.test(eml$Email_Status, eml$Word_Count)
## 
##  Welch Two Sample t-test
## 
## data:  eml$Email_Status and eml$Word_Count
## t = -673.24, df = 68352, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -701.7379 -697.6638
## sample estimates:
##   mean of x   mean of y 
##   0.2309335 699.9317513

Null hypothesis is rejected, there is significant difference in their means

# Welch two-sample t-test comparing the mean of the status codes with the mean
# subject hotness score.
# NOTE(review): this compares the means of two different variables rather than
# comparing hotness scores between status groups - confirm this is intended.
t.test(eml$Email_Status, eml$Subject_Hotness_Score)
## 
##  Welch Two Sample t-test
## 
## data:  eml$Email_Status and eml$Subject_Hotness_Score
## t = -202.8, df = 100320, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.8729027 -0.8561918
## sample estimates:
## mean of x mean of y 
## 0.2309335 1.0954808

Null hypothesis is rejected, there is significant difference in their means

# Welch two-sample t-test comparing the mean of the status codes with the mean
# of the send-time category codes.
# NOTE(review): this compares the means of two different variables rather than
# testing whether status depends on send time - confirm this is intended.
t.test(eml$Email_Status, eml$Time_Email_sent_Category)
## 
##  Welch Two Sample t-test
## 
## data:  eml$Email_Status and eml$Time_Email_sent_Category
## t = -575.52, df = 129590, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.774387 -1.762342
## sample estimates:
## mean of x mean of y 
## 0.2309335 1.9992978

Null hypothesis is rejected, there is significant difference in their means

# Re-read the data for modelling, treating empty strings as NA so that blank
# Customer_Location values are counted as missing.
x1 <- read.csv("email_campaign.csv", header = TRUE, na.strings = "")
# View() opens a spreadsheet-style viewer, so only call it in interactive use.
if (interactive()) {
  View(x1)
}

#Number of missing values in each column

# Count the NA values in every column; vapply() guarantees an integer vector
# even when x1 has no columns.
vapply(x1, function(column) sum(is.na(column)), integer(1))
##                  Email_ID                Email_Type 
##                         0                         0 
##     Subject_Hotness_Score         Email_Source_Type 
##                         0                         0 
##         Customer_Location       Email_Campaign_Type 
##                     11595                         0 
## Total_Past_Communications  Time_Email_sent_Category 
##                      6825                         0 
##                Word_Count               Total_Links 
##                         0                      2201 
##              Total_Images              Email_Status 
##                      1677                         0

#Number of unique values in each column

# Count the distinct values (NA counts as one value) in every column; vapply()
# guarantees an integer vector even when x1 has no columns.
vapply(x1, function(column) length(unique(column)), integer(1))
##                  Email_ID                Email_Type 
##                     68353                         2 
##     Subject_Hotness_Score         Email_Source_Type 
##                        51                         2 
##         Customer_Location       Email_Campaign_Type 
##                         8                         3 
## Total_Past_Communications  Time_Email_sent_Category 
##                        65                         3 
##                Word_Count               Total_Links 
##                       151                        38 
##              Total_Images              Email_Status 
##                        46                         3

#Missing Values vs Observed

library(Amelia)
## Loading required package: Rcpp
## ## 
## ## Amelia II: Multiple Imputation
## ## (Version 1.7.4, built: 2015-12-05)
## ## Copyright (C) 2005-2018 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##
# Draw Amelia's missingness map for x1 (missing vs observed cells per column).
missmap(x1, main = "Missing values vs observed")

#Adjusting for the missing values

# Mean imputation: replace the NAs in each numeric column that has missing
# values with that column's mean over the observed values.
# Customer_Location also has missing values but is not imputed here.
impute_cols <- c("Total_Past_Communications", "Total_Images", "Total_Links")
x1[impute_cols] <- lapply(x1[impute_cols], function(values) {
  values[is.na(values)] <- mean(values, na.rm = TRUE)
  values
})

#Multinomial Logistic Regression

#Model1

library(nnet)
# Full model: email status regressed on all eight predictors. The fitted
# coefficients are reported for statuses 1 and 2, i.e. relative to status 0.
model1 <- multinom(
  Email_Status ~ Email_Type + Email_Source_Type + Subject_Hotness_Score +
    Total_Past_Communications + Total_Images + Total_Links + Word_Count +
    Email_Campaign_Type,
  data = x1
)
## # weights:  30 (18 variable)
## initial  value 75093.445767 
## iter  10 value 55666.211481
## iter  20 value 48219.812614
## iter  30 value 37664.590816
## final  value 36700.424898 
## converged
# Print coefficients, standard errors, residual deviance and AIC for model1.
summary(model1)
## Call:
## multinom(formula = Email_Status ~ Email_Type + Email_Source_Type + 
##     Subject_Hotness_Score + Total_Past_Communications + Total_Images + 
##     Total_Links + Word_Count + Email_Campaign_Type, data = x1)
## 
## Coefficients:
##   (Intercept) Email_Type Email_Source_Type Subject_Hotness_Score
## 1   -1.744351 -0.3990973       -0.09244969            -0.1116074
## 2   -2.357175 -0.3788657       -0.26763771            -0.8572034
##   Total_Past_Communications Total_Images Total_Links   Word_Count
## 1                0.01988197  0.004920025 -0.02341395 -0.001721480
## 2                0.02199097  0.011561206 -0.01076142 -0.001586621
##   Email_Campaign_Type
## 1           0.7094060
## 2           0.5025526
## 
## Std. Errors:
##   (Intercept) Email_Type Email_Source_Type Subject_Hotness_Score
## 1  0.11537644 0.02700582        0.02245943            0.01654177
## 2  0.01142859 0.04598975        0.04208503            0.03404208
##   Total_Past_Communications Total_Images Total_Links   Word_Count
## 1               0.001209576  0.003099949 0.002779562 5.098161e-05
## 2               0.001902322  0.005822980 0.005272876 8.675143e-05
##   Email_Campaign_Type
## 1          0.02878372
## 2          0.04373899
## 
## Residual Deviance: 73400.85 
## AIC: 73436.85

#model2

library(nnet)
# Reduced model: six predictors (Email_Type and Email_Source_Type are not included).
model2 <- multinom(
  Email_Status ~ Subject_Hotness_Score + Total_Past_Communications +
    Total_Images + Total_Links + Word_Count + Email_Campaign_Type,
  data = x1
)
## # weights:  24 (14 variable)
## initial  value 75093.445767 
## iter  10 value 55916.752578
## iter  20 value 39695.126815
## iter  30 value 36847.544934
## iter  40 value 36841.948101
## final  value 36838.518566 
## converged
# Print coefficients, standard errors, residual deviance and AIC for model2.
summary(model2)
## Call:
## multinom(formula = Email_Status ~ Subject_Hotness_Score + Total_Past_Communications + 
##     Total_Images + Total_Links + Word_Count + Email_Campaign_Type, 
##     data = x1)
## 
## Coefficients:
##   (Intercept) Subject_Hotness_Score Total_Past_Communications Total_Images
## 1   -2.426955           -0.06898217                0.02335415  0.004469296
## 2   -3.260972           -0.81295725                0.02599598  0.011863567
##    Total_Links   Word_Count Email_Campaign_Type
## 1 -0.019787692 -0.001672989           0.6326252
## 2 -0.008730013 -0.001535253           0.4216581
## 
## Std. Errors:
##   (Intercept) Subject_Hotness_Score Total_Past_Communications Total_Images
## 1  0.10274726            0.01625189               0.001184220  0.003096449
## 2  0.01174025            0.03204970               0.001906138  0.005828354
##   Total_Links   Word_Count Email_Campaign_Type
## 1 0.002759127 5.065930e-05          0.02811320
## 2 0.005270137 8.488129e-05          0.03515953
## 
## Residual Deviance: 73677.04 
## AIC: 73705.04

#model3

library(nnet)
# Smallest model: five predictors (subject score, past communications,
# word count, images and campaign type).
model3 <- multinom(
  Email_Status ~ Subject_Hotness_Score + Total_Past_Communications +
    Word_Count + Total_Images + Email_Campaign_Type,
  data = x1
)
## # weights:  21 (12 variable)
## initial  value 75093.445767 
## iter  10 value 55776.419192
## iter  20 value 39923.210209
## iter  30 value 36905.816124
## iter  40 value 36864.892535
## iter  40 value 36864.892204
## iter  40 value 36864.892204
## final  value 36864.892204 
## converged
 # Print coefficients, standard errors, residual deviance and AIC for model3.
 summary(model3)
## Call:
## multinom(formula = Email_Status ~ Subject_Hotness_Score + Total_Past_Communications + 
##     Word_Count + Total_Images + Email_Campaign_Type, data = x1)
## 
## Coefficients:
##   (Intercept) Subject_Hotness_Score Total_Past_Communications   Word_Count
## 1   -2.528275           -0.07320525                0.02238641 -0.001711529
## 2   -3.298855           -0.81623065                0.02551313 -0.001552492
##   Total_Images Email_Campaign_Type
## 1 -0.012027182           0.6388005
## 2  0.004518791           0.4225487
## 
## Std. Errors:
##   (Intercept) Subject_Hotness_Score Total_Past_Communications   Word_Count
## 1  0.10243115            0.01631211               0.001179262 5.049971e-05
## 2  0.01171327            0.03151871               0.001868996 8.307023e-05
##   Total_Images Email_Campaign_Type
## 1  0.002084386          0.02816958
## 2  0.003814353          0.03494645
## 
## Residual Deviance: 73729.78 
## AIC: 73753.78

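#We can see that model 1 is the best-fitting model: it has the lowest residual deviance (73400.85) and, more importantly given its extra parameters, the lowest AIC (73436.85 against 73705.04 for model 2 and 73753.78 for model 3)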

#Value prediction of the model

# Predicted status class from model1 for the first 1000 rows of x1.
predict(model1, newdata = x1[1:1000, ])
##    [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##   [35] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##   [69] 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
##  [103] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [137] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [171] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [205] 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [239] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [273] 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [307] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [341] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [375] 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0
##  [409] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [443] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [477] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [511] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
##  [545] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [579] 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [613] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [647] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [681] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [715] 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [749] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [783] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [817] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [851] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [885] 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [919] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [953] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [987] 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Levels: 0 1 2

#Probability prediction of the model

# Predicted class probabilities for the first 250 rows of x1; the result has
# one column per Email_Status level (0, 1, 2).
# The type is written out in full ("probs") rather than relying on partial
# matching of the abbreviation "prob".
predict(model1, newdata = x1[seq_len(250), ], type = "probs")
##             0          1           2
## 1   0.7994347 0.18870321 0.011862040
## 2   0.8872405 0.10469920 0.008060293
## 3   0.7868262 0.16282683 0.050346964
## 4   0.8941847 0.10188840 0.003926873
## 5   0.7916789 0.16534014 0.042981009
## 6   0.7792162 0.19545938 0.025324439
## 7   0.6812883 0.30870792 0.010003753
## 8   0.9339780 0.05472462 0.011297329
## 9   0.8506947 0.13639569 0.012909652
## 10  0.8325265 0.12936454 0.038108988
## 11  0.7629825 0.18561695 0.051400557
## 12  0.9040086 0.07668014 0.019311292
## 13  0.6242989 0.28900289 0.086698165
## 14  0.9334508 0.06524469 0.001304550
## 15  0.8996868 0.09587990 0.004433333
## 16  0.8976367 0.09373305 0.008630229
## 17  0.8709431 0.11904924 0.010007623
## 18  0.9164345 0.06745793 0.016107523
## 19  0.8641325 0.10738471 0.028482749
## 20  0.7962893 0.18548533 0.018225381
## 21  0.8007478 0.15181595 0.047436202
## 22  0.8355161 0.14080658 0.023677328
## 23  0.5903011 0.32846626 0.081232619
## 24  0.9505582 0.04673992 0.002701897
## 25  0.9186483 0.07438351 0.006968180
## 26  0.8965194 0.09124814 0.012232476
## 27  0.7510600 0.18899379 0.059946226
## 28  0.8991881 0.09013886 0.010673033
## 29  0.7850925 0.20428789 0.010619591
## 30  0.8937606 0.09075157 0.015487848
## 31  0.7342064 0.22608943 0.039704146
## 32  0.5336437 0.36918157 0.097174760
## 33  0.8398262 0.13785382 0.022319961
## 34  0.8712404 0.09964071 0.029118930
## 35  0.9052508 0.07861932 0.016129896
## 36  0.8317971 0.16610496 0.002097907
## 37  0.7646116 0.22086966 0.014518720
## 38  0.5233914 0.37170759 0.104901026
## 39  0.8898010 0.09968804 0.010511004
## 40  0.4772321 0.39411555 0.128652390
## 41  0.8105773 0.17383037 0.015592375
## 42  0.8845984 0.10315328 0.012248314
## 43  0.9217975 0.07431488 0.003887642
## 44  0.9395194 0.05329981 0.007180820
## 45  0.8597951 0.11516282 0.025042067
## 46  0.7175675 0.23399930 0.048433169
## 47  0.6168754 0.27657401 0.106550620
## 48  0.8058703 0.17901518 0.015114521
## 49  0.8031730 0.16315189 0.033675139
## 50  0.8739108 0.10168296 0.024406283
## 51  0.7888110 0.18844690 0.022742119
## 52  0.7764843 0.19316454 0.030351121
## 53  0.6715543 0.23207308 0.096372608
## 54  0.8597942 0.10816467 0.032041174
## 55  0.9258159 0.06705464 0.007129481
## 56  0.8996437 0.09740757 0.002948721
## 57  0.8171831 0.17642617 0.006390758
## 58  0.7935609 0.19733884 0.009100218
## 59  0.8625735 0.09476753 0.042658997
## 60  0.7978934 0.16214559 0.039961054
## 61  0.8816845 0.09943326 0.018882218
## 62  0.8991296 0.09183396 0.009036434
## 63  0.8520174 0.12558841 0.022394157
## 64  0.8745080 0.11785080 0.007641182
## 65  0.9376944 0.06029867 0.002006912
## 66  0.8248725 0.15250051 0.022626956
## 67  0.7079917 0.25742734 0.034580980
## 68  0.8303002 0.13869852 0.031001251
## 69  0.8286654 0.13684028 0.034494359
## 70  0.8220704 0.15726280 0.020666795
## 71  0.4118437 0.45312278 0.135033537
## 72  0.9322045 0.06110630 0.006689217
## 73  0.9015989 0.09018898 0.008212087
## 74  0.3550399 0.50666982 0.138290282
## 75  0.5581604 0.32499753 0.116842112
## 76  0.9163870 0.07543874 0.008174302
## 77  0.8516384 0.12934823 0.019013384
## 78  0.8503985 0.13874528 0.010856238
## 79  0.9083472 0.08659881 0.005054022
## 80  0.8882492 0.10997127 0.001779568
## 81  0.7831074 0.16730592 0.049586660
## 82  0.7829502 0.18121489 0.035834943
## 83  0.8181711 0.15512594 0.026702944
## 84  0.9111626 0.08569785 0.003139536
## 85  0.9273157 0.06388122 0.008803113
## 86  0.6925191 0.28175212 0.025728816
## 87  0.8839191 0.10744889 0.008631974
## 88  0.8656274 0.12566431 0.008708323
## 89  0.8585198 0.11970326 0.021776933
## 90  0.3750867 0.50311413 0.121799158
## 91  0.8336228 0.14464907 0.021728165
## 92  0.9406155 0.05367000 0.005714551
## 93  0.6565558 0.26721791 0.076226303
## 94  0.9013341 0.09709491 0.001570994
## 95  0.9188008 0.07078413 0.010415047
## 96  0.6691154 0.25365491 0.077229682
## 97  0.7589752 0.19452290 0.046501905
## 98  0.8642685 0.11102342 0.024708111
## 99  0.8639685 0.10779094 0.028240537
## 100 0.8868225 0.09987918 0.013298320
## 101 0.6029462 0.32304337 0.074010467
## 102 0.9175720 0.07157018 0.010857860
## 103 0.8226498 0.15661078 0.020739371
## 104 0.8110053 0.18298506 0.006009673
## 105 0.7368868 0.19520900 0.067904220
## 106 0.5414560 0.35790952 0.100634457
## 107 0.7168210 0.23072587 0.052453177
## 108 0.7313510 0.18701919 0.081629789
## 109 0.6077993 0.30947421 0.082726453
## 110 0.7681067 0.18082240 0.051070859
## 111 0.6061836 0.29081037 0.103006036
## 112 0.9098594 0.08598020 0.004160452
## 113 0.8509468 0.13204283 0.017010342
## 114 0.8304453 0.13753522 0.032019524
## 115 0.8738766 0.11022549 0.015897891
## 116 0.8140457 0.14851128 0.037443042
## 117 0.7694220 0.20015447 0.030423527
## 118 0.8844747 0.10459343 0.010931898
## 119 0.7032862 0.24501647 0.051697288
## 120 0.8563424 0.12993280 0.013724829
## 121 0.8557625 0.11327153 0.030965958
## 122 0.8761564 0.10297225 0.020871332
## 123 0.7156842 0.23888519 0.045430567
## 124 0.8771888 0.11996511 0.002846064
## 125 0.7078937 0.23784946 0.054256827
## 126 0.8608856 0.11372559 0.025388830
## 127 0.7432554 0.19417238 0.062572254
## 128 0.9110823 0.07999770 0.008919961
## 129 0.6793869 0.26755026 0.053062877
## 130 0.8909160 0.08854778 0.020536257
## 131 0.7470816 0.19863522 0.054283186
## 132 0.8818443 0.10234806 0.015807623
## 133 0.7317886 0.23221497 0.035996472
## 134 0.9015272 0.08351457 0.014958231
## 135 0.5572867 0.35108079 0.091632464
## 136 0.9277982 0.06354196 0.008659830
## 137 0.9042865 0.07397748 0.021735974
## 138 0.6129477 0.29435629 0.092696039
## 139 0.6186020 0.29612395 0.085274008
## 140 0.9027243 0.07237283 0.024902897
## 141 0.7065439 0.23098264 0.062473439
## 142 0.8556328 0.11194532 0.032421838
## 143 0.8900613 0.10079316 0.009145508
## 144 0.5256850 0.37884625 0.095468728
## 145 0.6054727 0.30346206 0.091065284
## 146 0.9171321 0.06999101 0.012876852
## 147 0.7781250 0.17511472 0.046760266
## 148 0.9067645 0.07719690 0.016038607
## 149 0.5733338 0.34885580 0.077810425
## 150 0.8110802 0.16763753 0.021282298
## 151 0.7900728 0.18370984 0.026217384
## 152 0.6136275 0.31697214 0.069400348
## 153 0.8246090 0.15371789 0.021673071
## 154 0.6589650 0.25745025 0.083584721
## 155 0.8601830 0.12926644 0.010550513
## 156 0.8353377 0.12769507 0.036967225
## 157 0.8231643 0.13821903 0.038616643
## 158 0.8114634 0.16778580 0.020750796
## 159 0.5550993 0.35381949 0.091081199
## 160 0.8830565 0.11166628 0.005277229
## 161 0.5476790 0.34639033 0.105930638
## 162 0.9629018 0.03176017 0.005338017
## 163 0.9553495 0.04070324 0.003947262
## 164 0.6562856 0.26143815 0.082276222
## 165 0.9190037 0.05830522 0.022691100
## 166 0.5311385 0.37468213 0.094179368
## 167 0.9255814 0.06519092 0.009227677
## 168 0.9108564 0.06558863 0.023555011
## 169 0.8294097 0.14593953 0.024650729
## 170 0.9247922 0.06869228 0.006515487
## 171 0.7211574 0.21941031 0.059432328
## 172 0.8350814 0.12831836 0.036600191
## 173 0.8733258 0.11130409 0.015370150
## 174 0.5994711 0.28820726 0.112321633
## 175 0.8804894 0.10252088 0.016989691
## 176 0.8988317 0.08290761 0.018260666
## 177 0.7661628 0.18695831 0.046878917
## 178 0.9481249 0.04397490 0.007900178
## 179 0.8450440 0.15154153 0.003414477
## 180 0.8721303 0.09222892 0.035640798
## 181 0.4414992 0.42022707 0.138273679
## 182 0.7587850 0.18254316 0.058671813
## 183 0.8009278 0.14947575 0.049596417
## 184 0.8804995 0.11064005 0.008860471
## 185 0.9302343 0.05749523 0.012270465
## 186 0.7095972 0.21892924 0.071473564
## 187 0.7478191 0.19288501 0.059295840
## 188 0.6056358 0.31176858 0.082595654
## 189 0.7669533 0.16491930 0.068127385
## 190 0.9504421 0.03601293 0.013544992
## 191 0.6335416 0.27818028 0.088278119
## 192 0.7549031 0.18992951 0.055167390
## 193 0.8142402 0.14063260 0.045127160
## 194 0.6465204 0.29909295 0.054386626
## 195 0.8670788 0.12512084 0.007800351
## 196 0.9280393 0.06572036 0.006240295
## 197 0.7103166 0.23381420 0.055869248
## 198 0.9499509 0.04530185 0.004747285
## 199 0.8879090 0.09711760 0.014973391
## 200 0.8447750 0.15194745 0.003277596
## 201 0.9595555 0.03683425 0.003610250
## 202 0.8352167 0.12463081 0.040152501
## 203 0.9000328 0.08886075 0.011106486
## 204 0.8920527 0.09600793 0.011939404
## 205 0.3708325 0.48773241 0.141435074
## 206 0.8470597 0.11269609 0.040244249
## 207 0.9300680 0.05323318 0.016698847
## 208 0.9231273 0.05877929 0.018093413
## 209 0.8557700 0.10396241 0.040267628
## 210 0.8133273 0.15347002 0.033202687
## 211 0.5876095 0.32635584 0.086034630
## 212 0.7808546 0.17398164 0.045163778
## 213 0.7875909 0.16683506 0.045574080
## 214 0.9266210 0.06171750 0.011661496
## 215 0.7303928 0.20888832 0.060718890
## 216 0.8882928 0.10684629 0.004860949
## 217 0.8051601 0.17507984 0.019760088
## 218 0.8288521 0.13370663 0.037441221
## 219 0.8738252 0.10866440 0.017510431
## 220 0.9513406 0.04383167 0.004827732
## 221 0.7711366 0.19719873 0.031664631
## 222 0.8819245 0.08640262 0.031672911
## 223 0.8878947 0.10627873 0.005826605
## 224 0.9499919 0.04260709 0.007401007
## 225 0.7016932 0.28951365 0.008793193
## 226 0.7756352 0.17475891 0.049605931
## 227 0.8180010 0.15363377 0.028365258
## 228 0.8776260 0.09178730 0.030586745
## 229 0.6304827 0.28787916 0.081638165
## 230 0.7255362 0.21831263 0.056151201
## 231 0.9292908 0.06417201 0.006537209
## 232 0.6978738 0.26551930 0.036606860
## 233 0.7703275 0.21282769 0.016844820
## 234 0.9093400 0.06667031 0.023989681
## 235 0.8210865 0.14225589 0.036657649
## 236 0.7082252 0.25205919 0.039715650
## 237 0.8162781 0.12745115 0.056270775
## 238 0.8072259 0.13935522 0.053418918
## 239 0.9029757 0.09210324 0.004921074
## 240 0.8471856 0.11471311 0.038101250
## 241 0.8808709 0.09948099 0.019648095
## 242 0.8036361 0.15226930 0.044094598
## 243 0.8200968 0.16705286 0.012850346
## 244 0.9468003 0.04917718 0.004022506
## 245 0.8624065 0.12904168 0.008551844
## 246 0.7609966 0.17939806 0.059605327
## 247 0.8621722 0.10666506 0.031162767
## 248 0.6394304 0.26724466 0.093324908
## 249 0.8954822 0.09645938 0.008058391
## 250 0.8322736 0.14189734 0.025829045

#Misclassification error

# Confusion matrix: rows are the classes predicted by model1 on its fitting
# data, columns are the observed Email_Status values in x1.
cm <- table(
  predict(model1),
  x1[["Email_Status"]]
)
print(cm)
##    
##         0     1     2
##   0 54409 10449  2227
##   1   532   590   146
##   2     0     0     0

Correctly classified observations are the counts on the main diagonal of the confusion matrix.

# Misclassification rate: one minus the share of observations that lie on the
# diagonal of the confusion matrix (i.e. one minus the accuracy).
1 - sum(diag(cm)) / sum(cm)
## [1] 0.1953682

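Therefore the model misclassifies about 19.54% of the observations, which suggests a reasonably good fit. Note, however, that the model never predicts class 2, and that always predicting class 0 would misclassify about 19.62% of the observations (13412 of 68353), so the improvement over this baseline is small.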

#Test to study significance of coefficients using p-values

# Wald z-statistic for every model coefficient: estimate / standard error.
# summary() is evaluated once and reused instead of being computed twice for
# the same model.
model1_summary <- summary(model1)
z <- model1_summary$coefficients / model1_summary$standard.errors

# Two-sided p-value under the standard normal distribution. Taking the upper
# tail directly avoids the cancellation in 1 - pnorm(|z|), which rounds very
# small p-values to exactly 0 or to machine epsilon (2.22e-16).
p <- 2 * pnorm(abs(z), mean = 0, sd = 1, lower.tail = FALSE)
p
##   (Intercept)   Email_Type Email_Source_Type Subject_Hotness_Score
## 1           0 0.000000e+00      3.850088e-05          1.509304e-11
## 2           0 2.220446e-16      2.024756e-10          0.000000e+00
##   Total_Past_Communications Total_Images Total_Links Word_Count
## 1                         0    0.1124829  0.00000000          0
## 2                         0    0.0470950  0.04126058          0
##   Email_Campaign_Type
## 1                   0
## 2                   0

#CONCLUSION Email Status (read; unread; read and replied) depends on the following factors: Email Type, Email Source Type, Subject Hotness Score, Total Past Communications, Total Images, Total Links, Word Count and Email Campaign Type.

1) Out of these, for an email to be read and replied to, the statistically significant factors (p-value < 0.05) are: Email Type, Email Source Type, Subject Hotness Score, Total Past Communications, Total Images, Total Links, Word Count and Email Campaign Type.

The statistically insignificant factors are:

None

2) Out of these, for an email to be read, the statistically significant factors (p-value < 0.05) are: Email Type, Email Source Type, Subject Hotness Score, Total Past Communications, Total Links, Word Count and Email Campaign Type.

The statistically insignificant factors are:

-Total images

Email Status (read; unread; read and replied) does not depend on the following factors: Customer Location and Time Email Sent.