setwd("~/Documents/BOPS Project")
# Read the BOPS transaction lists for fiscal years 2012 and 2013
data12 <- read.csv("BOPS-FY12.csv")
data13 <- read.csv("BOPS-FY13.csv")
# Read the full transaction file
data_trans <- read.table("transaction.txt", header = TRUE, sep = ",")
# Stack the two BOPS files and flag transactions that appear in them
allbops <- rbind(data12, data13)
data_trans$bops <- ifelse(data_trans$transaction_id %in% allbops$transaction_id, 1, 0)
summary(data_trans)
## customer_id purchase_date transaction_id
## Min. :1.003e+05 13DEC2012:00:00:00: 12989 Min. : 7198
## 1st Qu.:2.569e+07 12DEC2012:00:00:00: 12286 1st Qu.:2649367
## Median :3.015e+07 18DEC2012:00:00:00: 12234 Median :3355444
## Mean :2.944e+10 06DEC2012:00:00:00: 11614 Mean :3338930
## 3rd Qu.:3.499e+07 11DEC2012:00:00:00: 10228 3rd Qu.:4030256
## Max. :9.197e+11 22DEC2011:00:00:00: 9896 Max. :4702552
## (Other) :1602255
## store_number net_purchase_amount sku return
## Min. : 2.0 Min. : 0.00 Min. :10033405 Min. :0.000
## 1st Qu.: 2.0 1st Qu.: 45.13 1st Qu.:17859869 1st Qu.:0.000
## Median : 2.0 Median : 90.00 Median :18126698 Median :0.000
## Mean : 229.5 Mean : 170.33 Mean :18300993 Mean :0.101
## 3rd Qu.: 2.0 3rd Qu.: 189.00 3rd Qu.:18584417 3rd Qu.:0.000
## Max. :5998.0 Max. :39422.00 Max. :80006100 Max. :1.000
##
## return_date return_store time_to_return
## :1502656 Min. : 2.0 Min. : 0.0
## 02JUN2011:00:00:00: 921 1st Qu.: 2.0 1st Qu.: 8.0
## 21JUL2011:00:00:00: 803 Median : 2.0 Median : 17.0
## 25APR2012:00:00:00: 802 Mean : 756.7 Mean : 27.8
## 10JAN2013:00:00:00: 781 3rd Qu.:1479.0 3rd Qu.: 33.0
## 08JAN2013:00:00:00: 744 Max. :5998.0 Max. :1679.0
## (Other) : 164795 NA's :1502656 NA's :1502656
## gender age_band est_income_code ethnic_code
## :210507 Min. : 0.00 Min. :1.00 N :615387
## F:693941 1st Qu.: 0.00 1st Qu.:4.00 S :204830
## M:719292 Median : 5.00 Median :6.00 H :187647
## U: 47762 Mean : 4.95 Mean :5.42 Z :161313
## 3rd Qu.: 8.00 3rd Qu.:7.00 G :120011
## Max. :13.00 Max. :9.00 U : 71027
## NA's :72038 NA's :68664 (Other):311287
## homeowner_code length_of_residence child year
## : 68664 Min. : 0.00 : 68664 Min. :2010
## O:1064498 1st Qu.: 2.00 N:977479 1st Qu.:2011
## R: 538340 Median : 6.00 Y:625359 Median :2012
## Mean : 7.14 Mean :2012
## 3rd Qu.:13.00 3rd Qu.:2012
## Max. :15.00 Max. :2013
## NA's :68664
## month month_index summary bops
## DEC :425369 Min. :13.00 Min. : 1.00 Min. :0.0000
## NOV :187468 1st Qu.:22.00 1st Qu.: 5.00 1st Qu.:0.0000
## FEB :179716 Median :31.00 Median : 9.00 Median :0.0000
## MAY :143596 Mean :31.47 Mean :10.07 Mean :0.1664
## JAN :127777 3rd Qu.:41.00 3rd Qu.:12.00 3rd Qu.:0.0000
## APR :102339 Max. :48.00 Max. :21.00 Max. :1.0000
## (Other):505237 NA's :4
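As a quick sanity check on the matching step (not part of the original code), the new bops flag can be tabulated directly; its mean should agree with the 0.1664 reported in the summary above.
# Illustrative check of the BOPS flag created above
table(data_trans$bops)   # counts of non-BOPS vs BOPS transactions
mean(data_trans$bops)    # share of BOPS transactions, roughly 0.166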
##Subsetting the data to only the BOPS implementation period
# Implementation period: month_index >= 25 for stores 2 and 6, month_index >= 37 for store 5998
data_df1 <- subset(data_trans, (month_index >= 25) & (store_number == 2 | store_number == 6))
data_df2 <- subset(data_trans, (month_index >= 37) & (store_number == 5998))
df_m4 <- rbind(data_df1, data_df2)
##rename net_purchase_amount as price
colnames(df_m4)[colnames(df_m4) == "net_purchase_amount"] <- "price"
Creating Dummies
attach(df_m4)
#Create dummy for return
df_m4$return=ifelse(is.na(df_m4$time_to_return),0,1)
#create dummy variables for ethnic code
df_m4$white <- ifelse(df_m4$ethnic_code == "N"| df_m4$ethnic_code == "Z", 1, 0)
#create dummy variables for store number
df_m4$store2 <- ifelse(store_number == "2", 1, 0)
df_m4$store6 <- ifelse(store_number == "6", 1, 0)
df_m4$store5998 <- ifelse(store_number == "5998", 1, 0)
#create dummy variables for homeowner
df_m4$homeowner <- ifelse(homeowner_code == "O",1,ifelse(homeowner_code == "R",0,NA))
#create dummy variables for male
df_m4$male <- ifelse(gender == "M",1,ifelse(gender == "F",0,NA))
#create dummy variable for child
df_m4$kid <- ifelse(child == "Y",1,ifelse(child == "N",0,NA))
#create dummy variables for product category
df_m4$bridal <- ifelse(summary == "1", 1, 0)
df_m4$goldwedbands <- ifelse(summary == "2", 1, 0)
df_m4$solitaires <- ifelse(summary == "3", 1, 0)
df_m4$dfashion <- ifelse(summary == "4", 1, 0)
df_m4$semiprecious <- ifelse(summary == "5", 1, 0)
df_m4$mens <- ifelse(summary == "6", 1, 0)
df_m4$goldearrings <- ifelse(summary == "7", 1, 0)
df_m4$specialevent <- ifelse(summary == "8", 1, 0)
df_m4$beads <- ifelse(summary == "9", 1, 0)
df_m4$piercings <- ifelse(summary == "10", 1, 0)
df_m4$dsoljewelry <- ifelse(summary == "11", 1, 0)
df_m4$gold <- ifelse(summary == "12", 1, 0)
df_m4$watch <- ifelse(summary == "13", 1, 0)
df_m4$preowned <- ifelse(summary == "14", 1, 0)
df_m4$specialized <- ifelse(summary == "15", 1, 0)
df_m4$events <- ifelse(summary == "17", 1, 0)
df_m4$diawedband <- ifelse(summary == "20", 1, 0)
df_m4$silver <- ifelse(summary == "21", 1, 0)
#create dummy variables for month
df_m4$JAN <- ifelse(month == "JAN", 1, 0)
df_m4$FEB <- ifelse(month == "FEB", 1, 0)
df_m4$MAR <- ifelse(month == "MAR", 1, 0)
df_m4$APR <- ifelse(month == "APR", 1, 0)
df_m4$MAY <- ifelse(month == "MAY", 1, 0)
df_m4$JUN <- ifelse(month == "JUN", 1, 0)
df_m4$JUL <- ifelse(month == "JUL", 1, 0)
df_m4$AUG <- ifelse(month == "AUG", 1, 0)
df_m4$SEP <- ifelse(month == "SEP", 1, 0)
df_m4$OCT <- ifelse(month == "OCT", 1, 0)
df_m4$NOV <- ifelse(month == "NOV", 1, 0)
df_m4$DEC <- ifelse(month == "DEC", 1, 0)
##Create fiscal year variables and a within-year month index
df_m4$month_index2 <- df_m4$month_index %% 12
df_m4$y_2011 <- ifelse(df_m4$year == "2011", 1, 0)
df_m4$y_2012 <- ifelse(df_m4$year == "2012", 1, 0)
df_m4$y_2013 <- ifelse(df_m4$year == "2013", 1, 0)
# Fiscal year: month_index 13-24 -> FY11, 25-36 -> FY12, 37-48 -> FY13
df_m4$fiscalyr <- ifelse(df_m4$month_index >= 13 & df_m4$month_index <= 24, 11,
                         ifelse(df_m4$month_index >= 25 & df_m4$month_index <= 36, 12, 13))
write.csv(df_m4, "df_m4.csv")
df_m4 = read.csv("df_m4.csv")
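As a design note (not part of the original code), the hand-rolled 0/1 columns above could also be generated automatically: factor() plus model.matrix() expands each level into its own indicator, which is what the factor(...) terms in the glm formulas below do implicitly. A minimal sketch, assuming df_m4 as built above:
# Illustrative alternative to the manual month dummies
month_dummies <- model.matrix(~ factor(month) - 1, data = df_m4)
head(month_dummies)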
##Data Aggregation
To investigate the impact of BOPS on consumer return behavior, we use transaction-level data. We do not aggregate the data to the customer level because a return is a behavior we observe at the level of a single transaction, in contrast to purchases.
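For contrast, a customer-level aggregation would collapse the per-transaction return indicator into a per-customer return rate, losing the outcome we want to model. A minimal sketch of what that aggregation would look like (illustrative, not used in the analysis):
# Customer-level aggregation (not used): mean return and BOPS rates per customer
cust_level <- aggregate(cbind(return, bops) ~ customer_id, data = df_m4, FUN = mean)
head(cust_level)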
#install.packages("VIF")
library(VIF)
#install.packages("usdm")
library(usdm)
## Loading required package: sp
## Loading required package: raster
##
## Attaching package: 'usdm'
## The following object is masked from 'package:VIF':
##
## vif
df=data.frame(df_m4$bops,df_m4$kid,df_m4$price,df_m4$age_band, df_m4$est_income_code, df_m4$white, df_m4$male, df_m4$JAN, df_m4$FEB, df_m4$MAR, df_m4$APR, df_m4$MAY, df_m4$JUN, df_m4$JUL, df_m4$AUG, df_m4$SEP, df_m4$OCT, df_m4$NOV, df_m4$store5998, df_m4$store6, df_m4$y_2011,df_m4$y_2013)
cor(df) # all pairwise correlations that can be computed are below 0.8; columns with missing values return NA here (cor(df, use = "pairwise.complete.obs") would fill those in)
## df_m4.bops df_m4.kid df_m4.price df_m4.age_band
## df_m4.bops 1.000000000 NA -0.0101172279 NA
## df_m4.kid NA 1 NA NA
## df_m4.price -0.010117228 NA 1.0000000000 NA
## df_m4.age_band NA NA NA 1
## df_m4.est_income_code NA NA NA NA
## df_m4.white -0.028404731 NA -0.0038756811 NA
## df_m4.male NA NA NA NA
## df_m4.JAN -0.007311232 NA -0.0067796287 NA
## df_m4.FEB 0.009480953 NA -0.0037767351 NA
## df_m4.MAR 0.012546736 NA 0.0323048257 NA
## df_m4.APR 0.001051713 NA -0.0009001587 NA
## df_m4.MAY 0.009398969 NA -0.0070794211 NA
## df_m4.JUN 0.011534453 NA 0.0255734488 NA
## df_m4.JUL 0.015268638 NA 0.0270406534 NA
## df_m4.AUG -0.051095312 NA 0.0120217150 NA
## df_m4.SEP -0.018947926 NA 0.0131514042 NA
## df_m4.OCT -0.013401491 NA -0.0074665929 NA
## df_m4.NOV 0.006988895 NA 0.0036523372 NA
## df_m4.store5998 0.072886923 NA 0.0107170312 NA
## df_m4.store6 -0.060354787 NA -0.0189206033 NA
## df_m4.y_2011 -0.112449810 NA -0.0011609810 NA
## df_m4.y_2013 0.030826311 NA 0.0052551304 NA
## df_m4.est_income_code df_m4.white df_m4.male
## df_m4.bops NA -0.0284047306 NA
## df_m4.kid NA NA NA
## df_m4.price NA -0.0038756811 NA
## df_m4.age_band NA NA NA
## df_m4.est_income_code 1 NA NA
## df_m4.white NA 1.0000000000 NA
## df_m4.male NA NA 1
## df_m4.JAN NA -0.0049391699 NA
## df_m4.FEB NA 0.0073368986 NA
## df_m4.MAR NA 0.0033286286 NA
## df_m4.APR NA -0.0033764480 NA
## df_m4.MAY NA -0.0046344057 NA
## df_m4.JUN NA -0.0037389399 NA
## df_m4.JUL NA -0.0046412131 NA
## df_m4.AUG NA -0.0001984816 NA
## df_m4.SEP NA -0.0036863832 NA
## df_m4.OCT NA -0.0035437577 NA
## df_m4.NOV NA 0.0034090014 NA
## df_m4.store5998 NA -0.1677960962 NA
## df_m4.store6 NA 0.0152509963 NA
## df_m4.y_2011 NA 0.0156282682 NA
## df_m4.y_2013 NA -0.0171879084 NA
## df_m4.JAN df_m4.FEB df_m4.MAR df_m4.APR
## df_m4.bops -0.007311232 0.009480953 0.012546736 0.0010517127
## df_m4.kid NA NA NA NA
## df_m4.price -0.006779629 -0.003776735 0.032304826 -0.0009001587
## df_m4.age_band NA NA NA NA
## df_m4.est_income_code NA NA NA NA
## df_m4.white -0.004939170 0.007336899 0.003328629 -0.0033764480
## df_m4.male NA NA NA NA
## df_m4.JAN 1.000000000 -0.105232142 -0.068525534 -0.0770189105
## df_m4.FEB -0.105232142 1.000000000 -0.081548330 -0.0916558132
## df_m4.MAR -0.068525534 -0.081548330 1.000000000 -0.0596848395
## df_m4.APR -0.077018911 -0.091655813 -0.059684839 1.0000000000
## df_m4.MAY -0.093225239 -0.110942040 -0.072243731 -0.0811979580
## df_m4.JUN -0.062634251 -0.074537450 -0.048537628 -0.0545536093
## df_m4.JUL -0.064605257 -0.076883032 -0.050065034 -0.0562703301
## df_m4.AUG -0.065238866 -0.077637054 -0.050556042 -0.0568221950
## df_m4.SEP -0.062997766 -0.074970049 -0.048819330 -0.0548702267
## df_m4.OCT -0.073090492 -0.086980826 -0.056640561 -0.0636608579
## df_m4.NOV -0.106762970 -0.127052523 -0.082734626 -0.0929891442
## df_m4.store5998 0.012676869 -0.006808460 -0.006366504 0.0051799761
## df_m4.store6 0.009939449 -0.005839145 -0.005919546 -0.0018472821
## df_m4.y_2011 -0.160924177 -0.191506686 -0.124706174 -0.1401628436
## df_m4.y_2013 0.220260047 0.219152450 0.151079037 0.2185266535
## df_m4.MAY df_m4.JUN df_m4.JUL
## df_m4.bops 0.0093989693 0.011534453 0.015268638
## df_m4.kid NA NA NA
## df_m4.price -0.0070794211 0.025573449 0.027040653
## df_m4.age_band NA NA NA
## df_m4.est_income_code NA NA NA
## df_m4.white -0.0046344057 -0.003738940 -0.004641213
## df_m4.male NA NA NA
## df_m4.JAN -0.0932252389 -0.062634251 -0.064605257
## df_m4.FEB -0.1109420404 -0.074537450 -0.076883032
## df_m4.MAR -0.0722437305 -0.048537628 -0.050065034
## df_m4.APR -0.0811979580 -0.054553609 -0.056270330
## df_m4.MAY 1.0000000000 -0.066032786 -0.068110740
## df_m4.JUN -0.0660327863 1.000000000 -0.045760839
## df_m4.JUL -0.0681107397 -0.045760839 1.000000000
## df_m4.AUG -0.0687787281 -0.046209633 -0.047663782
## df_m4.SEP -0.0664160264 -0.044622230 -0.046026425
## df_m4.OCT -0.0770563832 -0.051771053 -0.053400211
## df_m4.NOV -0.1125559310 -0.075621757 -0.078001461
## df_m4.store5998 -0.0008261705 0.006199132 0.003397435
## df_m4.store6 0.0031296349 -0.001676022 -0.003427566
## df_m4.y_2011 -0.1696559258 -0.113984924 -0.117571860
## df_m4.y_2013 0.2005907218 0.118589887 0.126088923
## df_m4.AUG df_m4.SEP df_m4.OCT df_m4.NOV
## df_m4.bops -0.0510953121 -0.018947926 -0.013401491 0.006988895
## df_m4.kid NA NA NA NA
## df_m4.price 0.0120217150 0.013151404 -0.007466593 0.003652337
## df_m4.age_band NA NA NA NA
## df_m4.est_income_code NA NA NA NA
## df_m4.white -0.0001984816 -0.003686383 -0.003543758 0.003409001
## df_m4.male NA NA NA NA
## df_m4.JAN -0.0652388660 -0.062997766 -0.073090492 -0.106762970
## df_m4.FEB -0.0776370541 -0.074970049 -0.086980826 -0.127052523
## df_m4.MAR -0.0505560416 -0.048819330 -0.056640561 -0.082734626
## df_m4.APR -0.0568221950 -0.054870227 -0.063660858 -0.092989144
## df_m4.MAY -0.0687787281 -0.066416026 -0.077056383 -0.112555931
## df_m4.JUN -0.0462096333 -0.044622230 -0.051771053 -0.075621757
## df_m4.JUL -0.0476637816 -0.046026425 -0.053400211 -0.078001461
## df_m4.AUG 1.0000000000 -0.046477824 -0.053923928 -0.078766452
## df_m4.SEP -0.0464778240 1.000000000 -0.052071522 -0.076060650
## df_m4.OCT -0.0539239279 -0.052071522 1.000000000 -0.088246149
## df_m4.NOV -0.0787664520 -0.076060650 -0.088246149 1.000000000
## df_m4.store5998 -0.0114636277 -0.007341724 -0.001463996 0.012666226
## df_m4.store6 0.0137257731 0.009818707 0.002728975 0.005498699
## df_m4.y_2011 0.1261982207 0.144975576 0.130982368 0.140713606
## df_m4.y_2013 -0.1368321975 -0.132131708 -0.153300221 -0.223924981
## df_m4.store5998 df_m4.store6 df_m4.y_2011
## df_m4.bops 0.0728869233 -0.060354787 -0.112449810
## df_m4.kid NA NA NA
## df_m4.price 0.0107170312 -0.018920603 -0.001160981
## df_m4.age_band NA NA NA
## df_m4.est_income_code NA NA NA
## df_m4.white -0.1677960962 0.015250996 0.015628268
## df_m4.male NA NA NA
## df_m4.JAN 0.0126768690 0.009939449 -0.160924177
## df_m4.FEB -0.0068084604 -0.005839145 -0.191506686
## df_m4.MAR -0.0063665036 -0.005919546 -0.124706174
## df_m4.APR 0.0051799761 -0.001847282 -0.140162844
## df_m4.MAY -0.0008261705 0.003129635 -0.169655926
## df_m4.JUN 0.0061991320 -0.001676022 -0.113984924
## df_m4.JUL 0.0033974348 -0.003427566 -0.117571860
## df_m4.AUG -0.0114636277 0.013725773 0.126198221
## df_m4.SEP -0.0073417242 0.009818707 0.144975576
## df_m4.OCT -0.0014639957 0.002728975 0.130982368
## df_m4.NOV 0.0126662257 0.005498699 0.140713606
## df_m4.store5998 1.0000000000 -0.047190310 -0.097499559
## df_m4.store6 -0.0471903101 1.000000000 0.017788860
## df_m4.y_2011 -0.0974995592 0.017788860 1.000000000
## df_m4.y_2013 0.0878465092 -0.025130106 -0.337522862
## df_m4.y_2013
## df_m4.bops 0.03082631
## df_m4.kid NA
## df_m4.price 0.00525513
## df_m4.age_band NA
## df_m4.est_income_code NA
## df_m4.white -0.01718791
## df_m4.male NA
## df_m4.JAN 0.22026005
## df_m4.FEB 0.21915245
## df_m4.MAR 0.15107904
## df_m4.APR 0.21852665
## df_m4.MAY 0.20059072
## df_m4.JUN 0.11858989
## df_m4.JUL 0.12608892
## df_m4.AUG -0.13683220
## df_m4.SEP -0.13213171
## df_m4.OCT -0.15330022
## df_m4.NOV -0.22392498
## df_m4.store5998 0.08784651
## df_m4.store6 -0.02513011
## df_m4.y_2011 -0.33752286
## df_m4.y_2013 1.00000000
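Rather than scanning the matrix by eye, a small helper (illustrative, not in the original analysis) can flag any pair whose absolute correlation exceeds the 0.8 threshold used here:
# Flag pairs with |correlation| > 0.8 (illustrative helper)
cm <- cor(df, use = "pairwise.complete.obs")            # fill in the NA cells
high <- which(abs(cm) > 0.8 & upper.tri(cm), arr.ind = TRUE)
data.frame(var1 = rownames(cm)[high[, 1]],
           var2 = colnames(cm)[high[, 2]],
           corr = cm[high])
# expected to return zero rows, consistent with the note above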
vif(df) # VIF scores are all in the range (1, 1.66), well below 3, so multicollinearity is not a concern
## Variables VIF
## 1 df_m4.bops 1.032586
## 2 df_m4.kid 1.022891
## 3 df_m4.price 1.025776
## 4 df_m4.age_band 1.047151
## 5 df_m4.est_income_code 1.049683
## 6 df_m4.white 1.006558
## 7 df_m4.male 1.041855
## 8 df_m4.JAN 1.524594
## 9 df_m4.FEB 1.664023
## 10 df_m4.MAR 1.373884
## 11 df_m4.APR 1.476160
## 12 df_m4.MAY 1.613740
## 13 df_m4.JUN 1.264681
## 14 df_m4.JUL 1.251023
## 15 df_m4.AUG 1.141380
## 16 df_m4.SEP 1.112447
## 17 df_m4.OCT 1.156644
## 18 df_m4.NOV 1.269882
## 19 df_m4.store5998 1.005065
## 20 df_m4.store6 1.009599
## 21 df_m4.y_2011 1.441881
## 22 df_m4.y_2013 1.716031
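For intuition, each VIF equals 1/(1 - R^2) from regressing that variable on all the others. A minimal base-R sketch of that computation (illustrative; values may differ slightly from the usdm output because of how missing rows are handled):
# Manual VIF: regress each column on the others and compute 1/(1 - R^2)
df_cc <- na.omit(df)
manual_vif <- sapply(names(df_cc), function(v) {
  r2 <- summary(lm(reformulate(setdiff(names(df_cc), v), response = v),
                   data = df_cc))$r.squared
  1 / (1 - r2)
})
round(manual_vif, 3)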
#install.packages("sampleSelection")
#library(sampleSelection)
CB <- function(x) {
ifelse(x > -500,
-exp(dnorm(x, log = TRUE)
- pnorm(x, log.p = TRUE))*x
-exp(2*(dnorm(x, log = TRUE) - pnorm(x, log.p = TRUE))),
-1)
}
lambda <- function(x) {
as.vector(ifelse(x > -30, dnorm(x)/pnorm(x), -x))
# can we prove it?
}
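# lambda() above is the inverse Mills ratio dnorm(x)/pnorm(x); the "can we prove it?"
# question refers to the switch to -x for very negative x, where the ratio is
# numerically fragile but asymptotically equal to -x. A quick illustrative check:
x_check <- c(-5, -10, -20, -30)
cbind(x = x_check, ratio = dnorm(x_check)/pnorm(x_check), approx = -x_check)
# the approximation error shrinks as x becomes more negative (relative error ~ 1/x^2)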
tobitTfit <- function(YS, XS, YO, XO, start,
weights=NULL, print.level=0,
maxMethod="Newton-Raphson",
index=NULL,
binaryOutcome=FALSE,
...) {
### Tobit treatment models:
### The latent variable is:
### YS* = XS'g + u
### The observables are:
### / 1 if YS* > 0
### YS = \ 0 if YS* <= 0
### YO = X'b + YS bT + v
### u, v are correlated
###
### Arguments:
###
### YS binary or logical vector, 0 (FALSE) and 1 (TRUE)
### XS -"- selection, should include
### exclusion restriction
### YO numeric vector, outcomes
### XO explanatory variables for outcomes, should include YS
### index individual parameter indices in the parameter vector.
### Should always be supplied but can generate here for
### testing purposes
### ... additional parameters for maxLik
###
loglik <- function( beta) {
betaS <- beta[iBetaS]
betaO <- beta[iBetaO]
sigma <- beta[iSigma]
if(sigma <= 0) return(NA)
rho <- beta[iRho]
if( ( rho < -1) || ( rho > 1)) return(NA)
# check the range
XS0.betaS <- XS0%*%betaS
# denoted by 'z' in the vignette
XS1.betaS <- XS1%*%betaS
v0 <- YO0 - XO0%*%betaO
v1 <- YO1 - XO1%*%betaO
sqrt1r2 <- sqrt( 1 - rho^2)
B0 <- (-XS0.betaS - rho/sigma*v0)/sqrt1r2
B1 <- (XS1.betaS + rho/sigma*v1)/sqrt1r2
loglik <- numeric(nObs)
loglik[i0] <- -1/2*log( 2*pi) - log( sigma) -
0.5*( v0/sigma)^2 + pnorm( B0, log.p=TRUE)
loglik[i1] <- -1/2*log( 2*pi) -log( sigma) -
0.5*( v1/sigma)^2 + pnorm( B1, log.p=TRUE)
#sum(loglik)
loglik
}
gradlik <- function(beta) {
## gradient is nObs x nParam matrix
betaS <- beta[iBetaS]
betaO <- beta[iBetaO]
sigma <- beta[iSigma]
if(sigma <= 0) return(NA)
rho <- beta[iRho]
if( ( rho < -1) || ( rho > 1)) return(NA)
# check the range
XS0.betaS <- XS0%*%betaS
# denoted by 'z' in the vignette
XS1.betaS <- XS1%*%betaS
v0 <- drop(YO0 - XO0%*%betaO)
v1 <- drop(YO1 - XO1%*%betaO)
sqrt1r2 <- sqrt( 1 - rho^2)
B0 <- (-XS0.betaS - rho/sigma*v0)/sqrt1r2
B1 <- (XS1.betaS + rho/sigma*v1)/sqrt1r2
lambda0 <- drop(lambda(B0))
lambda1 <- drop(lambda(B1))
## now the gradient itself
gradient <- matrix(0, nObs, nParam)
gradient[i0, iBetaS] <- -lambda0*XS0/sqrt1r2
gradient[i1, iBetaS] <- lambda1*XS1/sqrt1r2
gradient[i0,iBetaO] <- (lambda0*rho/sigma/sqrt1r2
+ v0/sigma^2)*XO0
gradient[i1,iBetaO] <- (-lambda1*rho/sigma/sqrt1r2
+ v1/sigma^2)*XO1
gradient[i0,iSigma] <- (-1/sigma + v0^2/sigma^3
+ lambda0*rho/sigma^2*v0/sqrt1r2)
gradient[i1,iSigma] <- (-1/sigma + v1^2/sigma^3
- lambda1*rho/sigma^2*v1/sqrt1r2)
gradient[i0,iRho] <- -lambda0*(v0/sigma + rho*XS0.betaS)/
sqrt1r2^3
gradient[i1,iRho] <- lambda1*(v1/sigma + rho*XS1.betaS)/
sqrt1r2^3
# colSums(gradient)
gradient
}
hesslik <- function(beta) {
# This is a hack in order to avoid numeric problems
## gradient is nObs x nParam matrix
betaS <- beta[iBetaS]
betaO <- beta[iBetaO]
sigma <- beta[iSigma]
if(sigma <= 0) return(NA)
rho <- beta[iRho]
if( ( rho < -1) || ( rho > 1)) return(NA)
# check the range
XS0.betaS <- XS0%*%betaS
# denoted by 'z' in the vignette
XS1.betaS <- XS1%*%betaS
v0 <- drop(YO0 - XO0%*%betaO)
v1 <- drop(YO1 - XO1%*%betaO)
sqrt1r2 <- sqrt( 1 - rho^2)
B0 <- (-XS0.betaS - rho/sigma*v0)/sqrt1r2
B1 <- (XS1.betaS + rho/sigma*v1)/sqrt1r2
lambda0 <- drop(lambda(B0))
lambda1 <- drop(lambda(B1))
CB0 <- drop(CB(B0))
CB1 <- drop(CB(B1))
hess <- array(0, c( nParam, nParam))
hess[,] <- NA
hess[iBetaS,iBetaS] <-
t( XS0) %*% ( XS0 * CB0)/sqrt1r2^2 +
t( XS1) %*% ( XS1 * CB1)/sqrt1r2^2
hess[iBetaS,iBetaO] <-
- t( XS0) %*% ( XO0 * CB0)*rho/sqrt1r2^2/sigma -
t( XS1) %*% ( XO1 * CB1)*rho/sqrt1r2^2/sigma
hess[iBetaO,iBetaS] <- t(hess[iBetaS,iBetaO])
hess[iBetaS,iSigma] <-
-rho/sigma^2/sqrt1r2^2*t( XS0) %*% ( CB0*v0) -
rho/sigma^2/sqrt1r2^2*t( XS1) %*% ( CB1*v1)
hess[iSigma,iBetaS] <- t(hess[iBetaS,iSigma])
hess[iBetaS,iRho] <-
(t(XS0) %*% (CB0*(v0/sigma + rho*XS0.betaS)/sqrt1r2^4
- lambda0*rho/sqrt1r2^3)
+t(XS1) %*% (CB1*(v1/sigma + rho*XS1.betaS)/sqrt1r2^4
+ lambda1*rho/sqrt1r2^3)
)
hess[iRho,iBetaS] <- t(hess[iBetaS,iRho])
##
hess[iBetaO,iBetaO] <-
t( XO0) %*% (XO0*((rho/sqrt1r2)^2*CB0 - 1))/sigma^2 +
t( XO1) %*% (XO1*( (rho/sqrt1r2)^2 * CB1 - 1))/sigma^2
hess[iBetaO,iSigma] <-
(t( XO0) %*% (CB0*rho^2/sigma^3*v0/sqrt1r2^2
- rho/sigma^2*lambda0/sqrt1r2
- 2*v0/sigma^3)
+ t( XO1) %*% (CB1*rho^2/sigma^3*v1/sqrt1r2^2
+ rho/sigma^2*lambda1/sqrt1r2
- 2*v1/sigma^3)
)
hess[iSigma,iBetaO] <- t(hess[iBetaO,iSigma])
hess[iBetaO,iRho] <-
(t(XO0) %*% (-CB0*(v0/sigma + rho*XS0.betaS)/sqrt1r2^4*rho
+ lambda0/sqrt1r2^3)/sigma
+ t(XO1) %*% (-CB1*(v1/sigma + rho*XS1.betaS)/sqrt1r2^4*rho
- lambda1/sqrt1r2^3)/sigma
)
hess[iRho,iBetaO] <- t(hess[iBetaO,iRho])
##
hess[iSigma,iSigma] <-
(sum(1/sigma^2
-3*v0*v0/sigma^4
+ v0*v0/sigma^4*rho^2/sqrt1r2^2*CB0
-2*lambda0*v0/sqrt1r2*rho/sigma^3)
+ sum(1/sigma^2
-3*v1*v1/sigma^4
+rho^2/sigma^4*v1*v1/sqrt1r2^2*CB1
+2*lambda1*v1/sqrt1r2*rho/sigma^3)
)
hess[iSigma,iRho] <-
(sum((-CB0*rho*(v0/sigma + rho*XS0.betaS)/sqrt1r2 + lambda0)
*v0/sigma^2)/sqrt1r2^3
- sum(
(CB1*rho*(v1/sigma + rho*XS1.betaS)/sqrt1r2 + lambda1)
*v1/sigma^2)/sqrt1r2^3
)
hess[iRho,iSigma] <- t(hess[iSigma,iRho])
hess[iRho,iRho] <-
(sum(CB0*( (v0/sigma + rho*XS0.betaS)/sqrt1r2^3)^2
-lambda0*(XS0.betaS*(1 + 2*rho^2) + 3*rho*v0/sigma)/
sqrt1r2^5
)
+ sum(CB1*( (v1/sigma + rho*XS1.betaS)/sqrt1r2^3)^2
+lambda1*( XS1.betaS*( 1 + 2*rho^2) + 3*rho*v1/sigma) /
sqrt1r2^5
)
)
## l.s2x3 is zero
hess
}
## ---------------
NXS <- ncol( XS)
if(is.null(colnames(XS)))
colnames(XS) <- rep("XS", NXS)
NXO <- ncol( XO)
if(is.null(colnames(XO)))
colnames(XO) <- rep("XO", NXO)
nObs <- length( YS)
i0 <- YS==0
i1 <- YS==1
NO1 <- length( YS[i0])
NO2 <- length( YS[i1])
if(!is.null(weights)) {
warning("Argument 'weight' is ignored by tobitTfit")
}
## indices in for the parameter vector
if(is.null(index)) {
iBetaS <- 1:NXS
iBetaO <- max(iBetaS) + seq(length=NXO)
if(!binaryOutcome) {
iSigma <- max(iBetaO) + 1
iRho <- max(iSigma) + 1
}
else
iRho <- max(iBetaO) + 1
nParam <- iRho
}
else {
iBetaS <- index$betaS
iBetaO <- index$betaO
iSigma <- index$errTerms["sigma"]
iRho <- index$errTerms["rho"]
nParam <- index$nParam
}
## split the data by selection
XS0 <- XS[i0,,drop=FALSE]
XS1 <- XS[i1,,drop=FALSE]
YO0 <- YO[i0]
YO1 <- YO[i1]
XO0 <- XO[i0,,drop=FALSE]
XO1 <- XO[i1,,drop=FALSE]
##
if(print.level > 0) {
cat( "Non-participants: ", NO1,
"; participants: ", NO2, "\n", sep="")
cat( "Initial values:\n")
cat("selection equation betaS:\n")
print(start[iBetaS])
cat("Outcome equation betaO\n")
print(start[iBetaO])
cat("Variance sigma\n")
print(start[iSigma])
cat("Correlation rho\n")
print(start[iRho])
}
result <- maxLik(loglik,
grad=gradlik,
hess=hesslik,
start=start,
print.level=print.level,
method=maxMethod,
...)
## compareDerivatives(#loglik,
## gradlik,
## hesslik,
## t0=start)
result$tobitType <- "treatment"
result$method <- "ml"
class( result ) <- c( "selection", class( result ) )
return( result )
}
treatReg <- function(selection, outcome,
data=sys.frame(sys.parent()),
weights = NULL,
subset,
method="ml",
start=NULL,
ys=FALSE, xs=FALSE,
yo=FALSE, xo=FALSE,
mfs=FALSE, mfo=FALSE,
print.level=0,
...) {
## Heckman-style treatment effect models
## selection: formula
## LHS: must be convertable to two-level factor (e.g. 0-1, 1-2, "A"-"B")
## RHS: ordinary formula as in lm()
## outcome: formula
## should include selection outcome
## ys, xs, yo, xo, mfs, mfo: whether to return the response, model matrix or
## the model frame of outcome and selection equation(s)
## First the consistency checks
## ... additional arguments for tobit2fit and tobit5fit
type <- 0
if(!inherits( selection, "formula" )) {
stop( "argument 'selection' must be a formula" )
}
if( length( selection ) != 3 ) {
stop( "argument 'selection' must be a 2-sided formula" )
}
if(inherits(outcome, "formula")) {
if( length( outcome ) != 3 ) {
stop( "argument 'outcome' must be a 2-sided formula" )
}
}
else
stop("argument 'outcome' must be a formula" )
if(!missing(data)) {
if(!inherits(data, "environment") & !inherits(data, "data.frame") & !inherits(data, "list")) {
stop("'data' must be either environment, data.frame, or list (currently a ", class(data), ")")
}
}
##
if(print.level > 0)
cat("Treatment effect model", type, "model\n")
probitEndogenous <- model.frame( selection, data = data)[ , 1 ]
probitLevels <- levels( as.factor( probitEndogenous ) )
if( length( probitLevels ) != 2 ) {
stop( "the left hand side of 'selection' has to contain",
" exactly two levels (e.g. FALSE and TRUE)" )
}
if( !is.null( weights )) {
warning( "argument 'weights' is ignored" )
weights <- NULL
}
## now check whether two-step method was requested
cl <- match.call()
if(method == "2step") {
twoStep <- heckitTfit(selection, outcome, data=data,
# weights = weights,
print.level = print.level, ... )
twoStep$call <- cl
class(twoStep) <- c("selection", class(twoStep))
return(twoStep)
}
## Now extract model frames etc
## YS (selection equation)
mf <- match.call(expand.dots = FALSE)
m <- match(c("selection", "data", "subset"), names(mf), 0)
mfS <- mf[c(1, m)]
mfS$drop.unused.levels <- TRUE
mfS$na.action <- na.pass
mfS[[1]] <- as.name("model.frame")
names(mfS)[2] <- "formula"
# model.frame requires the parameter to
# be 'formula'
mfS <- eval(mfS, parent.frame())
mtS <- attr(mfS, "terms")
XS <- model.matrix(mtS, mfS)
YS <- model.response(mfS)
YSLevels <- levels( as.factor( YS ) )
if( length( YSLevels ) != 2 ) {
stop( "the left hand side of the 'selection' formula has to contain",
" exactly two levels (e.g. FALSE and TRUE)" )
}
YS <- as.integer(YS == YSLevels[ 2 ])
# selection will be kept as integer internally
## check for NA-s. Because we have to find NA-s in several frames, we cannot use the standard na.
## functions here. Find bad rows and remove them later.
## We check XS and YS separately, because mfS may be a data frame with complex structure (e.g.
## including matrices)
badRow <- !complete.cases(YS, XS)
badRow <- badRow | is.infinite(YS)
badRow <- badRow | apply(XS, 1, function(v) any(is.infinite(v)))
## YO (outcome equation)
## Here we should include a possibility for the user to
## specify the model. Currently just a guess.
binaryOutcome <- FALSE
oArg <- match("outcome", names(mf), 0)
# find the outcome argument
m <- match(c("outcome", "data", "subset",
"offset"), names(mf), 0)
## replace the outcome list by the first equation and evaluate it
mfO <- mf[c(1, m)]
mfO$drop.unused.levels <- TRUE
mfO$na.action <- na.pass
mfO[[1]] <- as.name("model.frame")
# eval it as model frame
names(mfO)[2] <- "formula"
mfO <- eval(mfO, parent.frame())
# Note: if unobserved variables are
# marked as NA, eval returns a
# subframe of visible variables only.
# We have to check it later
mtO <- attr(mfO, "terms")
XO <- model.matrix(mtO, mfO)
YO <- model.response(mfO)
if(is.logical(YO) |
(is.factor(YO) & length(levels(YO)) == 2)) {
binaryOutcome <- TRUE
}
## Now figure out if selection outcome is in fact used as
## explanatory variable for the outcome
selectionVariable <- as.character(selection[[2]])
# name of the selection outcome
##
badRow <- badRow | !complete.cases(YO, XO)
badRow <- badRow | is.infinite(YO)
badRow <- badRow | apply(XO, 1, function(v) any(is.infinite(v)))
# outcome cases that contain NA, Inf, NaN
if( !is.null( weights ) ) {
if( length( weights ) != length( badRow ) ) {
stop( "number of weights (", length( weights ), ") is not equal",
" to the number of observations (", length( badRow ), ")" )
}
badRow <- badRow | is.na( weights )
badRow <- badRow | is.infinite( weights )
}
if(print.level > 0) {
cat(sum(badRow), "invalid observations\n")
}
if( method == "model.frame" ) {
mf <- mfS
mf <- cbind( mf, mfO[ , ! names( mfO ) %in% names( mf ), drop = FALSE ] )
return( mf[ !badRow, ] )
}
XS <- XS[!badRow,, drop=FALSE]
YS <- YS[!badRow]
XO <- XO[!badRow,, drop=FALSE]
YO <- YO[!badRow]
weightsNoNA <- weights[ !badRow ]
NXS <- ncol(XS)
NXO <- ncol(XO)
## parameter indices in the parameter vector
iBetaS <- seq(length=ncol(XS))
iBetaO <- max(iBetaS) + seq(length=NXO)
if(!binaryOutcome) {
iSigma <- max(iBetaO) + 1
iRho <- max(iSigma) + 1
}
else
iRho <- max(iBetaO) + 1
nParam <- iRho
if(binaryOutcome) {
iErrTerms <- c(rho=iRho)
}
else {
iErrTerms <- c(sigma=iSigma, rho=iRho )
}
index <- list(betaS=iBetaS,
betaO=iBetaO,
errTerms=iErrTerms,
outcome = iBetaO,
nParam=iRho)
##
twoStep <- NULL
if(is.null(start)) {
# start values by Heckman 2-step method
start <- numeric(nParam)
twoStep <- heckitTfit(selection, outcome, data=data,
print.level = print.level,
# weights = weights
)
coefs <- coef(twoStep, part="full")
start[iBetaS] <- coefs[twoStep$param$index$betaS]
if(!binaryOutcome) {
start[iBetaO] <- coefs[twoStep$param$index$betaO]
start[iSigma] <- coefs[twoStep$param$index$sigma]
}
else
start[iBetaO] <- coefs[twoStep$param$index$betaO]/coefs[twoStep$param$index$sigma]
start[iRho] <- coefs[twoStep$param$index$rho]
if(start[iRho] > 0.99)
start[iRho] <- 0.99
else if(start[iRho] < -0.99)
start[iRho] <- -0.99
}
if(is.null(names(start))) {
if(!binaryOutcome) {
names(start) <- c(colnames(XS), colnames(XO), "sigma",
"rho")
}
else
names(start) <- c(colnames(XS), colnames(XO),
"rho")
} # add names to start values if not present
if(!binaryOutcome) {
estimation <- tobitTfit(YS, XS, YO, XO, start,
# weights = weightsNoNA,
print.level=print.level,
index=index,
binaryOutcome=binaryOutcome,
...)
}
else {
## estimation <- tobitTBfit(YS, XS, YO, XO, start, weights = weightsNoNA,
## print.level=print.level, ...)
## iErrTerms <- c(rho=iRho)
stop("Binary outcome models are not implemented")
}
param <- list(index=index,
NXS=ncol(XS), NXO=ncol(XO),
N0=sum(YS==0), N1=sum(YS==1),
nObs=length(YS), nParam=length(start),
df=length(YS) - length(start),
levels=YSLevels,
# levels[1]: selection 1; levels[2]:
# selection 2
selectionVariableName=selectionVariable
# which explanatory variable is selection outcome
)
result <- c(estimation,
twoStep=list(twoStep),
start=list(start),
param=list(param),
call=cl,
termsS=mtS,
termsO=mtO,
ys=switch(as.character(ys), "TRUE"=list(YS), "FALSE"=NULL),
xs=switch(as.character(xs), "TRUE"=list(XS), "FALSE"=NULL),
yo=switch(as.character(yo), "TRUE"=list(YO), "FALSE"=NULL),
xo=switch(as.character(xo), "TRUE"=list(XO), "FALSE"=NULL),
mfs=switch(as.character(mfs), "TRUE"=list(mfS[!badRow,]), "FALSE"=NULL),
mfo=switch(as.character(mfs),
"TRUE"=list(mfO[!badRow,]), "FALSE"=NULL)
)
result$binaryOutcome <- binaryOutcome
class( result ) <- class( estimation )
return(result)
}
In this return model, BOPS is a self-selected decision by the customer, so there is a potential selection bias: we suspect that BOPS is endogenous. We therefore use this treatment-effect model to address the problem. However, the model yields an insignificant rho of -0.00137, so we conclude that endogeneity is negligible in this case.
#model1<-treatReg(bops~kid+price+age_band + est_income_code + white + male + factor(month_index2) + factor(store_number)+factor(year), return ~ bops + price+age_band + est_income_code + white + male + factor(month_index2) + factor(store_number)+factor(fiscalyr), data=df_m4, method="ML")
#summary(model1)
#df_m4$predict_return=predict(model1, newdata=df_m4,type = "response")
#ttest=t.test(df_m4$predict_return~df_m4$bops)
#ATE=ttest$estimate[2]-ttest$estimate[1]
#names(ATE)<-"ATE"
#ATE ## result from Stata: ATE = 0.011; my ATE = 0.0203
probit_treatment<- glm(bops~kid+price+age_band + est_income_code + white + male + factor(month_index2) + factor(store_number)+factor(year),data=df_m4,family=binomial(link="probit"))
df_m4$predict_bops=predict(probit_treatment, newdata=df_m4,type = "response")
df_m4$predicted_bops=ifelse(df_m4$predict_bops >= 0.5,1,0)
mean(df_m4$predicted_bops != df_m4$bops,na.rm=TRUE)
## [1] 0.2301414
misClasificError <- mean(df_m4$predicted_bops != df_m4$bops,na.rm=TRUE) # misclassification rate (share of wrong predictions)
print(paste('Accuracy',1-misClasificError)) # accuracy is about 77%
## [1] "Accuracy 0.769858571226574"
Interpretation of the variable of interest, BOPS: assume we sell 1,000 units. Since the return rate when BOPS = 0 is 9.6%, customers would return 96 units. If these customers used BOPS, the return rate would rise to 0.096 + 0.02 = 0.116, so we would see 116 returned units under the BOPS strategy. This corresponds to a 20.8% increase ((116 - 96)/96) in the total number of returns.
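The back-of-the-envelope figures above can be reproduced directly; a small sketch using the 9.6% baseline and the 0.02 BOPS effect quoted in the text:
# Reproduce the interpretation arithmetic
units     <- 1000
base_rate <- 0.096                                  # return rate when bops = 0
ate       <- 0.02                                   # estimated BOPS effect on return probability
returns_no_bops   <- units * base_rate              # 96 units
returns_with_bops <- units * (base_rate + ate)      # 116 units
(returns_with_bops - returns_no_bops) / returns_no_bops  # ~0.208, a 20.8% increase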
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
probit1<-glm(return ~ bops + price+age_band + est_income_code + white + male + factor(month_index2) + factor(store_number) + factor(fiscalyr), data=df_m4,family=binomial(link="probit"))
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
#install.packages("mfx")
dwtest(probit1)
##
## Durbin-Watson test
##
## data: probit1
## DW = 1.7909, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0
library(mfx)
## Loading required package: sandwich
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following objects are masked from 'package:raster':
##
## area, select
## Loading required package: betareg
probitmfx(formula=return ~ bops + price+age_band + est_income_code + white + male + factor(month_index2) + factor(store_number) + factor(fiscalyr), data=df_m4,robust=TRUE)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Call:
## probitmfx(formula = return ~ bops + price + age_band + est_income_code +
## white + male + factor(month_index2) + factor(store_number) +
## factor(fiscalyr), data = df_m4, robust = TRUE)
##
## Marginal Effects:
## dF/dx Std. Err. z P>|z|
## bops 1.3372e-02 7.4501e-04 17.9489 < 2.2e-16 ***
## price 6.3374e-05 1.7961e-06 35.2854 < 2.2e-16 ***
## age_band -1.0423e-03 8.0301e-05 -12.9804 < 2.2e-16 ***
## est_income_code 1.2424e-03 1.3689e-04 9.0760 < 2.2e-16 ***
## white -3.3155e-03 6.0078e-04 -5.5187 3.415e-08 ***
## male -2.2401e-02 6.2346e-04 -35.9308 < 2.2e-16 ***
## factor(month_index2)1 2.0660e-03 2.0113e-03 1.0272 0.3043217
## factor(month_index2)2 6.9244e-04 2.0254e-03 0.3419 0.7324485
## factor(month_index2)3 -6.4963e-03 1.8288e-03 -3.5522 0.0003821 ***
## factor(month_index2)4 -5.1723e-03 1.6431e-03 -3.1479 0.0016447 **
## factor(month_index2)5 -1.7662e-02 1.4772e-03 -11.9565 < 2.2e-16 ***
## factor(month_index2)6 1.0119e-02 1.8770e-03 5.3912 7.000e-08 ***
## factor(month_index2)7 -9.0031e-03 1.6195e-03 -5.5591 2.712e-08 ***
## factor(month_index2)8 1.1212e-02 2.0685e-03 5.4202 5.952e-08 ***
## factor(month_index2)9 -1.1690e-02 1.7530e-03 -6.6688 2.579e-11 ***
## factor(month_index2)10 -1.7107e-02 1.5908e-03 -10.7536 < 2.2e-16 ***
## factor(month_index2)11 2.9297e-03 2.0707e-03 1.4149 0.1571090
## factor(store_number)6 -9.9732e-03 1.1473e-03 -8.6930 < 2.2e-16 ***
## factor(store_number)5998 -6.0697e-02 1.3326e-02 -4.5546 5.248e-06 ***
## factor(fiscalyr)13 -1.2039e-02 6.1194e-04 -19.6733 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## dF/dx is for discrete change for the following variables:
##
## [1] "bops" "white"
## [3] "male" "factor(month_index2)1"
## [5] "factor(month_index2)2" "factor(month_index2)3"
## [7] "factor(month_index2)4" "factor(month_index2)5"
## [9] "factor(month_index2)6" "factor(month_index2)7"
## [11] "factor(month_index2)8" "factor(month_index2)9"
## [13] "factor(month_index2)10" "factor(month_index2)11"
## [15] "factor(store_number)6" "factor(store_number)5998"
## [17] "factor(fiscalyr)13"
aggregate(df_m4$return~df_m4$bops, FUN=mean)
## df_m4$bops df_m4$return
## 1 0 0.09679147
## 2 1 0.10945370
#install.packages("lmtest")
library(lmtest)
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:usdm':
##
## vif
## The following object is masked from 'package:VIF':
##
## vif
probit1<-glm(return ~ bops + price+age_band + est_income_code + white + male + factor(month_index2) + factor(store_number)+factor(fiscalyr), data=df_m4, family=binomial(link="probit"))
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
gqtest(probit1) # Goldfeld-Quandt test
##
## Goldfeld-Quandt test
##
## data: probit1
## GQ = 0.88268, df1 = 491500, df2 = 491500, p-value = 1
## alternative hypothesis: variance increases from segment 1 to 2
bptest(probit1)# Breusch-Pagan test
##
## studentized Breusch-Pagan test
##
## data: probit1
## BP = 18257, df = 20, p-value < 2.2e-16
#The Breusch-Pagan test indicates heteroskedasticity, but using conventional standard errors is acceptable here; obtaining robust standard errors for treatReg is beyond the scope of this course.
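For the plain probit, though, a heteroskedasticity-consistent covariance is straightforward; a sketch assuming the sandwich package (loaded above as a dependency of mfx):
# Heteroskedasticity-robust (sandwich) standard errors for probit1 (illustrative)
library(sandwich)
coeftest(probit1, vcov. = vcovHC(probit1, type = "HC0"))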
The Durbin-Watson test shows positive serial correlation, though within an acceptable range, and the Lagrange Multiplier test also indicates serial correlation. To address this, we obtain Newey-West standard errors.
#install.packages("DataCombine")
library(DataCombine)
##
## Attaching package: 'DataCombine'
## The following object is masked from 'package:raster':
##
## shift
dwtest(probit1) # Durbin-Watson test: DW = 1.79 < 2 and significant, indicating positive serial correlation, but within an acceptable range
##
## Durbin-Watson test
##
## data: probit1
## DW = 1.7909, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0
#library(FinTS)
#arch.test(df_m4$return) #There is serial correlation
library(nlme)
##
## Attaching package: 'nlme'
## The following object is masked from 'package:usdm':
##
## Variogram
## The following object is masked from 'package:raster':
##
## getData
coeftest(probit1,vcov.=NeweyWest)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.2185e+00 1.2647e-02 -96.3459 < 2.2e-16 ***
## bops 7.5381e-02 4.8620e-03 15.5042 < 2.2e-16 ***
## price 3.6676e-04 1.0956e-05 33.4764 < 2.2e-16 ***
## age_band -6.0322e-03 5.4636e-04 -11.0407 < 2.2e-16 ***
## est_income_code 7.1900e-03 9.4617e-04 7.5991 2.982e-14 ***
## white -1.9191e-02 4.1188e-03 -4.6593 3.172e-06 ***
## male -1.2930e-01 4.3471e-03 -29.7451 < 2.2e-16 ***
## factor(month_index2)1 1.1873e-02 1.5589e-02 0.7617 0.446252
## factor(month_index2)2 3.9978e-03 1.4947e-02 0.2675 0.789112
## factor(month_index2)3 -3.8435e-02 1.4800e-02 -2.5970 0.009404 **
## factor(month_index2)4 -3.0390e-02 1.2315e-02 -2.4678 0.013595 *
## factor(month_index2)5 -1.0553e-01 1.2173e-02 -8.6693 < 2.2e-16 ***
## factor(month_index2)6 5.6795e-02 1.4090e-02 4.0310 5.555e-05 ***
## factor(month_index2)7 -5.3534e-02 1.2964e-02 -4.1293 3.639e-05 ***
## factor(month_index2)8 6.2580e-02 1.3812e-02 4.5309 5.872e-06 ***
## factor(month_index2)9 -7.0438e-02 1.4495e-02 -4.8595 1.177e-06 ***
## factor(month_index2)10 -1.0474e-01 1.3433e-02 -7.7973 6.324e-15 ***
## factor(month_index2)11 1.6787e-02 1.4519e-02 1.1562 0.247596
## factor(store_number)6 -5.9687e-02 9.9337e-03 -6.0085 1.872e-09 ***
## factor(store_number)5998 -4.9038e-01 9.5280e-02 -5.1467 2.651e-07 ***
## factor(fiscalyr)13 -6.9387e-02 5.2599e-03 -13.1916 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
df_2012<-subset(df_m4,df_m4$fiscalyr==12)
df_2012_26<-subset(df_2012,df_2012$store2==1|df_2012$store6==1)
df_2013<-subset(df_m4,df_m4$fiscalyr==13)
df_2013_26<-subset(df_2013,df_2013$store2==1|df_2013$store6==1)
df_store5998<-subset(df_m4,df_m4$store5998==1)
df_store26<-subset(df_m4,df_m4$store2==1|df_m4$store6==1)
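Before running the subgroup probits, the raw return rate by BOPS status within each subset can be compared (illustrative, mirroring the aggregate() check used earlier):
# Raw return rates by BOPS status within each subgroup (illustrative)
subsets <- list(fy2012 = df_2012, fy2013 = df_2013,
                store5998 = df_store5998, stores26 = df_store26)
sapply(subsets, function(d) tapply(d$return, d$bops, mean))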
##Year 2012
probit_2012<-glm(return ~ bops + price+age_band + est_income_code + white + male + factor(month_index2) + factor(store_number) + factor(year), data=df_2012,family=binomial(link="probit"))
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
probitmfx(formula=return ~ bops + price+age_band + est_income_code + white + male + factor(month_index2) + factor(store_number) + factor(year), data=df_2012,robust=TRUE)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Call:
## probitmfx(formula = return ~ bops + price + age_band + est_income_code +
## white + male + factor(month_index2) + factor(store_number) +
## factor(year), data = df_2012, robust = TRUE)
##
## Marginal Effects:
## dF/dx Std. Err. z P>|z|
## bops 1.3872e-02 1.2283e-03 11.2935 < 2.2e-16 ***
## price 6.4166e-05 2.7926e-06 22.9770 < 2.2e-16 ***
## age_band -7.5040e-04 1.2227e-04 -6.1372 8.398e-10 ***
## est_income_code 9.7563e-04 2.0897e-04 4.6688 3.029e-06 ***
## white -3.3599e-03 9.1607e-04 -3.6677 0.0002448 ***
## male -2.3587e-02 9.5222e-04 -24.7700 < 2.2e-16 ***
## factor(month_index2)1 1.1952e-02 3.1151e-03 3.8368 0.0001246 ***
## factor(month_index2)2 4.0934e-05 2.9157e-03 0.0140 0.9887985
## factor(month_index2)3 1.0864e-03 2.8289e-03 0.3840 0.7009554
## factor(month_index2)4 -1.3166e-04 2.5201e-03 -0.0522 0.9583333
## factor(month_index2)5 -1.0571e-02 2.2174e-03 -4.7672 1.868e-06 ***
## factor(month_index2)6 2.7503e-02 3.0273e-03 9.0851 < 2.2e-16 ***
## factor(month_index2)7 -6.7781e-03 2.4198e-03 -2.8010 0.0050937 **
## factor(month_index2)8 1.1404e-02 3.0821e-03 3.7000 0.0002156 ***
## factor(month_index2)9 3.7700e-03 2.9781e-03 1.2659 0.2055462
## factor(month_index2)10 -1.3933e-02 2.4227e-03 -5.7509 8.874e-09 ***
## factor(month_index2)11 3.7893e-03 3.0348e-03 1.2486 0.2118047
## factor(store_number)6 -1.0142e-02 1.6732e-03 -6.0615 1.349e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## dF/dx is for discrete change for the following variables:
##
## [1] "bops" "white"
## [3] "male" "factor(month_index2)1"
## [5] "factor(month_index2)2" "factor(month_index2)3"
## [7] "factor(month_index2)4" "factor(month_index2)5"
## [9] "factor(month_index2)6" "factor(month_index2)7"
## [11] "factor(month_index2)8" "factor(month_index2)9"
## [13] "factor(month_index2)10" "factor(month_index2)11"
## [15] "factor(store_number)6"
##Year 2013
probit_2013<-glm(return ~ bops + price+age_band + est_income_code + white + male + factor(month_index2) + factor(store_number) + factor(year), data=df_2013,family=binomial(link="probit"))
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
probitmfx(formula=return ~ bops + price+age_band + est_income_code + white + male + factor(month_index2) + factor(store_number) + factor(year), data=df_2013,robust=TRUE)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Call:
## probitmfx(formula = return ~ bops + price + age_band + est_income_code +
## white + male + factor(month_index2) + factor(store_number) +
## factor(year), data = df_2013, robust = TRUE)
##
## Marginal Effects:
## dF/dx Std. Err. z P>|z|
## bops 1.3049e-02 9.3290e-04 13.9880 < 2.2e-16 ***
## price 6.2362e-05 2.2564e-06 27.6383 < 2.2e-16 ***
## age_band -1.2702e-03 1.0619e-04 -11.9613 < 2.2e-16 ***
## est_income_code 1.4520e-03 1.8073e-04 8.0343 9.414e-16 ***
## white -3.2947e-03 7.9375e-04 -4.1508 3.313e-05 ***
## male -2.1391e-02 8.2165e-04 -26.0347 < 2.2e-16 ***
## factor(month_index2)1 -7.3359e-03 2.5818e-03 -2.8414 0.0044914 **
## factor(month_index2)2 1.4487e-03 2.8337e-03 0.5112 0.6091852
## factor(month_index2)3 -1.3771e-02 2.3531e-03 -5.8522 4.850e-09 ***
## factor(month_index2)4 -1.0616e-02 2.1478e-03 -4.9426 7.709e-07 ***
## factor(month_index2)5 -2.4601e-02 1.9693e-03 -12.4922 < 2.2e-16 ***
## factor(month_index2)6 -3.7112e-03 2.3394e-03 -1.5864 0.1126425
## factor(month_index2)7 -1.2007e-02 2.1624e-03 -5.5527 2.813e-08 ***
## factor(month_index2)8 9.2195e-03 2.7650e-03 3.3343 0.0008551 ***
## factor(month_index2)9 -2.2490e-02 2.1240e-03 -10.5886 < 2.2e-16 ***
## factor(month_index2)10 -2.0812e-02 2.0840e-03 -9.9866 < 2.2e-16 ***
## factor(month_index2)11 1.5102e-03 2.8240e-03 0.5348 0.5928025
## factor(store_number)6 -1.0244e-02 1.5818e-03 -6.4761 9.411e-11 ***
## factor(store_number)5998 -5.8200e-02 1.2424e-02 -4.6847 2.804e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## dF/dx is for discrete change for the following variables:
##
## [1] "bops" "white"
## [3] "male" "factor(month_index2)1"
## [5] "factor(month_index2)2" "factor(month_index2)3"
## [7] "factor(month_index2)4" "factor(month_index2)5"
## [9] "factor(month_index2)6" "factor(month_index2)7"
## [11] "factor(month_index2)8" "factor(month_index2)9"
## [13] "factor(month_index2)10" "factor(month_index2)11"
## [15] "factor(store_number)6" "factor(store_number)5998"
##df_store5998
probit_store5998<-glm(return ~ bops + price+age_band + est_income_code + white + male + factor(month_index2)+ factor(year), data=df_store5998,family=binomial(link="probit"))
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
probitmfx(formula=return ~ bops + price+age_band + est_income_code + white + male + factor(month_index2) + factor(year), data=df_store5998,robust=TRUE)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Call:
## probitmfx(formula = return ~ bops + price + age_band + est_income_code +
## white + male + factor(month_index2) + factor(year), data = df_store5998,
## robust = TRUE)
##
## Marginal Effects:
## dF/dx Std. Err. z P>|z|
## bops -1.1625e-06 2.5901e-06 -0.4488 0.65357
## price -2.4356e-10 6.8997e-10 -0.3530 0.72409
## age_band 5.3592e-09 1.7979e-08 0.2981 0.76565
## est_income_code 3.5399e-08 1.3341e-07 0.2653 0.79075
## white -1.7441e-05 2.9136e-05 -0.5986 0.54942
## male -3.8016e-06 7.1690e-06 -0.5303 0.59591
## factor(month_index2)1 1.2158e-03 3.3819e-03 0.3595 0.71921
## factor(month_index2)2 -2.4019e-08 1.1962e-07 -0.2008 0.84087
## factor(month_index2)3 1.3437e-07 6.3174e-07 0.2127 0.83156
## factor(month_index2)4 3.7382e-01 3.4793e-01 1.0744 0.28263
## factor(month_index2)5 2.1639e-01 1.1545e-01 1.8744 0.06088 .
## factor(month_index2)6 7.9309e-08 3.6404e-07 0.2179 0.82754
## factor(month_index2)7 8.2616e-01 1.4528e-01 5.6865 1.297e-08 ***
## factor(month_index2)8 2.1235e-08 1.8537e-07 0.1146 0.90880
## factor(month_index2)9 -2.4128e-08 1.0217e-07 -0.2362 0.81331
## factor(month_index2)10 2.3553e-07 9.2381e-07 0.2550 0.79875
## factor(month_index2)11 9.6010e-01 1.3917e-01 6.8990 5.236e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## dF/dx is for discrete change for the following variables:
##
## [1] "bops" "white"
## [3] "male" "factor(month_index2)1"
## [5] "factor(month_index2)2" "factor(month_index2)3"
## [7] "factor(month_index2)4" "factor(month_index2)5"
## [9] "factor(month_index2)6" "factor(month_index2)7"
## [11] "factor(month_index2)8" "factor(month_index2)9"
## [13] "factor(month_index2)10" "factor(month_index2)11"
##df_store 2 & 6
probit_store26<-glm(return ~ bops + price+age_band + est_income_code + white + male + factor(month_index2) + factor(year), data=df_store26,family=binomial(link="probit"))
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
probitmfx(formula=return ~ bops + price+age_band + est_income_code + white + male + factor(month_index2) + factor(year), data=df_store26,robust=TRUE)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Call:
## probitmfx(formula = return ~ bops + price + age_band + est_income_code +
## white + male + factor(month_index2) + factor(year), data = df_store26,
## robust = TRUE)
##
## Marginal Effects:
## dF/dx Std. Err. z P>|z|
## bops 1.3862e-02 7.4809e-04 18.5301 < 2.2e-16 ***
## price 6.3498e-05 1.7995e-06 35.2874 < 2.2e-16 ***
## age_band -1.0625e-03 8.0297e-05 -13.2324 < 2.2e-16 ***
## est_income_code 1.2689e-03 1.3690e-04 9.2688 < 2.2e-16 ***
## white -3.3229e-03 6.0096e-04 -5.5294 3.214e-08 ***
## male -2.2263e-02 6.2351e-04 -35.7054 < 2.2e-16 ***
## factor(month_index2)1 -9.6305e-03 1.9229e-03 -5.0083 5.491e-07 ***
## factor(month_index2)2 -1.0912e-02 1.9484e-03 -5.6002 2.141e-08 ***
## factor(month_index2)3 -1.7429e-02 1.7540e-03 -9.9365 < 2.2e-16 ***
## factor(month_index2)4 -1.6331e-02 1.6036e-03 -10.1842 < 2.2e-16 ***
## factor(month_index2)5 -2.8546e-02 1.5014e-03 -19.0131 < 2.2e-16 ***
## factor(month_index2)6 9.9123e-03 1.8778e-03 5.2788 1.300e-07 ***
## factor(month_index2)7 -9.0883e-03 1.6194e-03 -5.6122 1.997e-08 ***
## factor(month_index2)8 1.1161e-02 2.0687e-03 5.3953 6.841e-08 ***
## factor(month_index2)9 -1.1874e-02 1.7549e-03 -6.7664 1.320e-11 ***
## factor(month_index2)10 -1.7215e-02 1.5906e-03 -10.8227 < 2.2e-16 ***
## factor(month_index2)11 2.8568e-03 2.0700e-03 1.3801 0.1676
## factor(year)2012 -1.3030e-02 8.4545e-04 -15.4117 < 2.2e-16 ***
## factor(year)2013 -2.2710e-02 1.1140e-03 -20.3858 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## dF/dx is for discrete change for the following variables:
##
## [1] "bops" "white"
## [3] "male" "factor(month_index2)1"
## [5] "factor(month_index2)2" "factor(month_index2)3"
## [7] "factor(month_index2)4" "factor(month_index2)5"
## [9] "factor(month_index2)6" "factor(month_index2)7"
## [11] "factor(month_index2)8" "factor(month_index2)9"
## [13] "factor(month_index2)10" "factor(month_index2)11"
## [15] "factor(year)2012" "factor(year)2013"
##df_store 2 & 6, 2012
probit_store2012_26<-glm(return ~ bops + price+age_band + est_income_code + white + male + factor(month_index2) + factor(year), data=df_2012_26,family=binomial(link="probit"))
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
probitmfx(formula=return ~ bops + price+age_band + est_income_code + white + male + factor(month_index2) + factor(year), data=df_2012_26,robust=TRUE)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Call:
## probitmfx(formula = return ~ bops + price + age_band + est_income_code +
## white + male + factor(month_index2) + factor(year), data = df_2012_26,
## robust = TRUE)
##
## Marginal Effects:
## dF/dx Std. Err. z P>|z|
## bops 1.4234e-02 1.2289e-03 11.5823 < 2.2e-16 ***
## price 6.4331e-05 2.7967e-06 23.0027 < 2.2e-16 ***
## age_band -7.8120e-04 1.2224e-04 -6.3906 1.652e-10 ***
## est_income_code 1.0085e-03 2.0894e-04 4.8267 1.388e-06 ***
## white -3.4004e-03 9.1619e-04 -3.7115 0.0002061 ***
## male -2.3445e-02 9.5241e-04 -24.6167 < 2.2e-16 ***
## factor(month_index2)1 1.1900e-02 3.1146e-03 3.8206 0.0001332 ***
## factor(month_index2)2 -5.5985e-06 2.9152e-03 -0.0019 0.9984677
## factor(month_index2)3 1.0841e-03 2.8292e-03 0.3832 0.7015695
## factor(month_index2)4 -1.8623e-04 2.5196e-03 -0.0739 0.9410792
## factor(month_index2)5 -1.0557e-02 2.2178e-03 -4.7601 1.935e-06 ***
## factor(month_index2)6 2.7290e-02 3.0241e-03 9.0243 < 2.2e-16 ***
## factor(month_index2)7 -6.8369e-03 2.4193e-03 -2.8259 0.0047142 **
## factor(month_index2)8 1.1391e-02 3.0822e-03 3.6958 0.0002192 ***
## factor(month_index2)9 3.6257e-03 2.9758e-03 1.2184 0.2230613
## factor(month_index2)10 -1.3999e-02 2.4220e-03 -5.7800 7.471e-09 ***
## factor(month_index2)11 3.7291e-03 3.0341e-03 1.2290 0.2190543
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## dF/dx is for discrete change for the following variables:
##
## [1] "bops" "white"
## [3] "male" "factor(month_index2)1"
## [5] "factor(month_index2)2" "factor(month_index2)3"
## [7] "factor(month_index2)4" "factor(month_index2)5"
## [9] "factor(month_index2)6" "factor(month_index2)7"
## [11] "factor(month_index2)8" "factor(month_index2)9"
## [13] "factor(month_index2)10" "factor(month_index2)11"
##df_store 2 & 6, 2013
probit_store2013_26<-glm(return ~ bops + price+age_band + est_income_code + white + male + factor(month_index2) + factor(year), data=df_2013_26,family=binomial(link="probit"))
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
probitmfx(formula=return ~ bops + price+age_band + est_income_code + white + male + factor(month_index2) + factor(year), data=df_2013_26,robust=TRUE)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Call:
## probitmfx(formula = return ~ bops + price + age_band + est_income_code +
## white + male + factor(month_index2) + factor(year), data = df_2013_26,
## robust = TRUE)
##
## Marginal Effects:
## dF/dx Std. Err. z P>|z|
## bops 1.2801e-02 9.6333e-04 13.2885 < 2.2e-16 ***
## price 6.2362e-05 2.1606e-06 28.8625 < 2.2e-16 ***
## age_band -1.2724e-03 1.1006e-04 -11.5605 < 2.2e-16 ***
## est_income_code 1.5169e-03 1.8760e-04 8.0859 6.173e-16 ***
## white -3.2249e-03 8.2383e-04 -3.9145 9.060e-05 ***
## male -2.0709e-02 8.5026e-04 -24.3556 < 2.2e-16 ***
## factor(month_index2)1 -7.6298e-03 2.6073e-03 -2.9263 0.003431 **
## factor(month_index2)2 1.3043e-03 2.8610e-03 0.4559 0.648468
## factor(month_index2)3 -1.3981e-02 2.3814e-03 -5.8707 4.339e-09 ***
## factor(month_index2)4 -1.0871e-02 2.1731e-03 -5.0027 5.653e-07 ***
## factor(month_index2)5 -2.5045e-02 2.0044e-03 -12.4946 < 2.2e-16 ***
## factor(month_index2)6 -3.8802e-03 2.3639e-03 -1.6414 0.100713
## factor(month_index2)7 -1.2164e-02 2.1896e-03 -5.5555 2.768e-08 ***
## factor(month_index2)8 9.4851e-03 2.7960e-03 3.3923 0.000693 ***
## factor(month_index2)9 -1.3337e-02 2.7825e-03 -4.7930 1.643e-06 ***
## factor(month_index2)10 -1.7054e-02 2.2318e-03 -7.6414 2.149e-14 ***
## factor(month_index2)11 1.5823e-03 2.8557e-03 0.5541 0.579534
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## dF/dx is for discrete change for the following variables:
##
## [1] "bops" "white"
## [3] "male" "factor(month_index2)1"
## [5] "factor(month_index2)2" "factor(month_index2)3"
## [7] "factor(month_index2)4" "factor(month_index2)5"
## [9] "factor(month_index2)6" "factor(month_index2)7"
## [11] "factor(month_index2)8" "factor(month_index2)9"
## [13] "factor(month_index2)10" "factor(month_index2)11"