# Hackathon ----
# The workspace is deliberately not wiped here: every object used below is
# assigned by this script, and clearing the global environment would destroy
# unrelated work in an interactive session.

# Load the training data, the test data and the sample submission template.
train_data <- read.csv("~/Downloads/BCE Train Data.csv", header = TRUE)
test_data <- read.csv("~/Downloads/BCE Test Data.csv", header = TRUE)
sample_submission <- read.csv(
  "~/Downloads/BCE Sample Submission (1).csv",
  header = TRUE
)
library(visdat)
library(stargazer)
Please cite as:
Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
# Plot an overview of the training data with visdat.
vis_dat(x = train_data)

# Print a plain-text summary-statistics table for the training data.
stargazer(
  train_data,
  type = "text"
)
====================================================================
Statistic N Mean St. Dev. Min Max
--------------------------------------------------------------------
Id 423 298.695 172.546 1 602
Age 423 30.631 10.188 3 67
Customer_Reviews_Importance 423 2.525 1.196 1 5
Personalized_Recommendation_Frequency.1 423 2.674 1.034 1 5
Rating_Accuracy 423 2.643 0.899 1 5
Shopping_Satisfaction 423 2.454 1.015 1 5
--------------------------------------------------------------------
library(stR)
Registered S3 method overwritten by 'quantmod':
method from
as.zoo.data.frame zoo
# Inspect the structure (column names, types, first values) of the test set.
utils::str(object = test_data)
'data.frame': 179 obs. of 23 variables:
$ Id : int 5 6 10 12 14 15 16 19 20 24 ...
$ Age : int 32 33 26 32 54 43 25 25 32 31 ...
$ Gender : chr "Female" "Female" "Male" "Male" ...
$ Purchase_Frequency : chr "Once a month" "Multiple times a week" "Less than once a month" "Less than once a month" ...
$ Purchase_Categories : chr "Clothing and Fashion;Home and Kitchen;others" "Groceries and Gourmet Food;Beauty and Personal Care;Clothing and Fashion;Home and Kitchen;others" "Groceries and Gourmet Food;Beauty and Personal Care;Clothing and Fashion;Home and Kitchen;others" "others" ...
$ Personalized_Recommendation_Frequency : chr "No" "No" "Yes" "Sometimes" ...
$ Browsing_Frequency : chr "Few times a week" "Few times a month" "Few times a week" "Rarely" ...
$ Product_Search_Method : chr "others" "categories" "Keyword" "categories" ...
$ Search_Result_Exploration : chr "Multiple pages" "Multiple pages" "Multiple pages" "First page" ...
$ Customer_Reviews_Importance : int 1 1 1 1 3 2 5 4 1 3 ...
$ Add_to_Cart_Browsing : chr "Yes" "Yes" "Yes" "No" ...
$ Cart_Completion_Frequency : chr "Sometimes" "Always" "Often" "Always" ...
$ Cart_Abandonment_Factors : chr "Found a better price elsewhere" "Changed my mind or no longer need the item" "Found a better price elsewhere" "Changed my mind or no longer need the item" ...
$ Saveforlater_Frequency : chr "Rarely" "Often" "Sometimes" "Never" ...
$ Review_Left : chr "Yes" "Yes" "No" "No" ...
$ Review_Reliability : chr "Occasionally" "Moderately" "Heavily" "Moderately" ...
$ Review_Helpfulness : chr "Yes" "Yes" "Yes" "Yes" ...
$ Personalized_Recommendation_Frequency.1: int 5 2 1 2 3 1 4 3 1 4 ...
$ Recommendation_Helpfulness : chr "Yes" "No" "Yes" "Sometimes" ...
$ Rating_Accuracy : int 1 3 2 2 2 2 3 3 1 2 ...
$ Service_Appreciation : chr "Competitive prices" "Wide product selection" "Wide product selection" "Wide product selection" ...
$ Improvement_Areas : chr "Shipping speed and reliability" "Reducing packaging waste" "Customer service responsiveness" "Customer service responsiveness" ...
$ Shopping_Satisfaction : logi NA NA NA NA NA NA ...
# Baseline linear model: Shopping_Satisfaction regressed on review
# reliability, product search method and add-to-cart browsing behaviour.
reg0 <- lm(
  formula = Shopping_Satisfaction ~ Review_Reliability +
    Product_Search_Method + Add_to_Cart_Browsing,
  data = train_data
)
summary(reg0)
Call:
lm(formula = Shopping_Satisfaction ~ Review_Reliability + Product_Search_Method +
Add_to_Cart_Browsing, data = train_data)
Residuals:
Min 1Q Median 3Q Max
-2.04345 -0.66899 -0.03827 0.66722 2.71758
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.94954 0.95930 2.032 0.0428 *
Review_ReliabilityModerately 0.05046 0.12546 0.402 0.6877
Review_ReliabilityNever 1.02786 0.25293 4.064 5.78e-05 ***
Review_ReliabilityOccasionally 0.34222 0.13817 2.477 0.0137 *
Review_ReliabilityRarely 0.29695 0.21498 1.381 0.1679
Product_Search_Methodcategories 0.60355 0.95767 0.630 0.5289
Product_Search_MethodFilter 0.33161 0.96026 0.345 0.7300
Product_Search_MethodKeyword 0.28242 0.95892 0.295 0.7685
Product_Search_Methodothers 1.04111 0.97512 1.068 0.2863
Add_to_Cart_BrowsingNo -0.20849 0.12223 -1.706 0.0888 .
Add_to_Cart_BrowsingYes -0.24415 0.11515 -2.120 0.0346 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.9511 on 412 degrees of freedom
Multiple R-squared: 0.143, Adjusted R-squared: 0.1222
F-statistic: 6.876 on 10 and 412 DF, p-value: 5.772e-10
# Extended linear model: the reg0 predictors plus purchase frequency, age,
# gender and browsing frequency.
reg1 <- lm(
  formula = Shopping_Satisfaction ~ Review_Reliability +
    Product_Search_Method + Add_to_Cart_Browsing + Purchase_Frequency +
    Age + Gender + Browsing_Frequency,
  data = train_data
)
summary(reg1)
Call:
lm(formula = Shopping_Satisfaction ~ Review_Reliability + Product_Search_Method +
Add_to_Cart_Browsing + Purchase_Frequency + Age + Gender +
Browsing_Frequency, data = train_data)
Residuals:
Min 1Q Median 3Q Max
-2.19660 -0.68032 -0.04159 0.62020 2.65030
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.529190 0.994787 2.542 0.011383
Review_ReliabilityModerately 0.055166 0.128678 0.429 0.668363
Review_ReliabilityNever 0.985489 0.261109 3.774 0.000185
Review_ReliabilityOccasionally 0.349301 0.143393 2.436 0.015286
Review_ReliabilityRarely 0.284857 0.219302 1.299 0.194717
Product_Search_Methodcategories 0.158126 0.974941 0.162 0.871238
Product_Search_MethodFilter -0.101201 0.977054 -0.104 0.917556
Product_Search_MethodKeyword -0.095858 0.973440 -0.098 0.921606
Product_Search_Methodothers 0.576033 0.993685 0.580 0.562447
Add_to_Cart_BrowsingNo -0.205252 0.125582 -1.634 0.102958
Add_to_Cart_BrowsingYes -0.228124 0.119317 -1.912 0.056600
Purchase_FrequencyLess than once a month -0.041188 0.157326 -0.262 0.793607
Purchase_FrequencyMultiple times a week 0.071234 0.183680 0.388 0.698356
Purchase_FrequencyOnce a month 0.035783 0.137587 0.260 0.794939
Purchase_FrequencyOnce a week 0.016946 0.135593 0.125 0.900604
Age -0.003955 0.004832 -0.819 0.413548
GenderMale -0.068695 0.120466 -0.570 0.568832
GenderOthers 0.191992 0.292542 0.656 0.512014
GenderPrefer not to say 0.049931 0.137765 0.362 0.717216
Browsing_FrequencyFew times a week -0.028887 0.112013 -0.258 0.796625
Browsing_FrequencyMultiple times a day -0.398459 0.161966 -2.460 0.014309
Browsing_FrequencyRarely 0.071622 0.178459 0.401 0.688389
(Intercept) *
Review_ReliabilityModerately
Review_ReliabilityNever ***
Review_ReliabilityOccasionally *
Review_ReliabilityRarely
Product_Search_Methodcategories
Product_Search_MethodFilter
Product_Search_MethodKeyword
Product_Search_Methodothers
Add_to_Cart_BrowsingNo
Add_to_Cart_BrowsingYes .
Purchase_FrequencyLess than once a month
Purchase_FrequencyMultiple times a week
Purchase_FrequencyOnce a month
Purchase_FrequencyOnce a week
Age
GenderMale
GenderOthers
GenderPrefer not to say
Browsing_FrequencyFew times a week
Browsing_FrequencyMultiple times a day *
Browsing_FrequencyRarely
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.9534 on 401 degrees of freedom
Multiple R-squared: 0.1617, Adjusted R-squared: 0.1178
F-statistic: 3.683 on 21 and 401 DF, p-value: 1.457e-07
# Print both fitted models side by side as a plain-text regression table.
stargazer(
  reg0,
  reg1,
  type = "text"
)
========================================================================================
Dependent variable:
-----------------------------------------------
Shopping_Satisfaction
(1) (2)
----------------------------------------------------------------------------------------
Review_ReliabilityModerately 0.050 0.055
(0.125) (0.129)
Review_ReliabilityNever 1.028*** 0.985***
(0.253) (0.261)
Review_ReliabilityOccasionally 0.342** 0.349**
(0.138) (0.143)
Review_ReliabilityRarely 0.297 0.285
(0.215) (0.219)
Product_Search_Methodcategories 0.604 0.158
(0.958) (0.975)
Product_Search_MethodFilter 0.332 -0.101
(0.960) (0.977)
Product_Search_MethodKeyword 0.282 -0.096
(0.959) (0.973)
Product_Search_Methodothers 1.041 0.576
(0.975) (0.994)
Add_to_Cart_BrowsingNo -0.208* -0.205
(0.122) (0.126)
Add_to_Cart_BrowsingYes -0.244** -0.228*
(0.115) (0.119)
Purchase_FrequencyLess than once a month -0.041
(0.157)
Purchase_FrequencyMultiple times a week 0.071
(0.184)
Purchase_FrequencyOnce a month 0.036
(0.138)
Purchase_FrequencyOnce a week 0.017
(0.136)
Age -0.004
(0.005)
GenderMale -0.069
(0.120)
GenderOthers 0.192
(0.293)
GenderPrefer not to say 0.050
(0.138)
Browsing_FrequencyFew times a week -0.029
(0.112)
Browsing_FrequencyMultiple times a day -0.398**
(0.162)
Browsing_FrequencyRarely 0.072
(0.178)
Constant 1.950** 2.529**
(0.959) (0.995)
----------------------------------------------------------------------------------------
Observations 423 423
R2 0.143 0.162
Adjusted R2 0.122 0.118
Residual Std. Error 0.951 (df = 412) 0.953 (df = 401)
F Statistic 6.876*** (df = 10; 412) 3.683*** (df = 21; 401)
========================================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
?predict
# newdata must contain every variable used in reg1
predictions <- predict(object = reg1, newdata = test_data)
summary(predictions)
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.613 2.122 2.425 2.450 2.661 3.985
# Round the continuous predictions to whole numbers.
rounded_prediction <- round(x = predictions, digits = 0)
# Summarise the vector itself; passing the quoted (and misspelled) name would
# only describe a length-one character string.
summary(rounded_prediction)
Length Class Mode
1 character character
# Show the first six raw predictions next to their rounded values.
utils::head(
  cbind(predictions, rounded_prediction),
  n = 6L
)
predictions rounded_prediction
1 3.106728 3
2 2.455069 2
3 1.963601 2
4 2.372401 2
5 1.966738 2
6 1.906147 2
# Show the last six raw predictions next to their rounded values.
utils::tail(
  cbind(predictions, rounded_prediction),
  n = 6L
)
predictions rounded_prediction
174 2.639570 3
175 2.082025 2
176 2.538272 3
177 3.064434 3
178 2.822154 3
179 2.301173 2
# Fill the submission's target column with the rounded predictions and write
# the result to disk.
# The column name is spelled as in the train/test data
# (Shopping_Satisfaction); the earlier misspelling added a new column.
# NOTE(review): assumes sample_submission uses this column name and has its
# rows in the same order as test_data -- confirm against the template.
stopifnot(nrow(sample_submission) == length(rounded_prediction))
sample_submission$Shopping_Satisfaction <- rounded_prediction
write.csv(
  sample_submission,
  file = "Hackathon5.csv",
  row.names = FALSE
)