Hackathon

Author

AR

# Load the hackathon data sets from the local Downloads folder.
#
# The workspace is intentionally NOT cleared here: wiping the global
# environment does not reset loaded packages or options, and it destroys
# unrelated objects whenever the script is sourced. Restart R for a clean run.
data_dir <- "~/Downloads"

# Training data; includes the observed Shopping_Satisfaction response.
train_data <- read.csv(
  file.path(data_dir, "BCE Train Data.csv"),
  header = TRUE
)

# Test data; Shopping_Satisfaction is empty here and has to be predicted.
test_data <- read.csv(
  file.path(data_dir, "BCE Test Data.csv"),
  header = TRUE
)

# Submission template that is filled in and written out at the end.
sample_submission <- read.csv(
  file.path(data_dir, "BCE Sample Submission (1).csv"),
  header = TRUE
)
library(visdat)
library(stargazer)

Please cite as: 
 Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
 R package version 5.2.3. https://CRAN.R-project.org/package=stargazer 
# Plot an overview of the column types and missing values in the training set.
vis_dat(x = train_data)

# Print a plain-text table of summary statistics for the training set.
stargazer(
  train_data,
  type = "text"
)

====================================================================
Statistic                                N   Mean   St. Dev. Min Max
--------------------------------------------------------------------
Id                                      423 298.695 172.546   1  602
Age                                     423 30.631   10.188   3  67 
Customer_Reviews_Importance             423  2.525   1.196    1   5 
Personalized_Recommendation_Frequency.1 423  2.674   1.034    1   5 
Rating_Accuracy                         423  2.643   0.899    1   5 
Shopping_Satisfaction                   423  2.454   1.015    1   5 
--------------------------------------------------------------------
library(stR)
Registered S3 method overwritten by 'quantmod':
  method            from
  as.zoo.data.frame zoo 
# Show the structure (column names, types, first values) of the test set.
str(object = test_data)
'data.frame':   179 obs. of  23 variables:
 $ Id                                     : int  5 6 10 12 14 15 16 19 20 24 ...
 $ Age                                    : int  32 33 26 32 54 43 25 25 32 31 ...
 $ Gender                                 : chr  "Female" "Female" "Male" "Male" ...
 $ Purchase_Frequency                     : chr  "Once a month" "Multiple times a week" "Less than once a month" "Less than once a month" ...
 $ Purchase_Categories                    : chr  "Clothing and Fashion;Home and Kitchen;others" "Groceries and Gourmet Food;Beauty and Personal Care;Clothing and Fashion;Home and Kitchen;others" "Groceries and Gourmet Food;Beauty and Personal Care;Clothing and Fashion;Home and Kitchen;others" "others" ...
 $ Personalized_Recommendation_Frequency  : chr  "No" "No" "Yes" "Sometimes" ...
 $ Browsing_Frequency                     : chr  "Few times a week" "Few times a month" "Few times a week" "Rarely" ...
 $ Product_Search_Method                  : chr  "others" "categories" "Keyword" "categories" ...
 $ Search_Result_Exploration              : chr  "Multiple pages" "Multiple pages" "Multiple pages" "First page" ...
 $ Customer_Reviews_Importance            : int  1 1 1 1 3 2 5 4 1 3 ...
 $ Add_to_Cart_Browsing                   : chr  "Yes" "Yes" "Yes" "No" ...
 $ Cart_Completion_Frequency              : chr  "Sometimes" "Always" "Often" "Always" ...
 $ Cart_Abandonment_Factors               : chr  "Found a better price elsewhere" "Changed my mind or no longer need the item" "Found a better price elsewhere" "Changed my mind or no longer need the item" ...
 $ Saveforlater_Frequency                 : chr  "Rarely" "Often" "Sometimes" "Never" ...
 $ Review_Left                            : chr  "Yes" "Yes" "No" "No" ...
 $ Review_Reliability                     : chr  "Occasionally" "Moderately" "Heavily" "Moderately" ...
 $ Review_Helpfulness                     : chr  "Yes" "Yes" "Yes" "Yes" ...
 $ Personalized_Recommendation_Frequency.1: int  5 2 1 2 3 1 4 3 1 4 ...
 $ Recommendation_Helpfulness             : chr  "Yes" "No" "Yes" "Sometimes" ...
 $ Rating_Accuracy                        : int  1 3 2 2 2 2 3 3 1 2 ...
 $ Service_Appreciation                   : chr  "Competitive prices" "Wide product selection" "Wide product selection" "Wide product selection" ...
 $ Improvement_Areas                      : chr  "Shipping speed and reliability" "Reducing packaging waste" "Customer service responsiveness" "Customer service responsiveness" ...
 $ Shopping_Satisfaction                  : logi  NA NA NA NA NA NA ...
# Baseline linear model: satisfaction regressed on review reliability,
# product search method and add-to-cart browsing.
reg0 <- lm(
  formula = Shopping_Satisfaction ~
    Review_Reliability + Product_Search_Method + Add_to_Cart_Browsing,
  data = train_data
)
summary(object = reg0)

Call:
lm(formula = Shopping_Satisfaction ~ Review_Reliability + Product_Search_Method + 
    Add_to_Cart_Browsing, data = train_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.04345 -0.66899 -0.03827  0.66722  2.71758 

Coefficients:
                                Estimate Std. Error t value Pr(>|t|)    
(Intercept)                      1.94954    0.95930   2.032   0.0428 *  
Review_ReliabilityModerately     0.05046    0.12546   0.402   0.6877    
Review_ReliabilityNever          1.02786    0.25293   4.064 5.78e-05 ***
Review_ReliabilityOccasionally   0.34222    0.13817   2.477   0.0137 *  
Review_ReliabilityRarely         0.29695    0.21498   1.381   0.1679    
Product_Search_Methodcategories  0.60355    0.95767   0.630   0.5289    
Product_Search_MethodFilter      0.33161    0.96026   0.345   0.7300    
Product_Search_MethodKeyword     0.28242    0.95892   0.295   0.7685    
Product_Search_Methodothers      1.04111    0.97512   1.068   0.2863    
Add_to_Cart_BrowsingNo          -0.20849    0.12223  -1.706   0.0888 .  
Add_to_Cart_BrowsingYes         -0.24415    0.11515  -2.120   0.0346 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.9511 on 412 degrees of freedom
Multiple R-squared:  0.143, Adjusted R-squared:  0.1222 
F-statistic: 6.876 on 10 and 412 DF,  p-value: 5.772e-10
# Extended linear model: the baseline predictors plus purchase frequency,
# age, gender and browsing frequency.
reg1 <- lm(
  formula = Shopping_Satisfaction ~
    Review_Reliability + Product_Search_Method + Add_to_Cart_Browsing +
    Purchase_Frequency + Age + Gender + Browsing_Frequency,
  data = train_data
)
summary(object = reg1)

Call:
lm(formula = Shopping_Satisfaction ~ Review_Reliability + Product_Search_Method + 
    Add_to_Cart_Browsing + Purchase_Frequency + Age + Gender + 
    Browsing_Frequency, data = train_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.19660 -0.68032 -0.04159  0.62020  2.65030 

Coefficients:
                                          Estimate Std. Error t value Pr(>|t|)
(Intercept)                               2.529190   0.994787   2.542 0.011383
Review_ReliabilityModerately              0.055166   0.128678   0.429 0.668363
Review_ReliabilityNever                   0.985489   0.261109   3.774 0.000185
Review_ReliabilityOccasionally            0.349301   0.143393   2.436 0.015286
Review_ReliabilityRarely                  0.284857   0.219302   1.299 0.194717
Product_Search_Methodcategories           0.158126   0.974941   0.162 0.871238
Product_Search_MethodFilter              -0.101201   0.977054  -0.104 0.917556
Product_Search_MethodKeyword             -0.095858   0.973440  -0.098 0.921606
Product_Search_Methodothers               0.576033   0.993685   0.580 0.562447
Add_to_Cart_BrowsingNo                   -0.205252   0.125582  -1.634 0.102958
Add_to_Cart_BrowsingYes                  -0.228124   0.119317  -1.912 0.056600
Purchase_FrequencyLess than once a month -0.041188   0.157326  -0.262 0.793607
Purchase_FrequencyMultiple times a week   0.071234   0.183680   0.388 0.698356
Purchase_FrequencyOnce a month            0.035783   0.137587   0.260 0.794939
Purchase_FrequencyOnce a week             0.016946   0.135593   0.125 0.900604
Age                                      -0.003955   0.004832  -0.819 0.413548
GenderMale                               -0.068695   0.120466  -0.570 0.568832
GenderOthers                              0.191992   0.292542   0.656 0.512014
GenderPrefer not to say                   0.049931   0.137765   0.362 0.717216
Browsing_FrequencyFew times a week       -0.028887   0.112013  -0.258 0.796625
Browsing_FrequencyMultiple times a day   -0.398459   0.161966  -2.460 0.014309
Browsing_FrequencyRarely                  0.071622   0.178459   0.401 0.688389
                                            
(Intercept)                              *  
Review_ReliabilityModerately                
Review_ReliabilityNever                  ***
Review_ReliabilityOccasionally           *  
Review_ReliabilityRarely                    
Product_Search_Methodcategories             
Product_Search_MethodFilter                 
Product_Search_MethodKeyword                
Product_Search_Methodothers                 
Add_to_Cart_BrowsingNo                      
Add_to_Cart_BrowsingYes                  .  
Purchase_FrequencyLess than once a month    
Purchase_FrequencyMultiple times a week     
Purchase_FrequencyOnce a month              
Purchase_FrequencyOnce a week               
Age                                         
GenderMale                                  
GenderOthers                                
GenderPrefer not to say                     
Browsing_FrequencyFew times a week          
Browsing_FrequencyMultiple times a day   *  
Browsing_FrequencyRarely                    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.9534 on 401 degrees of freedom
Multiple R-squared:  0.1617,    Adjusted R-squared:  0.1178 
F-statistic: 3.683 on 21 and 401 DF,  p-value: 1.457e-07
# Side-by-side plain-text regression table for the two fitted models.
stargazer(
  reg0, reg1,
  type = "text"
)

========================================================================================
                                                       Dependent variable:              
                                         -----------------------------------------------
                                                      Shopping_Satisfaction             
                                                   (1)                     (2)          
----------------------------------------------------------------------------------------
Review_ReliabilityModerately                      0.050                   0.055         
                                                 (0.125)                 (0.129)        
                                                                                        
Review_ReliabilityNever                         1.028***                0.985***        
                                                 (0.253)                 (0.261)        
                                                                                        
Review_ReliabilityOccasionally                   0.342**                 0.349**        
                                                 (0.138)                 (0.143)        
                                                                                        
Review_ReliabilityRarely                          0.297                   0.285         
                                                 (0.215)                 (0.219)        
                                                                                        
Product_Search_Methodcategories                   0.604                   0.158         
                                                 (0.958)                 (0.975)        
                                                                                        
Product_Search_MethodFilter                       0.332                  -0.101         
                                                 (0.960)                 (0.977)        
                                                                                        
Product_Search_MethodKeyword                      0.282                  -0.096         
                                                 (0.959)                 (0.973)        
                                                                                        
Product_Search_Methodothers                       1.041                   0.576         
                                                 (0.975)                 (0.994)        
                                                                                        
Add_to_Cart_BrowsingNo                           -0.208*                 -0.205         
                                                 (0.122)                 (0.126)        
                                                                                        
Add_to_Cart_BrowsingYes                         -0.244**                 -0.228*        
                                                 (0.115)                 (0.119)        
                                                                                        
Purchase_FrequencyLess than once a month                                 -0.041         
                                                                         (0.157)        
                                                                                        
Purchase_FrequencyMultiple times a week                                   0.071         
                                                                         (0.184)        
                                                                                        
Purchase_FrequencyOnce a month                                            0.036         
                                                                         (0.138)        
                                                                                        
Purchase_FrequencyOnce a week                                             0.017         
                                                                         (0.136)        
                                                                                        
Age                                                                      -0.004         
                                                                         (0.005)        
                                                                                        
GenderMale                                                               -0.069         
                                                                         (0.120)        
                                                                                        
GenderOthers                                                              0.192         
                                                                         (0.293)        
                                                                                        
GenderPrefer not to say                                                   0.050         
                                                                         (0.138)        
                                                                                        
Browsing_FrequencyFew times a week                                       -0.029         
                                                                         (0.112)        
                                                                                        
Browsing_FrequencyMultiple times a day                                  -0.398**        
                                                                         (0.162)        
                                                                                        
Browsing_FrequencyRarely                                                  0.072         
                                                                         (0.178)        
                                                                                        
Constant                                         1.950**                 2.529**        
                                                 (0.959)                 (0.995)        
                                                                                        
----------------------------------------------------------------------------------------
Observations                                       423                     423          
R2                                                0.143                   0.162         
Adjusted R2                                       0.122                   0.118         
Residual Std. Error                         0.951 (df = 412)        0.953 (df = 401)    
F Statistic                              6.876*** (df = 10; 412) 3.683*** (df = 21; 401)
========================================================================================
Note:                                                        *p<0.1; **p<0.05; ***p<0.01
# Look up the documentation for predict().
?predict

# Score the test set with the extended model. The data passed as `newdata`
# has to contain every predictor that appears in reg1.
predictions <- predict(reg1, newdata = test_data)

# Five-number summary (plus mean) of the raw predictions.
summary(object = predictions)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  1.613   2.122   2.425   2.450   2.661   3.985 
# Convert the continuous predictions into whole-number ratings. The observed
# training responses range from 1 to 5, so the rounded values are also clamped
# to that range in case the linear model extrapolates beyond it.
rounded_prediction <- round(x = predictions, digits = 0)
rounded_prediction <- pmin(pmax(rounded_prediction, 1), 5)

# Summarise the rounded vector itself. Passing the quoted (and misspelled)
# name "rounded_predictions" only describes a length-one character string.
summary(rounded_prediction)
   Length     Class      Mode 
        1 character character 
# First six rows: raw prediction next to its rounded value.
head(x = cbind(predictions, rounded_prediction), n = 6L)
  predictions rounded_prediction
1    3.106728                  3
2    2.455069                  2
3    1.963601                  2
4    2.372401                  2
5    1.966738                  2
6    1.906147                  2
# Last six rows: raw prediction next to its rounded value.
tail(x = cbind(predictions, rounded_prediction), n = 6L)
    predictions rounded_prediction
174    2.639570                  3
175    2.082025                  2
176    2.538272                  3
177    3.064434                  3
178    2.822154                  3
179    2.301173                  2
# Store the rounded ratings in the submission template and write it to disk.
#
# The target column was previously spelled "Shopping_Statisfaction", which
# creates a new, misspelled column instead of filling the intended one.
# NOTE(review): assumes the template's target column is named
# Shopping_Satisfaction (as in the train/test data) and that its rows are in
# the same order as test_data -- confirm against the sample submission CSV.
stopifnot(length(rounded_prediction) == nrow(sample_submission))
sample_submission$Shopping_Satisfaction <- rounded_prediction

write.csv(
  sample_submission,
  file = "Hackathon5.csv",
  row.names = FALSE
)