library("rjson")
library("DT")
library("data.table")
library("tidyverse")
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.0      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::between()   masks data.table::between()
## ✖ dplyr::filter()    masks stats::filter()
## ✖ dplyr::first()     masks data.table::first()
## ✖ dplyr::lag()       masks stats::lag()
## ✖ dplyr::last()      masks data.table::last()
## ✖ purrr::transpose() masks data.table::transpose()
library("dplyr")
library("usmap")
library("ggplot2")
library("maptools")
## Loading required package: sp
## Checking rgeos availability: FALSE
## Please note that 'maptools' will be retired during 2023,
## plan transition at your earliest convenience;
## some functionality will be moved to 'sp'.
##      Note: when rgeos is not available, polygon geometry     computations in maptools depend on gpclib,
##      which has a restricted licence. It is disabled by default;
##      to enable gpclib, type gpclibPermit()
library("mapview")
library("devtools")
## Loading required package: usethis
library("vcd")
## Loading required package: grid
library("gridExtra")
## 
## Attaching package: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine
library("shiny")
## 
## Attaching package: 'shiny'
## 
## The following objects are masked from 'package:DT':
## 
##     dataTableOutput, renderDataTable
library("MASS")
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
library("faraway")
library("rgdal")
## Please note that rgdal will be retired during 2023,
## plan transition to sf/stars/terra functions using GDAL and PROJ
## at your earliest convenience.
## See https://r-spatial.org/r/2022/04/12/evolution.html and https://github.com/r-spatial/evolution
## rgdal: version: 1.6-6, (SVN revision 1201)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.4.2, released 2022/03/08
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/4.2/Resources/library/rgdal/gdal
## GDAL binary built with GEOS: FALSE 
## Loaded PROJ runtime: Rel. 8.2.1, January 1st, 2022, [PJ_VERSION: 821]
## Path to PROJ shared files: /Library/Frameworks/R.framework/Versions/4.2/Resources/library/rgdal/proj
## PROJ CDN enabled: FALSE
## Linking to sp version:1.6-0
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading sp or rgdal.
library("tigris")
## To enable caching of data, set `options(tigris_use_cache = TRUE)`
## in your R script or .Rprofile.
library("sf")
## Linking to GEOS 3.10.2, GDAL 3.4.2, PROJ 8.2.1; sf_use_s2() is TRUE

JSON Files Import

brand_tags <- 
  fromJSON(file = "/Users/apple/Desktop/2023\ Feb\ Transfer/brand_tags.json")

projects <- 
  fromJSON(file = "/Users/apple/Desktop/2023\ Feb\ Transfer/projects.json")

redemptions_2021 <- 
  fromJSON(file = "/Users/apple/Desktop/2023\ Feb\ Transfer/redemptions_2021.json")

redemptions_2022 <- 
  fromJSON(file = "/Users/apple/Desktop/2023\ Feb\ Transfer/redemptions_2022.json")

redemptions_2023_Jan <- 
  fromJSON(file = "/Users/apple/Desktop/2023\ Feb\ Transfer/redemptions_2023_Jan.json")

revenue_view_2021 <-
  fromJSON(file = "/Users/apple/Desktop/2023\ Feb\ Transfer/revenue_view_2021.json")

revenue_view_2022 <- 
  fromJSON(file = "/Users/apple/Desktop/2023\ Feb\ Transfer/revenue_view_2022.json")

revenue_view_2023_Jan <- 
  fromJSON(file = "/Users/apple/Desktop/2023\ Feb\ Transfer/revenue_view_2023_Jan.json")

users_thru_jan23 <- 
  fromJSON(file = "/Users/apple/Desktop/2023\ Feb\ Transfer/users_thru_jan23.json")

Convert JSON Files to Tabular Data

json_list <- list(brand_tags, projects, redemptions_2021, redemptions_2022, 
               redemptions_2023_Jan, revenue_view_2021, revenue_view_2022, 
               revenue_view_2023_Jan, users_thru_jan23)

####(1)
brand_tags <- lapply(brand_tags, function(x) {
  x[sapply(x, is.null)] <- NA
  unlist(x)
})

df_brand_tags <- as.data.frame(do.call("cbind", brand_tags)) |> t()

####(2)
projects <- lapply(projects, function(x) {
  x[sapply(x, is.null)] <- NA
  unlist(x)
})

df_projects <- as.data.frame(do.call("cbind", projects)) |> t()

####(3)
redemptions_2021 <- lapply(redemptions_2021, function(x) {
  x[sapply(x, is.null)] <- NA
  unlist(x)
})

df_redemptions_2021 <- as.data.frame(do.call("cbind", redemptions_2021)) |> t()

####(4)
redemptions_2022 <- lapply(redemptions_2022, function(x) {
  x[sapply(x, is.null)] <- NA
  unlist(x)
})

df_redemptions_2022 <- as.data.frame(do.call("cbind", redemptions_2022)) |> t()

####(5)
redemptions_2023_Jan <- lapply(redemptions_2023_Jan, function(x) {
  x[sapply(x, is.null)] <- NA
  unlist(x)
})

df_redemptions_2023_Jan <- as.data.frame(do.call("cbind", redemptions_2023_Jan)) |> t()

####(6)
revenue_view_2021 <- lapply(revenue_view_2021, function(x) {
  x[sapply(x, is.null)] <- NA
  unlist(x)
})

df_revenue_view_2021 <- as.data.frame(do.call("cbind", revenue_view_2021)) |> t()

####(7)
revenue_view_2022 <- lapply(revenue_view_2022, function(x) {
  x[sapply(x, is.null)] <- NA
  unlist(x)
})

df_revenue_view_2022 <- as.data.frame(do.call("cbind", revenue_view_2022)) |> t()

####(8)
revenue_view_2023_Jan <- lapply(revenue_view_2023_Jan, function(x) {
  x[sapply(x, is.null)] <- NA
  unlist(x)
})

df_revenue_view_2023_Jan <- as.data.frame(do.call("cbind", revenue_view_2023_Jan)) |> t()

####(9)
users_thru_jan23 <- lapply(users_thru_jan23, function(x) {
  x[sapply(x, is.null)] <- NA
  unlist(x)
})

df_users_thru_jan23 <- as.data.frame(do.call("cbind", users_thru_jan23)) |> t()

L <- list(df_brand_tags, df_projects, df_redemptions_2021, 
                df_redemptions_2022, df_redemptions_2023_Jan, 
                df_revenue_view_2021, df_revenue_view_2022, 
                df_revenue_view_2023_Jan, df_users_thru_jan23)

Preliminary Data Check

identical(colnames(df_brand_tags), colnames(df_redemptions_2022))
## [1] FALSE
all.equal(colnames(df_redemptions_2021), colnames(df_redemptions_2022), 
          colnames(df_redemptions_2023_Jan))
## [1] TRUE
all.equal(colnames(df_revenue_view_2021), colnames(df_revenue_view_2022), 
          colnames(df_revenue_view_2023_Jan))
## [1] TRUE

Since the column names of redemptions and revenue_view are same across all three tables (df_redemptions_2021, df_redemptions_2022, df_redemptions_2023_Jan AND df_revenue_view_2021, df_revenue_view_2022, df_revenue_view_2023_Jan), we may consider combining them into two larger tables.

Tabular Conversion

df_redemptions <- 
  rbind(df_redemptions_2021, df_redemptions_2022, df_redemptions_2023_Jan)

df_revenue_view <- 
  rbind(df_revenue_view_2021, df_revenue_view_2022, df_revenue_view_2023_Jan)

L_new <- list(df_brand_tags, df_projects, df_redemptions, 
              df_revenue_view, df_users_thru_jan23)

Exploratory Data Analysis I on Brand Tags

df_brand_tags <- transform(df_brand_tags, 
                             is_featured = as.numeric(is_featured),
                             longitude = as.numeric(longitude),
                             latitude = as.numeric(latitude))

summary(df_brand_tags)
##      name            project_id        project_location_id
##  Length:1821        Length:1821        Length:1821        
##  Class :character   Class :character   Class :character   
##  Mode  :character   Mode  :character   Mode  :character   
##                                                           
##                                                           
##                                                           
##  project_location_name     tag            tag_category       category_desc     
##  Length:1821           Length:1821        Length:1821        Length:1821       
##  Class :character      Class :character   Class :character   Class :character  
##  Mode  :character      Mode  :character   Mode  :character   Mode  :character  
##                                                                                
##                                                                                
##                                                                                
##   is_featured   address            longitude          latitude    
##  Min.   :1    Length:1821        Min.   :-157.83   Min.   :21.28  
##  1st Qu.:1    Class :character   1st Qu.:-118.31   1st Qu.:34.10  
##  Median :1    Mode  :character   Median : -84.37   Median :38.91  
##  Mean   :1                       Mean   : -92.58   Mean   :37.68  
##  3rd Qu.:1                       3rd Qu.: -74.02   3rd Qu.:40.75  
##  Max.   :1                       Max.   : -71.04   Max.   :47.75  
##      city              state            created_at       
##  Length:1821        Length:1821        Length:1821       
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
## 
mapview(df_brand_tags, xcol = "longitude", ycol = "latitude", crs = 4269, 
        grid = FALSE, map.types = "Stamen.Toner")
group_by(df_brand_tags, tag) |> count() |> arrange(desc(n))
group_by(df_brand_tags, tag_category, category_desc) |> count() |> arrange(desc(n))
group_by(df_brand_tags, city, state) |> count() |> arrange(desc(n))
ggplot(df_brand_tags) + geom_bar(aes(x = category_desc, fill = state))

Exploratory Data Analysis II on Projects

df_projects <- data.frame(df_projects)

proj <- group_by(df_projects, project_id, project_name, city, state) |> 
  count() |> arrange(desc(n))

proj
proj_ny <- filter(proj, state == "NY")

ggplot(proj_ny) + 
  geom_col(aes(y = proj_ny$project_name, x = proj_ny$n, fill = proj_ny$city)) +
  xlab("Number of Projects") + ylab("Name of Projects") 

proj_tx <- filter(proj, state == "TX")

ggplot(proj_tx) + 
  geom_col(aes(y = proj_tx$project_name, x = proj_tx$n, fill = proj_tx$city)) +
  xlab("Number of Projects") + ylab("Name of Projects") 

proj_wa <- filter(proj, state == "WA")

ggplot(proj_wa) + 
  geom_col(aes(y = proj_wa$project_name, x = proj_wa$n, fill = proj_wa$city)) +
  xlab("Number of Projects") + ylab("Name of Projects") 

Exploratory Data Analysis III on Redemptions

df_redemptions <- data.frame(df_redemptions)

nrow(df_redemptions)
## [1] 183687
ncol(df_redemptions)
## [1] 14
colnames(df_redemptions)
##  [1] "user_id"                 "created_at"             
##  [3] "project_id"              "project.name"           
##  [5] "Location"                "project_location_id"    
##  [7] "city"                    "state"                  
##  [9] "credit_transaction_id"   "total_redemption_amount"
## [11] "tip"                     "Venue.Type...Detail"    
## [13] "Check.Average"           "Service.Type"
is.atomic(df_redemptions)
## [1] FALSE
unique(df_redemptions$Venue.Type...Detail)
##  [1] "Casual Fine Dining" "Nightclub"          "Unknown"           
##  [4] "Casual Dining"      "Fast Casual"        "Fine Dining"       
##  [7] "Cafe"               "Bar"                "Coffee Shop"       
## [10] "Market"             "Fast Food"
unique(df_redemptions$Check.Average)
## [1] "Mid"       "Low"       "High"      "Very Low"  "Very High"
unique(df_redemptions$Service.Type)
## [1] "Full Service"  "Unknown"       "Fast Casual"   "QSR"          
## [5] "Ghost Kitchen"
df_redemptions <- transform(df_redemptions, 
                  total_redemption_amount = as.numeric(total_redemption_amount),
                  tip = as.numeric(tip))

summary(df_redemptions)
##    user_id           created_at         project_id        project.name      
##  Length:183687      Length:183687      Length:183687      Length:183687     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    Location         project_location_id     city              state          
##  Length:183687      Length:183687       Length:183687      Length:183687     
##  Class :character   Class :character    Class :character   Class :character  
##  Mode  :character   Mode  :character    Mode  :character   Mode  :character  
##                                                                              
##                                                                              
##                                                                              
##                                                                              
##  credit_transaction_id total_redemption_amount      tip          
##  Length:183687         Min.   :    0.00        Min.   :    0.00  
##  Class :character      1st Qu.:   26.76        1st Qu.:    6.00  
##  Mode  :character      Median :   62.00        Median :   14.01  
##                        Mean   :  118.56        Mean   :   25.54  
##                        3rd Qu.:  129.57        3rd Qu.:   28.46  
##                        Max.   :31250.00        Max.   :11002.00  
##                                                NA's   :45316     
##  Venue.Type...Detail Check.Average      Service.Type      
##  Length:183687       Length:183687      Length:183687     
##  Class :character    Class :character   Class :character  
##  Mode  :character    Mode  :character   Mode  :character  
##                                                           
##                                                           
##                                                           
## 
ols_redemptions <- lm(tip ~ total_redemption_amount*Service.Type*Check.Average, 
                      data = df_redemptions)
summary(ols_redemptions)
## 
## Call:
## lm(formula = tip ~ total_redemption_amount * Service.Type * Check.Average, 
##     data = df_redemptions)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2896.0    -2.3    -0.3     0.6 10991.8 
## 
## Coefficients: (18 not defined because of singularities)
##                                                                           Estimate
## (Intercept)                                                               1.793259
## total_redemption_amount                                                   0.152535
## Service.TypeFull Service                                                 -2.437781
## Service.TypeGhost Kitchen                                                -0.795666
## Service.TypeQSR                                                          -1.117294
## Service.TypeUnknown                                                       3.235416
## Check.AverageLow                                                         -0.561204
## Check.AverageMid                                                         -1.497949
## Check.AverageVery High                                                    4.503117
## Check.AverageVery Low                                                    -1.577021
## total_redemption_amount:Service.TypeFull Service                          0.050881
## total_redemption_amount:Service.TypeGhost Kitchen                         0.001499
## total_redemption_amount:Service.TypeQSR                                  -0.031016
## total_redemption_amount:Service.TypeUnknown                               0.073094
## total_redemption_amount:Check.AverageLow                                 -0.049575
## total_redemption_amount:Check.AverageMid                                  0.027365
## total_redemption_amount:Check.AverageVery High                           -0.013735
## total_redemption_amount:Check.AverageVery Low                            -0.009887
## Service.TypeFull Service:Check.AverageLow                                 0.780360
## Service.TypeGhost Kitchen:Check.AverageLow                                      NA
## Service.TypeQSR:Check.AverageLow                                          0.237122
## Service.TypeUnknown:Check.AverageLow                                            NA
## Service.TypeFull Service:Check.AverageMid                                 1.703990
## Service.TypeGhost Kitchen:Check.AverageMid                                      NA
## Service.TypeQSR:Check.AverageMid                                         10.175765
## Service.TypeUnknown:Check.AverageMid                                     -2.303271
## Service.TypeFull Service:Check.AverageVery High                                 NA
## Service.TypeGhost Kitchen:Check.AverageVery High                                NA
## Service.TypeQSR:Check.AverageVery High                                          NA
## Service.TypeUnknown:Check.AverageVery High                                      NA
## Service.TypeFull Service:Check.AverageVery Low                            2.258561
## Service.TypeGhost Kitchen:Check.AverageVery Low                                 NA
## Service.TypeQSR:Check.AverageVery Low                                     1.069066
## Service.TypeUnknown:Check.AverageVery Low                                       NA
## total_redemption_amount:Service.TypeFull Service:Check.AverageLow         0.041949
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageLow              NA
## total_redemption_amount:Service.TypeQSR:Check.AverageLow                  0.075990
## total_redemption_amount:Service.TypeUnknown:Check.AverageLow                    NA
## total_redemption_amount:Service.TypeFull Service:Check.AverageMid        -0.027970
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageMid              NA
## total_redemption_amount:Service.TypeQSR:Check.AverageMid                 -0.024507
## total_redemption_amount:Service.TypeUnknown:Check.AverageMid             -0.069361
## total_redemption_amount:Service.TypeFull Service:Check.AverageVery High         NA
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageVery High        NA
## total_redemption_amount:Service.TypeQSR:Check.AverageVery High                  NA
## total_redemption_amount:Service.TypeUnknown:Check.AverageVery High              NA
## total_redemption_amount:Service.TypeFull Service:Check.AverageVery Low    0.012009
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageVery Low         NA
## total_redemption_amount:Service.TypeQSR:Check.AverageVery Low             0.019801
## total_redemption_amount:Service.TypeUnknown:Check.AverageVery Low               NA
##                                                                          Std. Error
## (Intercept)                                                                1.549512
## total_redemption_amount                                                    0.009609
## Service.TypeFull Service                                                   1.578314
## Service.TypeGhost Kitchen                                                  3.635895
## Service.TypeQSR                                                            6.840529
## Service.TypeUnknown                                                        6.561043
## Check.AverageLow                                                           2.270674
## Check.AverageMid                                                           1.748570
## Check.AverageVery High                                                     0.597073
## Check.AverageVery Low                                                      5.029080
## total_redemption_amount:Service.TypeFull Service                           0.009645
## total_redemption_amount:Service.TypeGhost Kitchen                          0.046273
## total_redemption_amount:Service.TypeQSR                                    0.047521
## total_redemption_amount:Service.TypeUnknown                                0.015417
## total_redemption_amount:Check.AverageLow                                   0.029964
## total_redemption_amount:Check.AverageMid                                   0.014769
## total_redemption_amount:Check.AverageVery High                             0.001295
## total_redemption_amount:Check.AverageVery Low                              0.197164
## Service.TypeFull Service:Check.AverageLow                                  2.463496
## Service.TypeGhost Kitchen:Check.AverageLow                                       NA
## Service.TypeQSR:Check.AverageLow                                           7.539997
## Service.TypeUnknown:Check.AverageLow                                             NA
## Service.TypeFull Service:Check.AverageMid                                  1.791621
## Service.TypeGhost Kitchen:Check.AverageMid                                       NA
## Service.TypeQSR:Check.AverageMid                                           7.089032
## Service.TypeUnknown:Check.AverageMid                                       6.638283
## Service.TypeFull Service:Check.AverageVery High                                  NA
## Service.TypeGhost Kitchen:Check.AverageVery High                                 NA
## Service.TypeQSR:Check.AverageVery High                                           NA
## Service.TypeUnknown:Check.AverageVery High                                       NA
## Service.TypeFull Service:Check.AverageVery Low                             5.352948
## Service.TypeGhost Kitchen:Check.AverageVery Low                                  NA
## Service.TypeQSR:Check.AverageVery Low                                      8.391806
## Service.TypeUnknown:Check.AverageVery Low                                        NA
## total_redemption_amount:Service.TypeFull Service:Check.AverageLow          0.031794
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageLow               NA
## total_redemption_amount:Service.TypeQSR:Check.AverageLow                   0.156117
## total_redemption_amount:Service.TypeUnknown:Check.AverageLow                     NA
## total_redemption_amount:Service.TypeFull Service:Check.AverageMid          0.014843
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageMid               NA
## total_redemption_amount:Service.TypeQSR:Check.AverageMid                   0.052652
## total_redemption_amount:Service.TypeUnknown:Check.AverageMid               0.019822
## total_redemption_amount:Service.TypeFull Service:Check.AverageVery High          NA
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageVery High         NA
## total_redemption_amount:Service.TypeQSR:Check.AverageVery High                   NA
## total_redemption_amount:Service.TypeUnknown:Check.AverageVery High               NA
## total_redemption_amount:Service.TypeFull Service:Check.AverageVery Low     0.198291
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageVery Low          NA
## total_redemption_amount:Service.TypeQSR:Check.AverageVery Low              0.204410
## total_redemption_amount:Service.TypeUnknown:Check.AverageVery Low                NA
##                                                                          t value
## (Intercept)                                                                1.157
## total_redemption_amount                                                   15.875
## Service.TypeFull Service                                                  -1.545
## Service.TypeGhost Kitchen                                                 -0.219
## Service.TypeQSR                                                           -0.163
## Service.TypeUnknown                                                        0.493
## Check.AverageLow                                                          -0.247
## Check.AverageMid                                                          -0.857
## Check.AverageVery High                                                     7.542
## Check.AverageVery Low                                                     -0.314
## total_redemption_amount:Service.TypeFull Service                           5.275
## total_redemption_amount:Service.TypeGhost Kitchen                          0.032
## total_redemption_amount:Service.TypeQSR                                   -0.653
## total_redemption_amount:Service.TypeUnknown                                4.741
## total_redemption_amount:Check.AverageLow                                  -1.655
## total_redemption_amount:Check.AverageMid                                   1.853
## total_redemption_amount:Check.AverageVery High                           -10.603
## total_redemption_amount:Check.AverageVery Low                             -0.050
## Service.TypeFull Service:Check.AverageLow                                  0.317
## Service.TypeGhost Kitchen:Check.AverageLow                                    NA
## Service.TypeQSR:Check.AverageLow                                           0.031
## Service.TypeUnknown:Check.AverageLow                                          NA
## Service.TypeFull Service:Check.AverageMid                                  0.951
## Service.TypeGhost Kitchen:Check.AverageMid                                    NA
## Service.TypeQSR:Check.AverageMid                                           1.435
## Service.TypeUnknown:Check.AverageMid                                      -0.347
## Service.TypeFull Service:Check.AverageVery High                               NA
## Service.TypeGhost Kitchen:Check.AverageVery High                              NA
## Service.TypeQSR:Check.AverageVery High                                        NA
## Service.TypeUnknown:Check.AverageVery High                                    NA
## Service.TypeFull Service:Check.AverageVery Low                             0.422
## Service.TypeGhost Kitchen:Check.AverageVery Low                               NA
## Service.TypeQSR:Check.AverageVery Low                                      0.127
## Service.TypeUnknown:Check.AverageVery Low                                     NA
## total_redemption_amount:Service.TypeFull Service:Check.AverageLow          1.319
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageLow            NA
## total_redemption_amount:Service.TypeQSR:Check.AverageLow                   0.487
## total_redemption_amount:Service.TypeUnknown:Check.AverageLow                  NA
## total_redemption_amount:Service.TypeFull Service:Check.AverageMid         -1.884
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageMid            NA
## total_redemption_amount:Service.TypeQSR:Check.AverageMid                  -0.465
## total_redemption_amount:Service.TypeUnknown:Check.AverageMid              -3.499
## total_redemption_amount:Service.TypeFull Service:Check.AverageVery High       NA
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageVery High      NA
## total_redemption_amount:Service.TypeQSR:Check.AverageVery High                NA
## total_redemption_amount:Service.TypeUnknown:Check.AverageVery High            NA
## total_redemption_amount:Service.TypeFull Service:Check.AverageVery Low     0.061
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageVery Low       NA
## total_redemption_amount:Service.TypeQSR:Check.AverageVery Low              0.097
## total_redemption_amount:Service.TypeUnknown:Check.AverageVery Low             NA
##                                                                          Pr(>|t|)
## (Intercept)                                                              0.247150
## total_redemption_amount                                                   < 2e-16
## Service.TypeFull Service                                                 0.122458
## Service.TypeGhost Kitchen                                                0.826778
## Service.TypeQSR                                                          0.870255
## Service.TypeUnknown                                                      0.621925
## Check.AverageLow                                                         0.804790
## Check.AverageMid                                                         0.391628
## Check.AverageVery High                                                   4.66e-14
## Check.AverageVery Low                                                    0.753840
## total_redemption_amount:Service.TypeFull Service                         1.33e-07
## total_redemption_amount:Service.TypeGhost Kitchen                        0.974159
## total_redemption_amount:Service.TypeQSR                                  0.513966
## total_redemption_amount:Service.TypeUnknown                              2.13e-06
## total_redemption_amount:Check.AverageLow                                 0.098026
## total_redemption_amount:Check.AverageMid                                 0.063915
## total_redemption_amount:Check.AverageVery High                            < 2e-16
## total_redemption_amount:Check.AverageVery Low                            0.960008
## Service.TypeFull Service:Check.AverageLow                                0.751419
## Service.TypeGhost Kitchen:Check.AverageLow                                     NA
## Service.TypeQSR:Check.AverageLow                                         0.974912
## Service.TypeUnknown:Check.AverageLow                                           NA
## Service.TypeFull Service:Check.AverageMid                                0.341561
## Service.TypeGhost Kitchen:Check.AverageMid                                     NA
## Service.TypeQSR:Check.AverageMid                                         0.151169
## Service.TypeUnknown:Check.AverageMid                                     0.728616
## Service.TypeFull Service:Check.AverageVery High                                NA
## Service.TypeGhost Kitchen:Check.AverageVery High                               NA
## Service.TypeQSR:Check.AverageVery High                                         NA
## Service.TypeUnknown:Check.AverageVery High                                     NA
## Service.TypeFull Service:Check.AverageVery Low                           0.673078
## Service.TypeGhost Kitchen:Check.AverageVery Low                                NA
## Service.TypeQSR:Check.AverageVery Low                                    0.898629
## Service.TypeUnknown:Check.AverageVery Low                                      NA
## total_redemption_amount:Service.TypeFull Service:Check.AverageLow        0.187045
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageLow             NA
## total_redemption_amount:Service.TypeQSR:Check.AverageLow                 0.626436
## total_redemption_amount:Service.TypeUnknown:Check.AverageLow                   NA
## total_redemption_amount:Service.TypeFull Service:Check.AverageMid        0.059506
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageMid             NA
## total_redemption_amount:Service.TypeQSR:Check.AverageMid                 0.641605
## total_redemption_amount:Service.TypeUnknown:Check.AverageMid             0.000467
## total_redemption_amount:Service.TypeFull Service:Check.AverageVery High        NA
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageVery High       NA
## total_redemption_amount:Service.TypeQSR:Check.AverageVery High                 NA
## total_redemption_amount:Service.TypeUnknown:Check.AverageVery High             NA
## total_redemption_amount:Service.TypeFull Service:Check.AverageVery Low   0.951706
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageVery Low        NA
## total_redemption_amount:Service.TypeQSR:Check.AverageVery Low            0.922832
## total_redemption_amount:Service.TypeUnknown:Check.AverageVery Low              NA
##                                                                             
## (Intercept)                                                                 
## total_redemption_amount                                                  ***
## Service.TypeFull Service                                                    
## Service.TypeGhost Kitchen                                                   
## Service.TypeQSR                                                             
## Service.TypeUnknown                                                         
## Check.AverageLow                                                            
## Check.AverageMid                                                            
## Check.AverageVery High                                                   ***
## Check.AverageVery Low                                                       
## total_redemption_amount:Service.TypeFull Service                         ***
## total_redemption_amount:Service.TypeGhost Kitchen                           
## total_redemption_amount:Service.TypeQSR                                     
## total_redemption_amount:Service.TypeUnknown                              ***
## total_redemption_amount:Check.AverageLow                                 .  
## total_redemption_amount:Check.AverageMid                                 .  
## total_redemption_amount:Check.AverageVery High                           ***
## total_redemption_amount:Check.AverageVery Low                               
## Service.TypeFull Service:Check.AverageLow                                   
## Service.TypeGhost Kitchen:Check.AverageLow                                  
## Service.TypeQSR:Check.AverageLow                                            
## Service.TypeUnknown:Check.AverageLow                                        
## Service.TypeFull Service:Check.AverageMid                                   
## Service.TypeGhost Kitchen:Check.AverageMid                                  
## Service.TypeQSR:Check.AverageMid                                            
## Service.TypeUnknown:Check.AverageMid                                        
## Service.TypeFull Service:Check.AverageVery High                             
## Service.TypeGhost Kitchen:Check.AverageVery High                            
## Service.TypeQSR:Check.AverageVery High                                      
## Service.TypeUnknown:Check.AverageVery High                                  
## Service.TypeFull Service:Check.AverageVery Low                              
## Service.TypeGhost Kitchen:Check.AverageVery Low                             
## Service.TypeQSR:Check.AverageVery Low                                       
## Service.TypeUnknown:Check.AverageVery Low                                   
## total_redemption_amount:Service.TypeFull Service:Check.AverageLow           
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageLow          
## total_redemption_amount:Service.TypeQSR:Check.AverageLow                    
## total_redemption_amount:Service.TypeUnknown:Check.AverageLow                
## total_redemption_amount:Service.TypeFull Service:Check.AverageMid        .  
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageMid          
## total_redemption_amount:Service.TypeQSR:Check.AverageMid                    
## total_redemption_amount:Service.TypeUnknown:Check.AverageMid             ***
## total_redemption_amount:Service.TypeFull Service:Check.AverageVery High     
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageVery High    
## total_redemption_amount:Service.TypeQSR:Check.AverageVery High              
## total_redemption_amount:Service.TypeUnknown:Check.AverageVery High          
## total_redemption_amount:Service.TypeFull Service:Check.AverageVery Low      
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageVery Low     
## total_redemption_amount:Service.TypeQSR:Check.AverageVery Low               
## total_redemption_amount:Service.TypeUnknown:Check.AverageVery Low           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 49.07 on 138339 degrees of freedom
##   (45316 observations deleted due to missingness)
## Multiple R-squared:  0.5027, Adjusted R-squared:  0.5026 
## F-statistic:  4511 on 31 and 138339 DF,  p-value: < 2.2e-16
anova(ols_redemptions)
ggplot(df_redemptions) + geom_point(aes(x = total_redemption_amount, y = tip))
## Warning: Removed 45316 rows containing missing values (`geom_point()`).

plot(x = fitted(ols_redemptions), y = residuals(ols_redemptions))

qqnorm(ols_redemptions$residual, ylab = "Residuals")
qqline(ols_redemptions$residual)

hist(ols_redemptions$residual, xlab = "Residuals")

# Cauchy distribution (Heavy tails)

halfnorm(lm.influence(ols_redemptions)$hat, nlab = 5, ylab = "Leverages")

df_redemptions[c(54470, 114859, 38707, 43022, 32011),]
# 5 likely high leverage points

halfnorm(cooks.distance(ols_redemptions), nlab = 1, ylab = "Cook's distance")

df_redemptions[c(99741),]
# 1 serious influential point

ti <- rstudent(ols_redemptions)
max(abs(ti))
## [1] 280.6126
which(ti == max(abs(ti)))
## V19829.2 
##   137461
p_value <- 2*(1-pt(max(abs(ti)), 
                   df = nrow(df_redemptions) - ncol(df_redemptions) - 1))
alpha_over_n <- 0.05/nrow(df_redemptions)

p_value > alpha_over_n
## [1] FALSE
# no outliers reported by Bonferroni correction
df_redemptions
df_redemp_taco1986 <- filter(df_redemptions, project.name == "Tacos 1986")

ggplot(df_redemp_taco1986) + geom_point(aes(x = df_redemp_taco1986$created_at, 
    y = df_redemp_taco1986$total_redemption_amount)) + xlab("Date (YMD-HMS)") +
  ylab("Total Redemption Amount") + 
  ggtitle("Time-Series Plot of Total Redemption Amount for Tacos 1986")

df_redemp_taco1986_burbank <- filter(df_redemp_taco1986, city == "Burbank")

ggplot(df_redemp_taco1986_burbank) + geom_point(aes(
  x = df_redemp_taco1986_burbank$created_at, 
  y = df_redemp_taco1986_burbank$total_redemption_amount)) + 
  xlab("Date (YMD-HMS)") + ylab("Total Redemption Amount") +
  ggtitle("Time-Series Plot of Total Redemption Amount for Tacos 1986 in Burbank")

df_redemp_taco1986_burbank_2023 <- filter(df_redemp_taco1986_burbank, 
       year(df_redemp_taco1986_burbank$created_at) == 2023)

ggplot(df_redemp_taco1986_burbank_2023) + geom_point(aes(
  x = df_redemp_taco1986_burbank_2023$created_at, 
  y = df_redemp_taco1986_burbank_2023$total_redemption_amount)) + 
  xlab("Date (YMD-HMS)") + ylab("Total Redemption Amount") + 
  ggtitle("Time-Series Plot of Total Redemption Amount for Tacos 1986 in 
          Burbank in Year 2023")

Exploratory Data Analysis IV on Revenue View

df_revenue_view <- data.frame(df_revenue_view)

df_revenue_view <- transform(df_revenue_view, 
                  amount_charged_in_usd = as.numeric(amount_charged_in_usd),
                  credit_given_in_usd = as.numeric(credit_given_in_usd))

summary(df_revenue_view)
##      UID                name            project_id           gcp_id         
##  Length:143806      Length:143806      Length:143806      Length:143806     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##     ict_id           created_at        amount_charged_in_usd utm_campaign      
##  Length:143806      Length:143806      Min.   : -9999.00     Length:143806     
##  Class :character   Class :character   1st Qu.:    50.00     Class :character  
##  Mode  :character   Mode  :character   Median :    77.83     Mode  :character  
##                                        Mean   :   180.35                       
##                                        3rd Qu.:   200.00                       
##                                        Max.   :100000.00                       
##   utm_medium        utm_content         utm_source        is_app_purchase   
##  Length:143806      Length:143806      Length:143806      Length:143806     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##    user_id          transaction_type   credit_type        credit_given_in_usd
##  Length:143806      Length:143806      Length:143806      Min.   :-15000.0   
##  Class :character   Class :character   Class :character   1st Qu.:    50.0   
##  Mode  :character   Mode  :character   Mode  :character   Median :    84.6   
##                                                           Mean   :   224.0   
##                                                           3rd Qu.:   250.0   
##                                                           Max.   :120000.0   
##   is_excess         stripe_brand        option_id        
##  Length:143806      Length:143806      Length:143806     
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
## 
head(df_revenue_view)
ggplot(df_revenue_view) + 
  geom_violin(aes(x = credit_given_in_usd, y = stripe_brand)) +
  xlab("Amount of credit (USD)") + ylab("Type of credit cards")

df_revenue_view_pac <- filter(df_revenue_view, name == "Penny Ann's Cafe")

ggplot(df_revenue_view_pac, aes(x = amount_charged_in_usd, 
              y = credit_given_in_usd)) + geom_point() + 
              stat_smooth(se = TRUE, method = "loess")
## `geom_smooth()` using formula = 'y ~ x'

df_revenue_view_pac_mastercard <- filter(df_revenue_view_pac, 
                                         stripe_brand == "MasterCard")

ggplot(df_revenue_view_pac_mastercard, aes(x = amount_charged_in_usd, 
              y = credit_given_in_usd)) + geom_point() + 
              stat_smooth(se = TRUE, method = "loess")
## `geom_smooth()` using formula = 'y ~ x'

df_revenue_view_card <- group_by(df_revenue_view, stripe_brand) |> count() 

ggplot(df_revenue_view_card, aes(x="", y= df_revenue_view_card$n, 
                                 fill = df_revenue_view_card$stripe_brand)) +
  geom_bar(stat="identity", width=1) +
  coord_polar("y", start=0) +
  geom_text(aes(label = 
  paste0(round(100*df_revenue_view_card$n/sum(df_revenue_view_card$n),2), "%")), 
  position = position_stack(vjust=0.5)) +
  labs(x = NULL, y = NULL) +
  theme_classic() +
  theme(axis.line = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank()) +
  scale_fill_brewer(palette="Blues")

df_revenue_view
ggplot(df_revenue_view) + geom_histogram(aes(x = amount_charged_in_usd)) + ylim(c(0, 30))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values (`geom_bar()`).

ggplot(df_revenue_view) + geom_histogram(aes(x = credit_given_in_usd)) + xlim(c(-500, 8000))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 94 rows containing non-finite values (`stat_bin()`).
## Removed 2 rows containing missing values (`geom_bar()`).

mosaic(~ stripe_brand + is_app_purchase, df_revenue_view)

mosaic(~ transaction_type + credit_type, df_revenue_view)

Exploratory Data Analysis V on Users

df_users_thru_jan23 <- data.frame(df_users_thru_jan23)

df_users_thru_jan23_platform <- group_by(df_users_thru_jan23, user_app_platform) |> count() 

ggplot(df_users_thru_jan23_platform, aes(x="", y= df_users_thru_jan23_platform$n, 
                                 fill = df_users_thru_jan23_platform$user_app_platform)) +
  geom_bar(stat="identity", width=1) +
  coord_polar("y", start=0) +
  geom_text(aes(label = 
  paste0(round(100*df_users_thru_jan23_platform$n/sum(df_users_thru_jan23_platform$n),2), "%")), 
  position = position_stack(vjust=0.5)) +
  labs(x = NULL, y = NULL) +
  theme_classic() +
  theme(axis.line = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank()) +
  scale_fill_brewer(palette="Reds")

df_users_mutate <- mutate(df_users_thru_jan23, created_year = year(account_created_at))
df_users_by_year <- group_by(df_users_mutate, created_year, user_app_platform) |> count()

ggplot(df_users_by_year) + geom_col(aes(x = created_year, y = df_users_by_year$n, 
                                    fill = df_users_by_year$user_app_platform))