library("rjson")
library("DT")
library("data.table")
library("tidyverse")
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::between() masks data.table::between()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks data.table::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::last() masks data.table::last()
## ✖ purrr::transpose() masks data.table::transpose()
library("dplyr")
library("usmap")
library("ggplot2")
library("maptools")
## Loading required package: sp
## Checking rgeos availability: FALSE
## Please note that 'maptools' will be retired during 2023,
## plan transition at your earliest convenience;
## some functionality will be moved to 'sp'.
## Note: when rgeos is not available, polygon geometry computations in maptools depend on gpclib,
## which has a restricted licence. It is disabled by default;
## to enable gpclib, type gpclibPermit()
library("mapview")
library("devtools")
## Loading required package: usethis
library("vcd")
## Loading required package: grid
library("gridExtra")
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
library("shiny")
##
## Attaching package: 'shiny'
##
## The following objects are masked from 'package:DT':
##
## dataTableOutput, renderDataTable
library("MASS")
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
library("faraway")
library("rgdal")
## Please note that rgdal will be retired during 2023,
## plan transition to sf/stars/terra functions using GDAL and PROJ
## at your earliest convenience.
## See https://r-spatial.org/r/2022/04/12/evolution.html and https://github.com/r-spatial/evolution
## rgdal: version: 1.6-6, (SVN revision 1201)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.4.2, released 2022/03/08
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/4.2/Resources/library/rgdal/gdal
## GDAL binary built with GEOS: FALSE
## Loaded PROJ runtime: Rel. 8.2.1, January 1st, 2022, [PJ_VERSION: 821]
## Path to PROJ shared files: /Library/Frameworks/R.framework/Versions/4.2/Resources/library/rgdal/proj
## PROJ CDN enabled: FALSE
## Linking to sp version:1.6-0
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading sp or rgdal.
library("tigris")
## To enable caching of data, set `options(tigris_use_cache = TRUE)`
## in your R script or .Rprofile.
library("sf")
## Linking to GEOS 3.10.2, GDAL 3.4.2, PROJ 8.2.1; sf_use_s2() is TRUE
JSON Files Import
brand_tags <-
fromJSON(file = "/Users/apple/Desktop/2023\ Feb\ Transfer/brand_tags.json")
projects <-
fromJSON(file = "/Users/apple/Desktop/2023\ Feb\ Transfer/projects.json")
redemptions_2021 <-
fromJSON(file = "/Users/apple/Desktop/2023\ Feb\ Transfer/redemptions_2021.json")
redemptions_2022 <-
fromJSON(file = "/Users/apple/Desktop/2023\ Feb\ Transfer/redemptions_2022.json")
redemptions_2023_Jan <-
fromJSON(file = "/Users/apple/Desktop/2023\ Feb\ Transfer/redemptions_2023_Jan.json")
revenue_view_2021 <-
fromJSON(file = "/Users/apple/Desktop/2023\ Feb\ Transfer/revenue_view_2021.json")
revenue_view_2022 <-
fromJSON(file = "/Users/apple/Desktop/2023\ Feb\ Transfer/revenue_view_2022.json")
revenue_view_2023_Jan <-
fromJSON(file = "/Users/apple/Desktop/2023\ Feb\ Transfer/revenue_view_2023_Jan.json")
users_thru_jan23 <-
fromJSON(file = "/Users/apple/Desktop/2023\ Feb\ Transfer/users_thru_jan23.json")
Convert JSON Files to Tabular Data
json_list <- list(brand_tags, projects, redemptions_2021, redemptions_2022,
redemptions_2023_Jan, revenue_view_2021, revenue_view_2022,
revenue_view_2023_Jan, users_thru_jan23)
####(1)
brand_tags <- lapply(brand_tags, function(x) {
x[sapply(x, is.null)] <- NA
unlist(x)
})
df_brand_tags <- as.data.frame(do.call("cbind", brand_tags)) |> t()
####(2)
projects <- lapply(projects, function(x) {
x[sapply(x, is.null)] <- NA
unlist(x)
})
df_projects <- as.data.frame(do.call("cbind", projects)) |> t()
####(3)
redemptions_2021 <- lapply(redemptions_2021, function(x) {
x[sapply(x, is.null)] <- NA
unlist(x)
})
df_redemptions_2021 <- as.data.frame(do.call("cbind", redemptions_2021)) |> t()
####(4)
redemptions_2022 <- lapply(redemptions_2022, function(x) {
x[sapply(x, is.null)] <- NA
unlist(x)
})
df_redemptions_2022 <- as.data.frame(do.call("cbind", redemptions_2022)) |> t()
####(5)
redemptions_2023_Jan <- lapply(redemptions_2023_Jan, function(x) {
x[sapply(x, is.null)] <- NA
unlist(x)
})
df_redemptions_2023_Jan <- as.data.frame(do.call("cbind", redemptions_2023_Jan)) |> t()
####(6)
revenue_view_2021 <- lapply(revenue_view_2021, function(x) {
x[sapply(x, is.null)] <- NA
unlist(x)
})
df_revenue_view_2021 <- as.data.frame(do.call("cbind", revenue_view_2021)) |> t()
####(7)
revenue_view_2022 <- lapply(revenue_view_2022, function(x) {
x[sapply(x, is.null)] <- NA
unlist(x)
})
df_revenue_view_2022 <- as.data.frame(do.call("cbind", revenue_view_2022)) |> t()
####(8)
revenue_view_2023_Jan <- lapply(revenue_view_2023_Jan, function(x) {
x[sapply(x, is.null)] <- NA
unlist(x)
})
df_revenue_view_2023_Jan <- as.data.frame(do.call("cbind", revenue_view_2023_Jan)) |> t()
####(9)
users_thru_jan23 <- lapply(users_thru_jan23, function(x) {
x[sapply(x, is.null)] <- NA
unlist(x)
})
df_users_thru_jan23 <- as.data.frame(do.call("cbind", users_thru_jan23)) |> t()
L <- list(df_brand_tags, df_projects, df_redemptions_2021,
df_redemptions_2022, df_redemptions_2023_Jan,
df_revenue_view_2021, df_revenue_view_2022,
df_revenue_view_2023_Jan, df_users_thru_jan23)
Preliminary Data Check
identical(colnames(df_brand_tags), colnames(df_redemptions_2022))
## [1] FALSE
all.equal(colnames(df_redemptions_2021), colnames(df_redemptions_2022),
colnames(df_redemptions_2023_Jan))
## [1] TRUE
all.equal(colnames(df_revenue_view_2021), colnames(df_revenue_view_2022),
colnames(df_revenue_view_2023_Jan))
## [1] TRUE
Since the column names of redemptions and
revenue_view are same across all three tables
(df_redemptions_2021, df_redemptions_2022,
df_redemptions_2023_Jan AND
df_revenue_view_2021, df_revenue_view_2022,
df_revenue_view_2023_Jan), we may consider combining them
into two larger tables.
Tabular Conversion
df_redemptions <-
rbind(df_redemptions_2021, df_redemptions_2022, df_redemptions_2023_Jan)
df_revenue_view <-
rbind(df_revenue_view_2021, df_revenue_view_2022, df_revenue_view_2023_Jan)
L_new <- list(df_brand_tags, df_projects, df_redemptions,
df_revenue_view, df_users_thru_jan23)
Exploratory Data Analysis I on Brand Tags
df_brand_tags <- transform(df_brand_tags,
is_featured = as.numeric(is_featured),
longitude = as.numeric(longitude),
latitude = as.numeric(latitude))
summary(df_brand_tags)
## name project_id project_location_id
## Length:1821 Length:1821 Length:1821
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## project_location_name tag tag_category category_desc
## Length:1821 Length:1821 Length:1821 Length:1821
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## is_featured address longitude latitude
## Min. :1 Length:1821 Min. :-157.83 Min. :21.28
## 1st Qu.:1 Class :character 1st Qu.:-118.31 1st Qu.:34.10
## Median :1 Mode :character Median : -84.37 Median :38.91
## Mean :1 Mean : -92.58 Mean :37.68
## 3rd Qu.:1 3rd Qu.: -74.02 3rd Qu.:40.75
## Max. :1 Max. : -71.04 Max. :47.75
## city state created_at
## Length:1821 Length:1821 Length:1821
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
mapview(df_brand_tags, xcol = "longitude", ycol = "latitude", crs = 4269,
grid = FALSE, map.types = "Stamen.Toner")
group_by(df_brand_tags, tag) |> count() |> arrange(desc(n))
group_by(df_brand_tags, tag_category, category_desc) |> count() |> arrange(desc(n))
group_by(df_brand_tags, city, state) |> count() |> arrange(desc(n))
ggplot(df_brand_tags) + geom_bar(aes(x = category_desc, fill = state))
Exploratory Data Analysis II on Projects
df_projects <- data.frame(df_projects)
proj <- group_by(df_projects, project_id, project_name, city, state) |>
count() |> arrange(desc(n))
proj
proj_ny <- filter(proj, state == "NY")
ggplot(proj_ny) +
geom_col(aes(y = proj_ny$project_name, x = proj_ny$n, fill = proj_ny$city)) +
xlab("Number of Projects") + ylab("Name of Projects")
proj_tx <- filter(proj, state == "TX")
ggplot(proj_tx) +
geom_col(aes(y = proj_tx$project_name, x = proj_tx$n, fill = proj_tx$city)) +
xlab("Number of Projects") + ylab("Name of Projects")
proj_wa <- filter(proj, state == "WA")
ggplot(proj_wa) +
geom_col(aes(y = proj_wa$project_name, x = proj_wa$n, fill = proj_wa$city)) +
xlab("Number of Projects") + ylab("Name of Projects")
Exploratory Data Analysis III on Redemptions
df_redemptions <- data.frame(df_redemptions)
nrow(df_redemptions)
## [1] 183687
ncol(df_redemptions)
## [1] 14
colnames(df_redemptions)
## [1] "user_id" "created_at"
## [3] "project_id" "project.name"
## [5] "Location" "project_location_id"
## [7] "city" "state"
## [9] "credit_transaction_id" "total_redemption_amount"
## [11] "tip" "Venue.Type...Detail"
## [13] "Check.Average" "Service.Type"
is.atomic(df_redemptions)
## [1] FALSE
unique(df_redemptions$Venue.Type...Detail)
## [1] "Casual Fine Dining" "Nightclub" "Unknown"
## [4] "Casual Dining" "Fast Casual" "Fine Dining"
## [7] "Cafe" "Bar" "Coffee Shop"
## [10] "Market" "Fast Food"
unique(df_redemptions$Check.Average)
## [1] "Mid" "Low" "High" "Very Low" "Very High"
unique(df_redemptions$Service.Type)
## [1] "Full Service" "Unknown" "Fast Casual" "QSR"
## [5] "Ghost Kitchen"
df_redemptions <- transform(df_redemptions,
total_redemption_amount = as.numeric(total_redemption_amount),
tip = as.numeric(tip))
summary(df_redemptions)
## user_id created_at project_id project.name
## Length:183687 Length:183687 Length:183687 Length:183687
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Location project_location_id city state
## Length:183687 Length:183687 Length:183687 Length:183687
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## credit_transaction_id total_redemption_amount tip
## Length:183687 Min. : 0.00 Min. : 0.00
## Class :character 1st Qu.: 26.76 1st Qu.: 6.00
## Mode :character Median : 62.00 Median : 14.01
## Mean : 118.56 Mean : 25.54
## 3rd Qu.: 129.57 3rd Qu.: 28.46
## Max. :31250.00 Max. :11002.00
## NA's :45316
## Venue.Type...Detail Check.Average Service.Type
## Length:183687 Length:183687 Length:183687
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
ols_redemptions <- lm(tip ~ total_redemption_amount*Service.Type*Check.Average,
data = df_redemptions)
summary(ols_redemptions)
##
## Call:
## lm(formula = tip ~ total_redemption_amount * Service.Type * Check.Average,
## data = df_redemptions)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2896.0 -2.3 -0.3 0.6 10991.8
##
## Coefficients: (18 not defined because of singularities)
## Estimate
## (Intercept) 1.793259
## total_redemption_amount 0.152535
## Service.TypeFull Service -2.437781
## Service.TypeGhost Kitchen -0.795666
## Service.TypeQSR -1.117294
## Service.TypeUnknown 3.235416
## Check.AverageLow -0.561204
## Check.AverageMid -1.497949
## Check.AverageVery High 4.503117
## Check.AverageVery Low -1.577021
## total_redemption_amount:Service.TypeFull Service 0.050881
## total_redemption_amount:Service.TypeGhost Kitchen 0.001499
## total_redemption_amount:Service.TypeQSR -0.031016
## total_redemption_amount:Service.TypeUnknown 0.073094
## total_redemption_amount:Check.AverageLow -0.049575
## total_redemption_amount:Check.AverageMid 0.027365
## total_redemption_amount:Check.AverageVery High -0.013735
## total_redemption_amount:Check.AverageVery Low -0.009887
## Service.TypeFull Service:Check.AverageLow 0.780360
## Service.TypeGhost Kitchen:Check.AverageLow NA
## Service.TypeQSR:Check.AverageLow 0.237122
## Service.TypeUnknown:Check.AverageLow NA
## Service.TypeFull Service:Check.AverageMid 1.703990
## Service.TypeGhost Kitchen:Check.AverageMid NA
## Service.TypeQSR:Check.AverageMid 10.175765
## Service.TypeUnknown:Check.AverageMid -2.303271
## Service.TypeFull Service:Check.AverageVery High NA
## Service.TypeGhost Kitchen:Check.AverageVery High NA
## Service.TypeQSR:Check.AverageVery High NA
## Service.TypeUnknown:Check.AverageVery High NA
## Service.TypeFull Service:Check.AverageVery Low 2.258561
## Service.TypeGhost Kitchen:Check.AverageVery Low NA
## Service.TypeQSR:Check.AverageVery Low 1.069066
## Service.TypeUnknown:Check.AverageVery Low NA
## total_redemption_amount:Service.TypeFull Service:Check.AverageLow 0.041949
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageLow NA
## total_redemption_amount:Service.TypeQSR:Check.AverageLow 0.075990
## total_redemption_amount:Service.TypeUnknown:Check.AverageLow NA
## total_redemption_amount:Service.TypeFull Service:Check.AverageMid -0.027970
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageMid NA
## total_redemption_amount:Service.TypeQSR:Check.AverageMid -0.024507
## total_redemption_amount:Service.TypeUnknown:Check.AverageMid -0.069361
## total_redemption_amount:Service.TypeFull Service:Check.AverageVery High NA
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageVery High NA
## total_redemption_amount:Service.TypeQSR:Check.AverageVery High NA
## total_redemption_amount:Service.TypeUnknown:Check.AverageVery High NA
## total_redemption_amount:Service.TypeFull Service:Check.AverageVery Low 0.012009
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageVery Low NA
## total_redemption_amount:Service.TypeQSR:Check.AverageVery Low 0.019801
## total_redemption_amount:Service.TypeUnknown:Check.AverageVery Low NA
## Std. Error
## (Intercept) 1.549512
## total_redemption_amount 0.009609
## Service.TypeFull Service 1.578314
## Service.TypeGhost Kitchen 3.635895
## Service.TypeQSR 6.840529
## Service.TypeUnknown 6.561043
## Check.AverageLow 2.270674
## Check.AverageMid 1.748570
## Check.AverageVery High 0.597073
## Check.AverageVery Low 5.029080
## total_redemption_amount:Service.TypeFull Service 0.009645
## total_redemption_amount:Service.TypeGhost Kitchen 0.046273
## total_redemption_amount:Service.TypeQSR 0.047521
## total_redemption_amount:Service.TypeUnknown 0.015417
## total_redemption_amount:Check.AverageLow 0.029964
## total_redemption_amount:Check.AverageMid 0.014769
## total_redemption_amount:Check.AverageVery High 0.001295
## total_redemption_amount:Check.AverageVery Low 0.197164
## Service.TypeFull Service:Check.AverageLow 2.463496
## Service.TypeGhost Kitchen:Check.AverageLow NA
## Service.TypeQSR:Check.AverageLow 7.539997
## Service.TypeUnknown:Check.AverageLow NA
## Service.TypeFull Service:Check.AverageMid 1.791621
## Service.TypeGhost Kitchen:Check.AverageMid NA
## Service.TypeQSR:Check.AverageMid 7.089032
## Service.TypeUnknown:Check.AverageMid 6.638283
## Service.TypeFull Service:Check.AverageVery High NA
## Service.TypeGhost Kitchen:Check.AverageVery High NA
## Service.TypeQSR:Check.AverageVery High NA
## Service.TypeUnknown:Check.AverageVery High NA
## Service.TypeFull Service:Check.AverageVery Low 5.352948
## Service.TypeGhost Kitchen:Check.AverageVery Low NA
## Service.TypeQSR:Check.AverageVery Low 8.391806
## Service.TypeUnknown:Check.AverageVery Low NA
## total_redemption_amount:Service.TypeFull Service:Check.AverageLow 0.031794
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageLow NA
## total_redemption_amount:Service.TypeQSR:Check.AverageLow 0.156117
## total_redemption_amount:Service.TypeUnknown:Check.AverageLow NA
## total_redemption_amount:Service.TypeFull Service:Check.AverageMid 0.014843
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageMid NA
## total_redemption_amount:Service.TypeQSR:Check.AverageMid 0.052652
## total_redemption_amount:Service.TypeUnknown:Check.AverageMid 0.019822
## total_redemption_amount:Service.TypeFull Service:Check.AverageVery High NA
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageVery High NA
## total_redemption_amount:Service.TypeQSR:Check.AverageVery High NA
## total_redemption_amount:Service.TypeUnknown:Check.AverageVery High NA
## total_redemption_amount:Service.TypeFull Service:Check.AverageVery Low 0.198291
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageVery Low NA
## total_redemption_amount:Service.TypeQSR:Check.AverageVery Low 0.204410
## total_redemption_amount:Service.TypeUnknown:Check.AverageVery Low NA
## t value
## (Intercept) 1.157
## total_redemption_amount 15.875
## Service.TypeFull Service -1.545
## Service.TypeGhost Kitchen -0.219
## Service.TypeQSR -0.163
## Service.TypeUnknown 0.493
## Check.AverageLow -0.247
## Check.AverageMid -0.857
## Check.AverageVery High 7.542
## Check.AverageVery Low -0.314
## total_redemption_amount:Service.TypeFull Service 5.275
## total_redemption_amount:Service.TypeGhost Kitchen 0.032
## total_redemption_amount:Service.TypeQSR -0.653
## total_redemption_amount:Service.TypeUnknown 4.741
## total_redemption_amount:Check.AverageLow -1.655
## total_redemption_amount:Check.AverageMid 1.853
## total_redemption_amount:Check.AverageVery High -10.603
## total_redemption_amount:Check.AverageVery Low -0.050
## Service.TypeFull Service:Check.AverageLow 0.317
## Service.TypeGhost Kitchen:Check.AverageLow NA
## Service.TypeQSR:Check.AverageLow 0.031
## Service.TypeUnknown:Check.AverageLow NA
## Service.TypeFull Service:Check.AverageMid 0.951
## Service.TypeGhost Kitchen:Check.AverageMid NA
## Service.TypeQSR:Check.AverageMid 1.435
## Service.TypeUnknown:Check.AverageMid -0.347
## Service.TypeFull Service:Check.AverageVery High NA
## Service.TypeGhost Kitchen:Check.AverageVery High NA
## Service.TypeQSR:Check.AverageVery High NA
## Service.TypeUnknown:Check.AverageVery High NA
## Service.TypeFull Service:Check.AverageVery Low 0.422
## Service.TypeGhost Kitchen:Check.AverageVery Low NA
## Service.TypeQSR:Check.AverageVery Low 0.127
## Service.TypeUnknown:Check.AverageVery Low NA
## total_redemption_amount:Service.TypeFull Service:Check.AverageLow 1.319
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageLow NA
## total_redemption_amount:Service.TypeQSR:Check.AverageLow 0.487
## total_redemption_amount:Service.TypeUnknown:Check.AverageLow NA
## total_redemption_amount:Service.TypeFull Service:Check.AverageMid -1.884
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageMid NA
## total_redemption_amount:Service.TypeQSR:Check.AverageMid -0.465
## total_redemption_amount:Service.TypeUnknown:Check.AverageMid -3.499
## total_redemption_amount:Service.TypeFull Service:Check.AverageVery High NA
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageVery High NA
## total_redemption_amount:Service.TypeQSR:Check.AverageVery High NA
## total_redemption_amount:Service.TypeUnknown:Check.AverageVery High NA
## total_redemption_amount:Service.TypeFull Service:Check.AverageVery Low 0.061
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageVery Low NA
## total_redemption_amount:Service.TypeQSR:Check.AverageVery Low 0.097
## total_redemption_amount:Service.TypeUnknown:Check.AverageVery Low NA
## Pr(>|t|)
## (Intercept) 0.247150
## total_redemption_amount < 2e-16
## Service.TypeFull Service 0.122458
## Service.TypeGhost Kitchen 0.826778
## Service.TypeQSR 0.870255
## Service.TypeUnknown 0.621925
## Check.AverageLow 0.804790
## Check.AverageMid 0.391628
## Check.AverageVery High 4.66e-14
## Check.AverageVery Low 0.753840
## total_redemption_amount:Service.TypeFull Service 1.33e-07
## total_redemption_amount:Service.TypeGhost Kitchen 0.974159
## total_redemption_amount:Service.TypeQSR 0.513966
## total_redemption_amount:Service.TypeUnknown 2.13e-06
## total_redemption_amount:Check.AverageLow 0.098026
## total_redemption_amount:Check.AverageMid 0.063915
## total_redemption_amount:Check.AverageVery High < 2e-16
## total_redemption_amount:Check.AverageVery Low 0.960008
## Service.TypeFull Service:Check.AverageLow 0.751419
## Service.TypeGhost Kitchen:Check.AverageLow NA
## Service.TypeQSR:Check.AverageLow 0.974912
## Service.TypeUnknown:Check.AverageLow NA
## Service.TypeFull Service:Check.AverageMid 0.341561
## Service.TypeGhost Kitchen:Check.AverageMid NA
## Service.TypeQSR:Check.AverageMid 0.151169
## Service.TypeUnknown:Check.AverageMid 0.728616
## Service.TypeFull Service:Check.AverageVery High NA
## Service.TypeGhost Kitchen:Check.AverageVery High NA
## Service.TypeQSR:Check.AverageVery High NA
## Service.TypeUnknown:Check.AverageVery High NA
## Service.TypeFull Service:Check.AverageVery Low 0.673078
## Service.TypeGhost Kitchen:Check.AverageVery Low NA
## Service.TypeQSR:Check.AverageVery Low 0.898629
## Service.TypeUnknown:Check.AverageVery Low NA
## total_redemption_amount:Service.TypeFull Service:Check.AverageLow 0.187045
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageLow NA
## total_redemption_amount:Service.TypeQSR:Check.AverageLow 0.626436
## total_redemption_amount:Service.TypeUnknown:Check.AverageLow NA
## total_redemption_amount:Service.TypeFull Service:Check.AverageMid 0.059506
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageMid NA
## total_redemption_amount:Service.TypeQSR:Check.AverageMid 0.641605
## total_redemption_amount:Service.TypeUnknown:Check.AverageMid 0.000467
## total_redemption_amount:Service.TypeFull Service:Check.AverageVery High NA
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageVery High NA
## total_redemption_amount:Service.TypeQSR:Check.AverageVery High NA
## total_redemption_amount:Service.TypeUnknown:Check.AverageVery High NA
## total_redemption_amount:Service.TypeFull Service:Check.AverageVery Low 0.951706
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageVery Low NA
## total_redemption_amount:Service.TypeQSR:Check.AverageVery Low 0.922832
## total_redemption_amount:Service.TypeUnknown:Check.AverageVery Low NA
##
## (Intercept)
## total_redemption_amount ***
## Service.TypeFull Service
## Service.TypeGhost Kitchen
## Service.TypeQSR
## Service.TypeUnknown
## Check.AverageLow
## Check.AverageMid
## Check.AverageVery High ***
## Check.AverageVery Low
## total_redemption_amount:Service.TypeFull Service ***
## total_redemption_amount:Service.TypeGhost Kitchen
## total_redemption_amount:Service.TypeQSR
## total_redemption_amount:Service.TypeUnknown ***
## total_redemption_amount:Check.AverageLow .
## total_redemption_amount:Check.AverageMid .
## total_redemption_amount:Check.AverageVery High ***
## total_redemption_amount:Check.AverageVery Low
## Service.TypeFull Service:Check.AverageLow
## Service.TypeGhost Kitchen:Check.AverageLow
## Service.TypeQSR:Check.AverageLow
## Service.TypeUnknown:Check.AverageLow
## Service.TypeFull Service:Check.AverageMid
## Service.TypeGhost Kitchen:Check.AverageMid
## Service.TypeQSR:Check.AverageMid
## Service.TypeUnknown:Check.AverageMid
## Service.TypeFull Service:Check.AverageVery High
## Service.TypeGhost Kitchen:Check.AverageVery High
## Service.TypeQSR:Check.AverageVery High
## Service.TypeUnknown:Check.AverageVery High
## Service.TypeFull Service:Check.AverageVery Low
## Service.TypeGhost Kitchen:Check.AverageVery Low
## Service.TypeQSR:Check.AverageVery Low
## Service.TypeUnknown:Check.AverageVery Low
## total_redemption_amount:Service.TypeFull Service:Check.AverageLow
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageLow
## total_redemption_amount:Service.TypeQSR:Check.AverageLow
## total_redemption_amount:Service.TypeUnknown:Check.AverageLow
## total_redemption_amount:Service.TypeFull Service:Check.AverageMid .
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageMid
## total_redemption_amount:Service.TypeQSR:Check.AverageMid
## total_redemption_amount:Service.TypeUnknown:Check.AverageMid ***
## total_redemption_amount:Service.TypeFull Service:Check.AverageVery High
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageVery High
## total_redemption_amount:Service.TypeQSR:Check.AverageVery High
## total_redemption_amount:Service.TypeUnknown:Check.AverageVery High
## total_redemption_amount:Service.TypeFull Service:Check.AverageVery Low
## total_redemption_amount:Service.TypeGhost Kitchen:Check.AverageVery Low
## total_redemption_amount:Service.TypeQSR:Check.AverageVery Low
## total_redemption_amount:Service.TypeUnknown:Check.AverageVery Low
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 49.07 on 138339 degrees of freedom
## (45316 observations deleted due to missingness)
## Multiple R-squared: 0.5027, Adjusted R-squared: 0.5026
## F-statistic: 4511 on 31 and 138339 DF, p-value: < 2.2e-16
anova(ols_redemptions)
ggplot(df_redemptions) + geom_point(aes(x = total_redemption_amount, y = tip))
## Warning: Removed 45316 rows containing missing values (`geom_point()`).
plot(x = fitted(ols_redemptions), y = residuals(ols_redemptions))
qqnorm(ols_redemptions$residual, ylab = "Residuals")
qqline(ols_redemptions$residual)
hist(ols_redemptions$residual, xlab = "Residuals")
# Cauchy distribution (Heavy tails)
halfnorm(lm.influence(ols_redemptions)$hat, nlab = 5, ylab = "Leverages")
df_redemptions[c(54470, 114859, 38707, 43022, 32011),]
# 5 likely high leverage points
halfnorm(cooks.distance(ols_redemptions), nlab = 1, ylab = "Cook's distance")
df_redemptions[c(99741),]
# 1 serious influential point
ti <- rstudent(ols_redemptions)
max(abs(ti))
## [1] 280.6126
which(ti == max(abs(ti)))
## V19829.2
## 137461
p_value <- 2*(1-pt(max(abs(ti)),
df = nrow(df_redemptions) - ncol(df_redemptions) - 1))
alpha_over_n <- 0.05/nrow(df_redemptions)
p_value > alpha_over_n
## [1] FALSE
# no outliers reported by Bonferroni correction
df_redemptions
df_redemp_taco1986 <- filter(df_redemptions, project.name == "Tacos 1986")
ggplot(df_redemp_taco1986) + geom_point(aes(x = df_redemp_taco1986$created_at,
y = df_redemp_taco1986$total_redemption_amount)) + xlab("Date (YMD-HMS)") +
ylab("Total Redemption Amount") +
ggtitle("Time-Series Plot of Total Redemption Amount for Tacos 1986")
df_redemp_taco1986_burbank <- filter(df_redemp_taco1986, city == "Burbank")
ggplot(df_redemp_taco1986_burbank) + geom_point(aes(
x = df_redemp_taco1986_burbank$created_at,
y = df_redemp_taco1986_burbank$total_redemption_amount)) +
xlab("Date (YMD-HMS)") + ylab("Total Redemption Amount") +
ggtitle("Time-Series Plot of Total Redemption Amount for Tacos 1986 in Burbank")
df_redemp_taco1986_burbank_2023 <- filter(df_redemp_taco1986_burbank,
year(df_redemp_taco1986_burbank$created_at) == 2023)
ggplot(df_redemp_taco1986_burbank_2023) + geom_point(aes(
x = df_redemp_taco1986_burbank_2023$created_at,
y = df_redemp_taco1986_burbank_2023$total_redemption_amount)) +
xlab("Date (YMD-HMS)") + ylab("Total Redemption Amount") +
ggtitle("Time-Series Plot of Total Redemption Amount for Tacos 1986 in
Burbank in Year 2023")
Exploratory Data Analysis IV on Revenue View
df_revenue_view <- data.frame(df_revenue_view)
df_revenue_view <- transform(df_revenue_view,
amount_charged_in_usd = as.numeric(amount_charged_in_usd),
credit_given_in_usd = as.numeric(credit_given_in_usd))
summary(df_revenue_view)
## UID name project_id gcp_id
## Length:143806 Length:143806 Length:143806 Length:143806
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## ict_id created_at amount_charged_in_usd utm_campaign
## Length:143806 Length:143806 Min. : -9999.00 Length:143806
## Class :character Class :character 1st Qu.: 50.00 Class :character
## Mode :character Mode :character Median : 77.83 Mode :character
## Mean : 180.35
## 3rd Qu.: 200.00
## Max. :100000.00
## utm_medium utm_content utm_source is_app_purchase
## Length:143806 Length:143806 Length:143806 Length:143806
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## user_id transaction_type credit_type credit_given_in_usd
## Length:143806 Length:143806 Length:143806 Min. :-15000.0
## Class :character Class :character Class :character 1st Qu.: 50.0
## Mode :character Mode :character Mode :character Median : 84.6
## Mean : 224.0
## 3rd Qu.: 250.0
## Max. :120000.0
## is_excess stripe_brand option_id
## Length:143806 Length:143806 Length:143806
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
head(df_revenue_view)
ggplot(df_revenue_view) +
geom_violin(aes(x = credit_given_in_usd, y = stripe_brand)) +
xlab("Amount of credit (USD)") + ylab("Type of credit cards")
df_revenue_view_pac <- filter(df_revenue_view, name == "Penny Ann's Cafe")
ggplot(df_revenue_view_pac, aes(x = amount_charged_in_usd,
y = credit_given_in_usd)) + geom_point() +
stat_smooth(se = TRUE, method = "loess")
## `geom_smooth()` using formula = 'y ~ x'
df_revenue_view_pac_mastercard <- filter(df_revenue_view_pac,
stripe_brand == "MasterCard")
ggplot(df_revenue_view_pac_mastercard, aes(x = amount_charged_in_usd,
y = credit_given_in_usd)) + geom_point() +
stat_smooth(se = TRUE, method = "loess")
## `geom_smooth()` using formula = 'y ~ x'
df_revenue_view_card <- group_by(df_revenue_view, stripe_brand) |> count()
ggplot(df_revenue_view_card, aes(x="", y= df_revenue_view_card$n,
fill = df_revenue_view_card$stripe_brand)) +
geom_bar(stat="identity", width=1) +
coord_polar("y", start=0) +
geom_text(aes(label =
paste0(round(100*df_revenue_view_card$n/sum(df_revenue_view_card$n),2), "%")),
position = position_stack(vjust=0.5)) +
labs(x = NULL, y = NULL) +
theme_classic() +
theme(axis.line = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank()) +
scale_fill_brewer(palette="Blues")
df_revenue_view
ggplot(df_revenue_view) + geom_histogram(aes(x = amount_charged_in_usd)) + ylim(c(0, 30))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values (`geom_bar()`).
ggplot(df_revenue_view) + geom_histogram(aes(x = credit_given_in_usd)) + xlim(c(-500, 8000))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 94 rows containing non-finite values (`stat_bin()`).
## Removed 2 rows containing missing values (`geom_bar()`).
mosaic(~ stripe_brand + is_app_purchase, df_revenue_view)
mosaic(~ transaction_type + credit_type, df_revenue_view)
Exploratory Data Analysis V on Users
df_users_thru_jan23 <- data.frame(df_users_thru_jan23)
df_users_thru_jan23_platform <- group_by(df_users_thru_jan23, user_app_platform) |> count()
ggplot(df_users_thru_jan23_platform, aes(x="", y= df_users_thru_jan23_platform$n,
fill = df_users_thru_jan23_platform$user_app_platform)) +
geom_bar(stat="identity", width=1) +
coord_polar("y", start=0) +
geom_text(aes(label =
paste0(round(100*df_users_thru_jan23_platform$n/sum(df_users_thru_jan23_platform$n),2), "%")),
position = position_stack(vjust=0.5)) +
labs(x = NULL, y = NULL) +
theme_classic() +
theme(axis.line = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank()) +
scale_fill_brewer(palette="Reds")
df_users_mutate <- mutate(df_users_thru_jan23, created_year = year(account_created_at))
df_users_by_year <- group_by(df_users_mutate, created_year, user_app_platform) |> count()
ggplot(df_users_by_year) + geom_col(aes(x = created_year, y = df_users_by_year$n,
fill = df_users_by_year$user_app_platform))