knitr::opts_chunk$set(echo = TRUE)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(ggplot2)
library(bpCausal)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(rlang)
##
## Attaching package: 'rlang'
## The following object is masked from 'package:data.table':
##
## :=
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:data.table':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Load the three input tables. The paths are assembled with file.path() from
# plain directory names: a backslash followed by a space is not a valid escape
# sequence inside an R string literal, so the spaces must be written as-is.
PDF_JA <- read.csv(file.path(
  "/Users/apple/Quantitative Marketing Research",
  "Statistical Modeling V",
  "Statistical Modeling V Data",
  "PDF_JA.csv"
))
X_U <- read.csv(file.path(
  "/Users/apple/Quantitative Marketing Research",
  "Statistical Modeling XII",
  "Statistical Modeling XII Essential Data",
  "X_U.csv"
))
X_P <- read.csv(file.path(
  "/Users/apple/Quantitative Marketing Research",
  "Statistical Modeling XII",
  "Statistical Modeling XII Essential Data",
  "X_P.csv"
))
The following code refines the earlier code from Statistical Modeling IX and Statistical Modeling XI a.
# Reorder the columns of PDF_JA by position; the index vector is a
# permutation of 1:67, so no column is dropped at this step.
TSCS <-
PDF_JA[, c(1:3, 8:9, 14, 16, 4:5, 12, 17, 6:7, 13, 18, 10:11, 15, 19:67)]
# Flip the D column (D becomes 1 - D).
# NOTE(review): presumably D is a 0/1 indicator whose coding is being
# reversed — confirm against the upstream data.
TSCS$D <- 1 - TSCS$D
# Drop nine columns by position; TSCS_Y keeps the remaining 58.
TSCS_Y <- TSCS[, -c(4, 8, 12, 16, 61:62, 64:66)]
# Name the first 14 columns: panel keys (id, T, D) followed by the F_/M_/R_
# columns for trans, redem and charg, then F_tip and M_tip.
colnames(TSCS_Y)[c(1:14)] <-
c("id", "T", "D",
"F_trans", "M_trans", "R_trans",
"F_redem", "M_redem", "R_redem",
"F_charg", "M_charg", "R_charg",
"F_tip", "M_tip")
# Name columns 15 to 17.
colnames(TSCS_Y)[c(15:17)] <-
c("is_reconcile", "suspicious_is_promo", "utm_is_promo")
# Split the "user-project" id string into its two integer components.
# vapply() pins the result to one character value per id, so a malformed id
# fails loudly instead of silently changing the return type.
split_pairs <- strsplit(as.character(TSCS_Y$id), "-", fixed = TRUE)
TSCS_Y$user_id <- as.integer(vapply(split_pairs, `[[`, character(1), 1L))
TSCS_Y$proj_id <- as.integer(vapply(split_pairs, `[[`, character(1), 2L))

# Move the two new columns (positions 59 and 60) directly after id.
TSCS_Y <- TSCS_Y[, c(1, 59:60, 2:58)]

# Replace the string id by a consecutive integer, numbered in order of first
# appearance of each (user_id, proj_id) pair. The pair key is built once and
# reused for both the lookup table and the lookup itself.
pair_keys <- paste(TSCS_Y$user_id, TSCS_Y$proj_id, sep = "-")
unique_pairs <- unique(pair_keys)
pair_to_int <- setNames(seq_along(unique_pairs), unique_pairs)
# unname() keeps id a plain integer column rather than a named vector.
TSCS_Y$id <- unname(pair_to_int[pair_keys])
TSCS_Y$D <- as.integer(TSCS_Y$D)
# Build the balanced panel skeleton: every id crossed with periods 1 to 55.
complete_set <- expand_grid(id = unique(TSCS_Y$id), T = 1:55)

# Attach the observed rows to the skeleton, then copy user_id and proj_id
# into the newly created rows of each id (downwards first, then upwards).
TSCS_Y <- ungroup(
  fill(
    group_by(left_join(complete_set, TSCS_Y, by = c("id", "T")), id),
    user_id, proj_id,
    .direction = "downup"
  )
)
# D ----

#' Impute the D column of one id as a step function.
#'
#' Finds the first row in which `D` equals 1. Every row before it is set to 0
#' and that row and all later rows are set to 1. If no row has `D == 1`
#' (including the all-`NA` case), the whole column is set to 0.
#'
#' @param sub_df A data frame holding the rows of a single id, with a column
#'   `D`. Rows are used in the order given.
#' @return `sub_df` with `D` replaced by the imputed 0/1 values.
impute_D <- function(sub_df) {
  n_rows <- nrow(sub_df)
  # which() skips NA, so [1] yields NA when no row has D == 1.
  first_one_idx <- which(sub_df$D == 1)[1]
  if (is.na(first_one_idx)) {
    sub_df$D <- rep(0, n_rows)
  } else {
    # Comparing row positions avoids 1:(first_one_idx - 1), which counts
    # backwards (1, 0) when the first 1 is already in row 1.
    sub_df$D <- as.numeric(seq_len(n_rows) >= first_one_idx)
  }
  sub_df
}
# Impute D separately for every id.
TSCS_Y <- TSCS_Y |>
  group_by(id) |>
  group_modify(~ impute_D(.x)) |>
  ungroup()

# Ys ----
# Seed the first period of every id so that later carry-forward steps always
# have a starting value: a missing R_ column starts at 999, every other
# missing column starts at 0. The result is assigned back to TSCS_Y;
# assigning it to any other name would silently discard this step.
# Sorting by (id, T) first makes "first row" mean the earliest period.
TSCS_Y <- TSCS_Y |>
  arrange(id, T) |>
  group_by(id) |>
  mutate(across(
    starts_with("R_"),
    ~ ifelse(row_number() == 1 & is.na(.), 999, .)
  )) |>
  mutate(across(
    -starts_with("R_"),
    ~ ifelse(row_number() == 1 & is.na(.), 0, .)
  )) |>
  ungroup()

# Names of the F_ columns.
f_columns <- grep("^F_", names(TSCS_Y), value = TRUE)
#' Turn per-period F_ columns into running totals for one id.
#'
#' For each selected column, missing values are treated as 0 and the column
#' is replaced by its cumulative sum, in the row order given.
#'
#' @param group_df A data frame holding the rows of a single id.
#' @param cols Names of the columns to accumulate. Defaults to every column
#'   of `group_df` whose name starts with `F_`, so the function no longer
#'   depends on a variable defined outside of it.
#' @return `group_df` with the selected columns replaced by running totals.
impute_freq <- function(group_df,
                        cols = grep("^F_", names(group_df), value = TRUE)) {
  for (col in cols) {
    counts <- group_df[[col]]
    counts[is.na(counts)] <- 0
    group_df[[col]] <- cumsum(counts)
  }
  group_df
}
# Apply impute_freq() to the rows of every id.
TSCS_Y <- TSCS_Y |> group_by(id) |>
group_modify(~ impute_freq(.)) |> ungroup()
# Switch to data.table for the by-reference updates below.
TSCS_Y <- as.data.table(TSCS_Y)
# M_ columns: within each id carry the last observed value forward; values
# that are still missing afterwards (nothing observed earlier) become 0.
m_columns <- grep("^M_", names(TSCS_Y), value = TRUE)
TSCS_Y[, (m_columns) := lapply(.SD, function(x) {
x <- nafill(x, type = "locf")
ifelse(is.na(x), 0, x)
}), by = id, .SDcols = m_columns]
# R_ columns: same carry-forward, but remaining gaps are filled with 999.
# NOTE(review): 999 looks like a "never observed" sentinel — confirm.
r_columns <- grep("^R_", names(TSCS_Y), value = TRUE)
TSCS_Y[, (r_columns) := lapply(.SD, function(x) {
x <- nafill(x, type = "locf")
ifelse(is.na(x), 999, x)
}), by = id, .SDcols = r_columns]
# Store D as an integer column.
TSCS_Y$D <- as.integer(TSCS_Y$D)
Each row of the is_reconcile column in
TSCS_Y should be set to 1 when \(M_{\text{redem}} \leq M_{\text{trans}}\)
and to 0 otherwise. Note that is_reconcile
might be a post-measure variable, so it should not be included
among the covariates.
# Set is_reconcile to 1 where M_redem is less than or equal to M_charg, and
# to 0 otherwise.
# NOTE(review): the accompanying text defines this flag in terms of M_trans,
# while the code compares against M_charg — confirm which column is intended.
TSCS_Y$is_reconcile <- as.integer(TSCS_Y$M_redem <= TSCS_Y$M_charg)
utm_is_promo and suspicious_is_promo
The group_by(id) ensures that each id is
handled separately, and arrange(id) then sorts
TSCS_Y by id. The first mutate()
uses coalesce with lag to fill each NA
value with the value from the row directly above it within the same
id. The second mutate() uses
replace_na to replace any remaining NA values
with 0. The third mutate() uses
cummax to ensure that, within each id, the value can only
switch from 0 to 1 and never from 1 back to
0.
# Clean the two promo flags within each id. Per column: take the value from
# the row directly above when the current one is NA, turn any NA that is
# still left into 0, then apply a running maximum so that a flag never drops
# back from 1 to 0.
TSCS_Y <- TSCS_Y |>
  group_by(id) |>
  arrange(id) |>
  mutate(across(
    c(suspicious_is_promo, utm_is_promo),
    \(flag) coalesce(flag, lag(flag))
  )) |>
  mutate(across(
    c(suspicious_is_promo, utm_is_promo),
    \(flag) replace_na(flag, 0)
  )) |>
  mutate(across(
    c(suspicious_is_promo, utm_is_promo),
    \(flag) cummax(flag)
  )) |>
  ungroup()
The following check shows that utm_is_promo and
suspicious_is_promo are NOT
time-varying.
# For every id, count the distinct non-missing values of each promo flag and
# keep only the ids where either flag takes more than one value. An empty
# result means neither flag changes over time within any id.
TSCS_Y |> group_by(id) |>
summarise(
unique_utm_is_promo = n_distinct(utm_is_promo, na.rm = TRUE),
unique_suspicious_is_promo = n_distinct(suspicious_is_promo, na.rm = TRUE)
) |>
filter(unique_utm_is_promo > 1 | unique_suspicious_is_promo > 1)
## # A tibble: 0 × 3
## # ℹ 3 variables: id <int>, unique_utm_is_promo <int>,
## # unique_suspicious_is_promo <int>
# Back to data.table so the columns can be renamed by reference.
TSCS_Y <- as.data.table(TSCS_Y)
# Rename every freq_* column to max_* (skipped when there are none).
cols_to_rename <- grep("^freq_", names(TSCS_Y), value = TRUE)
if (length(cols_to_rename) > 0) {
setnames(TSCS_Y, cols_to_rename, sub("^freq_", "max_", cols_to_rename))
}
# Rename columns 42:44 and 51:52 by position. After this they no longer start
# with max_, so the grep() below does not select them.
# NOTE(review): these positions depend on the current column order — confirm
# they still point at the intended columns if the upstream layout changes.
colnames(TSCS_Y)[42:44] <- c("is_app_purchase_0", "is_app_purchase_1", "is_app_purchase_Missing")
colnames(TSCS_Y)[51:52] <- c("is_excess_0", "is_excess_1")
# From here on cols_to_rename holds the max_* column names.
cols_to_rename <- grep("^max_", names(TSCS_Y), value = TRUE)
We will perform the following tasks here:
Rename Columns: we change the prefix of all
columns that start with freq_ to
max_.
Clamp Values: we then replace any values
greater than 1 in these columns with
1.
Impute Missing Values: for each unique
id, we check the columns that start with
max_. If any value in such a column is 1 for that
id, we replace all NA and 0
values in it with 1. If the
column contains only NA or 0 for a
given id, we replace the missing values
NA with 0. Alternatively, we
can proceed as follows: 1) for each id, fill each missing entry
with the value from the row directly above it for that
id; 2) after this fill, if any row of a given
id across T has a
1, we set that column to
1 in all 55 rows of that id.
# Clamp and impute the max_* indicator columns. Everything runs inside the
# guard so that nothing is attempted when no max_* column exists.
if (length(cols_to_rename) > 0) {
  # Clamp values above 1 down to 1, one column at a time. pmin() is applied
  # to each column vector rather than to the whole .SD list, and NA is left
  # as NA here so that the imputation below decides how gaps are filled.
  TSCS_Y[, (cols_to_rename) := lapply(.SD, function(x) pmin(x, 1)),
         .SDcols = cols_to_rename]

  # Within each id, carry the last observed value forward.
  TSCS_Y[, (cols_to_rename) := lapply(.SD, function(x) {
    zoo::na.locf(x, na.rm = FALSE)
  }), by = id, .SDcols = cols_to_rename]

  # If an id has a 1 anywhere in a column, set the whole column to 1 for
  # that id; otherwise leave it unchanged.
  TSCS_Y[, (cols_to_rename) := lapply(.SD, function(x) {
    if (any(x == 1, na.rm = TRUE)) {
      rep(1, length(x))
    } else {
      x
    }
  }), by = id, .SDcols = cols_to_rename]

  # Anything still missing was never observed for that id: set it to 0.
  TSCS_Y[, (cols_to_rename) := lapply(.SD, function(x) {
    ifelse(is.na(x), 0, x)
  }), .SDcols = cols_to_rename]
}
#' Check whether a group of one-hot columns has a baseline level left out.
#'
#' Selects the columns whose names match `prefix` at the start and sums them
#' row by row. A baseline is reported as left out when at least one row is 0
#' in every selected column and no row is 1 in every selected column.
#'
#' @param df A data frame (or an object convertible with `as.data.frame()`).
#' @param prefix Column-name prefix; it is pasted into a regular expression
#'   after `^`, so regex metacharacters such as `.` keep their regex meaning.
#' @return One of two character strings describing the result.
onehot_check <- function(df, prefix) {
  one_hot_cols <- grep(paste0("^", prefix), names(df), value = TRUE)
  # drop = FALSE keeps a single matching column two-dimensional, which
  # rowSums() requires; the row sums are computed once and reused.
  row_totals <- rowSums(as.data.frame(df)[, one_hot_cols, drop = FALSE])
  num_all_zero_rows <- sum(row_totals == 0)
  num_all_one_rows <- sum(row_totals == length(one_hot_cols))
  if (num_all_zero_rows > 0 && num_all_one_rows == 0) {
    "A baseline column has been left out."
  } else {
    "No baseline column has been left out."
  }
}
# Convert to a plain data.frame, then run onehot_check() on each group of
# max_* dummy columns; the `##` lines show the message returned by each call.
TSCS_Y <- as.data.frame(TSCS_Y)
onehot_check(TSCS_Y, "max_Venue.Type...Detail_")
## [1] "A baseline column has been left out."
onehot_check(TSCS_Y, "max_Check.Average_")
## [1] "A baseline column has been left out."
onehot_check(TSCS_Y, "max_Service.Type_")
## [1] "A baseline column has been left out."
onehot_check(TSCS_Y, "max_transaction_type_")
## [1] "A baseline column has been left out."
onehot_check(TSCS_Y, "max_credit_type_")
## [1] "A baseline column has been left out."
onehot_check(TSCS_Y, "max_stripe_brand_")
## [1] "A baseline column has been left out."
# Drop thirteen columns by position to form Y_TR.
# NOTE(review): the positions depend on the current column order of TSCS_Y —
# confirm they still select the intended columns if that order changes.
Y_TR <- TSCS_Y[, -c(30, 36, 41, 42:44, 47, 50, 51:52, 58:60)]
# Rename the remaining max_* columns to TR_* (skipped when there are none).
cols_to_rename_2 <- grep("^max_", names(Y_TR), value = TRUE)
if (length(cols_to_rename_2) > 0) {
setnames(Y_TR, cols_to_rename_2, sub("^max_", "TR_", cols_to_rename_2))
}
# Attach the X_U columns by user_id, then the X_P columns by proj_id.
# NOTE(review): assumes X_U has one row per user_id and X_P one row per
# proj_id; duplicate keys there would multiply panel rows — confirm.
Y_TR_U <- left_join(Y_TR, X_U, by = "user_id")
Y_TR_U_P <- left_join(Y_TR_U, X_P, by = "proj_id")
#' Share of missing values in every column of a data frame.
#'
#' @param df A data frame.
#' @return A named numeric vector with one entry per column of `df`: the
#'   number of `NA` values in that column divided by the number of rows.
miss_prop <- function(df) {
  n_rows <- nrow(df)
  shares <- vapply(
    seq_along(df),
    function(j) sum(is.na(df[[j]])) / n_rows,
    numeric(1)
  )
  names(shares) <- names(df)
  shares
}
miss_prop(Y_TR_U_P)
## id
## 0.0000000000
## T
## 0.0000000000
## user_id
## 0.0000000000
## proj_id
## 0.0000000000
## D
## 0.0000000000
## F_trans
## 0.0000000000
## M_trans
## 0.0000000000
## R_trans
## 0.0000000000
## F_redem
## 0.0000000000
## M_redem
## 0.0000000000
## R_redem
## 0.0000000000
## F_charg
## 0.0000000000
## M_charg
## 0.0000000000
## R_charg
## 0.0000000000
## F_tip
## 0.0000000000
## M_tip
## 0.0000000000
## is_reconcile
## 0.0000000000
## suspicious_is_promo
## 0.0000000000
## utm_is_promo
## 0.0000000000
## TR_Venue.Type...Detail_Bar
## 0.0000000000
## TR_Venue.Type...Detail_Cafe
## 0.0000000000
## TR_Venue.Type...Detail_Casual.Dining
## 0.0000000000
## TR_Venue.Type...Detail_Casual.Fine.Dining
## 0.0000000000
## TR_Venue.Type...Detail_Coffee.Shop
## 0.0000000000
## TR_Venue.Type...Detail_Fast.Casual
## 0.0000000000
## TR_Venue.Type...Detail_Fast.Food
## 0.0000000000
## TR_Venue.Type...Detail_Fine.Dining
## 0.0000000000
## TR_Venue.Type...Detail_Market
## 0.0000000000
## TR_Venue.Type...Detail_Nightclub
## 0.0000000000
## TR_Check.Average_High
## 0.0000000000
## TR_Check.Average_Low
## 0.0000000000
## TR_Check.Average_Mid
## 0.0000000000
## TR_Check.Average_Very.High
## 0.0000000000
## TR_Check.Average_Very.Low
## 0.0000000000
## TR_Service.Type_Fast.Casual
## 0.0000000000
## TR_Service.Type_Full.Service
## 0.0000000000
## TR_Service.Type_Ghost.Kitchen
## 0.0000000000
## TR_Service.Type_QSR
## 0.0000000000
## TR_transaction_type_0
## 0.0000000000
## TR_transaction_type_1
## 0.0000000000
## TR_credit_type_0
## 0.0000000000
## TR_credit_type_5
## 0.0000000000
## TR_stripe_brand_American.Express
## 0.0000000000
## TR_stripe_brand_Discover
## 0.0000000000
## TR_stripe_brand_MasterCard
## 0.0000000000
## TR_stripe_brand_Visa
## 0.0000000000
## TR_stripe_brand_Other
## 0.0000000000
## T_acct
## 0.0000000000
## user_app_android
## 0.0000000000
## user_app_ios
## 0.0000000000
## unique_proj_loc_P
## 0.0001694164
## max_is_featured
## 0.0001694164
## mean_longitude
## 0.0001694164
## mean_latitude
## 0.0001694164
## mean_rating
## 0.0001694164
## sum_review_num
## 0.0001694164
## mean_price_level
## 0.0001694164
## yelp_tag_Acne.Treatment
## 0.0001694164
## yelp_tag_Afghan
## 0.0001694164
## yelp_tag_Airport.Terminals
## 0.0001694164
## yelp_tag_American..New.
## 0.0001694164
## yelp_tag_American..Traditional.
## 0.0001694164
## yelp_tag_Arabic
## 0.0001694164
## yelp_tag_Argentine
## 0.0001694164
## yelp_tag_Art.Museums
## 0.0001694164
## yelp_tag_Asian.Fusion
## 0.0001694164
## yelp_tag_Australian
## 0.0001694164
## yelp_tag_Bagels
## 0.0001694164
## yelp_tag_Bakeries
## 0.0001694164
## yelp_tag_Bangladeshi
## 0.0001694164
## yelp_tag_Barbeque
## 0.0001694164
## yelp_tag_Bars
## 0.0001694164
## yelp_tag_Basque
## 0.0001694164
## yelp_tag_Beer
## 0.0001694164
## yelp_tag_Beer.Bar
## 0.0001694164
## yelp_tag_Beer.Gardens
## 0.0001694164
## yelp_tag_Belgian
## 0.0001694164
## yelp_tag_Bikes
## 0.0001694164
## yelp_tag_Brasseries
## 0.0001694164
## yelp_tag_Breakfast...Brunch
## 0.0001694164
## yelp_tag_Breweries
## 0.0001694164
## yelp_tag_Brewpubs
## 0.0001694164
## yelp_tag_British
## 0.0001694164
## yelp_tag_Bubble.Tea
## 0.0001694164
## yelp_tag_Burgers
## 0.0001694164
## yelp_tag_Butcher
## 0.0001694164
## yelp_tag_CSA
## 0.0001694164
## yelp_tag_Cafes
## 0.0001694164
## yelp_tag_Cajun.Creole
## 0.0001694164
## yelp_tag_Cambodian
## 0.0001694164
## yelp_tag_Cannabis.Clinics
## 0.0001694164
## yelp_tag_Cannabis.Dispensaries
## 0.0001694164
## yelp_tag_Cantonese
## 0.0001694164
## yelp_tag_Caribbean
## 0.0001694164
## yelp_tag_Caterers
## 0.0001694164
## yelp_tag_Champagne.Bars
## 0.0001694164
## yelp_tag_Cheesesteaks
## 0.0001694164
## yelp_tag_Chicken.Shop
## 0.0001694164
## yelp_tag_Chicken.Wings
## 0.0001694164
## yelp_tag_Chinese
## 0.0001694164
## yelp_tag_Chocolatiers...Shops
## 0.0001694164
## yelp_tag_Cideries
## 0.0001694164
## yelp_tag_Cocktail.Bars
## 0.0001694164
## yelp_tag_Coffee...Tea
## 0.0001694164
## yelp_tag_Coffee.Roasteries
## 0.0001694164
## yelp_tag_Colleges...Universities
## 0.0001694164
## yelp_tag_Colombian
## 0.0001694164
## yelp_tag_Comfort.Food
## 0.0001694164
## yelp_tag_Comic.Books
## 0.0001694164
## yelp_tag_Cooking.Schools
## 0.0001694164
## yelp_tag_Cosmetic.Dentists
## 0.0001694164
## yelp_tag_Cuban
## 0.0001694164
## yelp_tag_Dance.Clubs
## 0.0001694164
## yelp_tag_Delis
## 0.0001694164
## yelp_tag_Desserts
## 0.0001694164
## yelp_tag_Dim.Sum
## 0.0001694164
## yelp_tag_Diners
## 0.0001694164
## yelp_tag_Donuts
## 0.0001694164
## yelp_tag_Empanadas
## 0.0001694164
## yelp_tag_Ethiopian
## 0.0001694164
## yelp_tag_Falafel
## 0.0001694164
## yelp_tag_Fast.Food
## 0.0001694164
## yelp_tag_Floral.Designers
## 0.0001694164
## yelp_tag_Florists
## 0.0001694164
## yelp_tag_Food.Court
## 0.0001694164
## yelp_tag_Food.Delivery.Services
## 0.0001694164
## yelp_tag_Food.Stands
## 0.0001694164
## yelp_tag_Food.Trucks
## 0.0001694164
## yelp_tag_French
## 0.0001694164
## yelp_tag_Furniture.Stores
## 0.0001694164
## yelp_tag_Gastropubs
## 0.0001694164
## yelp_tag_General.Dentistry
## 0.0001694164
## yelp_tag_German
## 0.0001694164
## yelp_tag_Gift.Shops
## 0.0001694164
## yelp_tag_Gluten.Free
## 0.0001694164
## yelp_tag_Greek
## 0.0001694164
## yelp_tag_Grocery
## 0.0001694164
## yelp_tag_Halal
## 0.0001694164
## yelp_tag_Health.Markets
## 0.0001694164
## yelp_tag_Home.Decor
## 0.0001694164
## yelp_tag_Hookah.Bars
## 0.0001694164
## yelp_tag_Hospitals
## 0.0001694164
## yelp_tag_Hot.Dogs
## 0.0001694164
## yelp_tag_Hotels
## 0.0001694164
## yelp_tag_Ice.Cream...Frozen.Yogurt
## 0.0001694164
## yelp_tag_Indian
## 0.0001694164
## yelp_tag_Italian
## 0.0001694164
## yelp_tag_Izakaya
## 0.0001694164
## yelp_tag_Japanese
## 0.0001694164
## yelp_tag_Jewelry
## 0.0001694164
## yelp_tag_Juice.Bars...Smoothies
## 0.0001694164
## yelp_tag_Knife.Sharpening
## 0.0001694164
## yelp_tag_Korean
## 0.0001694164
## yelp_tag_Laser.Hair.Removal
## 0.0001694164
## yelp_tag_Latin.American
## 0.0001694164
## yelp_tag_Lebanese
## 0.0001694164
## yelp_tag_Limos
## 0.0001694164
## yelp_tag_Live.Raw.Food
## 0.0001694164
## yelp_tag_Local.Flavor
## 0.0001694164
## yelp_tag_Lounges
## 0.0001694164
## yelp_tag_Massage
## 0.0001694164
## yelp_tag_Massage.Therapy
## 0.0001694164
## yelp_tag_Meat.Shops
## 0.0001694164
## yelp_tag_Mediterranean
## 0.0001694164
## yelp_tag_Mexican
## 0.0001694164
## yelp_tag_Middle.Eastern
## 0.0001694164
## yelp_tag_Middle.Schools...High.Schools
## 0.0001694164
## yelp_tag_Modern.European
## 0.0001694164
## yelp_tag_Music.Venues
## 0.0001694164
## yelp_tag_New.Mexican.Cuisine
## 0.0001694164
## yelp_tag_Noodles
## 0.0001694164
## yelp_tag_Nutritionists
## 0.0001694164
## yelp_tag_Organic.Stores
## 0.0001694164
## yelp_tag_Pan.Asian
## 0.0001694164
## yelp_tag_Party...Event.Planning
## 0.0001694164
## yelp_tag_Pasta.Shops
## 0.0001694164
## yelp_tag_Patisserie.Cake.Shop
## 0.0001694164
## yelp_tag_Personal.Chefs
## 0.0001694164
## yelp_tag_Peruvian
## 0.0001694164
## yelp_tag_Pet.Boarding
## 0.0001694164
## yelp_tag_Pet.Groomers
## 0.0001694164
## yelp_tag_Pet.Training
## 0.0001694164
## yelp_tag_Pizza
## 0.0001694164
## yelp_tag_Poke
## 0.0001694164
## yelp_tag_Pop.Up.Restaurants
## 0.0001694164
## yelp_tag_Professional.Sports.Teams
## 0.0001694164
## yelp_tag_Pubs
## 0.0001694164
## yelp_tag_Ramen
## 0.0001694164
## yelp_tag_Restaurants
## 0.0001694164
## yelp_tag_Salad
## 0.0001694164
## yelp_tag_Sandwiches
## 0.0001694164
## yelp_tag_Seafood
## 0.0001694164
## yelp_tag_Skin.Care
## 0.0001694164
## yelp_tag_Smokehouse
## 0.0001694164
## yelp_tag_Soul.Food
## 0.0001694164
## yelp_tag_Soup
## 0.0001694164
## yelp_tag_Southern
## 0.0001694164
## yelp_tag_Spanish
## 0.0001694164
## yelp_tag_Speakeasies
## 0.0001694164
## yelp_tag_Specialty.Food
## 0.0001694164
## yelp_tag_Sports.Bars
## 0.0001694164
## yelp_tag_Stadiums...Arenas
## 0.0001694164
## yelp_tag_Steakhouses
## 0.0001694164
## yelp_tag_Street.Vendors
## 0.0001694164
## yelp_tag_Sushi.Bars
## 0.0001694164
## yelp_tag_Tacos
## 0.0001694164
## yelp_tag_Taiwanese
## 0.0001694164
## yelp_tag_Tapas.Bars
## 0.0001694164
## yelp_tag_Tapas.Small.Plates
## 0.0001694164
## yelp_tag_Tex.Mex
## 0.0001694164
## yelp_tag_Thai
## 0.0001694164
## yelp_tag_Tiki.Bars
## 0.0001694164
## yelp_tag_Train.Stations
## 0.0001694164
## yelp_tag_Turkish
## 0.0001694164
## yelp_tag_Used
## 0.0001694164
## yelp_tag_Vegan
## 0.0001694164
## yelp_tag_Vegetarian
## 0.0001694164
## yelp_tag_Venues...Event.Spaces
## 0.0001694164
## yelp_tag_Vietnamese
## 0.0001694164
## yelp_tag_Vintage...Consignment
## 0.0001694164
## yelp_tag_Waffles
## 0.0001694164
## yelp_tag_Whiskey.Bars
## 0.0001694164
## yelp_tag_Wholesalers
## 0.0001694164
## yelp_tag_Wine...Spirits
## 0.0001694164
## yelp_tag_Wine.Bars
## 0.0001694164
## yelp_tag_Wine.Tours
## 0.0001694164
## yelp_tag_Wineries
## 0.0001694164
## yelp_tag_Wraps
## 0.0001694164
# Keep the rows with D == 0 and count how many such rows each id has.
Y_TR_U_P_D0 <- filter(Y_TR_U_P, D == 0)
D0_id_count <- as.data.table(Y_TR_U_P_D0)[, .(count = .N), by = id]

# Histogram of those per-id counts. The fill is mapped to the computed bin
# count with after_stat(), which replaces the deprecated `..count..`
# notation (deprecated since ggplot2 3.4.0).
ggplot(D0_id_count, aes(x = count)) +
  geom_histogram(aes(fill = after_stat(count)), bins = 50, alpha = 0.5) +
  scale_fill_gradient(low = "pink", high = "red") +
  labs(
    title = "Histogram of Observed Period Length",
    x = "Observed Period Length",
    y = "Frequency"
  ) +
  theme_minimal()
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
id_oAG
Customers with observed periods equal to 1 will be
placed in id_oAG.
# ids with exactly one D == 0 row.
id_oAG <- D0_id_count[count == 1, .(id)]
paste("count:", nrow(id_oAG))
## [1] "count: 54387"
# Share of all ids in D0_id_count; the denominator is taken from the data
# instead of being typed in as a literal.
paste("proportion:", round(nrow(id_oAG) / nrow(D0_id_count), 2))
## [1] "proportion: 0.71"
id_sAG
Customers with observed periods of 2,
3, 4, or 5 will be placed in
id_sAG.
# ids with two to five D == 0 rows.
id_sAG <- D0_id_count[count %in% c(2, 3, 4, 5), .(id)]
paste("count:", nrow(id_sAG))
## [1] "count: 7446"
# Share of all ids in D0_id_count; the denominator is taken from the data
# instead of being typed in as a literal.
paste("proportion:", round(nrow(id_sAG) / nrow(D0_id_count), 2))
## [1] "proportion: 0.1"
id_lAG
Customers with observed periods greater than 5 will be
placed in id_lAG.
# ids with more than five D == 0 rows.
id_lAG <- D0_id_count[count > 5, .(id)]
paste("count:", nrow(id_lAG))
## [1] "count: 14901"
# Share of all ids in D0_id_count; the denominator is taken from the data
# instead of being typed in as a literal.
paste("proportion:", round(nrow(id_lAG) / nrow(D0_id_count), 2))
## [1] "proportion: 0.19"
# Split the full panel Y into subsets according to the id groups above.
Y <- Y_TR_U_P
Y_oAG <- Y |> filter(id %in% id_oAG$id)
Y_sAG <- Y |> filter(id %in% id_sAG$id)
Y_lAG <- Y |> filter(id %in% id_lAG$id)
# Everything that is not in id_oAG.
Y_slAG <- Y |> filter(!(id %in% id_oAG$id))
Great! Only one id is missing all project-level
covariates once one-and-gone customers are excluded. We will simply drop it.
# List the ids whose project-level covariates are missing in each subset
# (unique_proj_loc_P is used as the indicator column).
filter(Y_sAG, is.na(unique_proj_loc_P))$id |> unique()
## integer(0)
filter(Y_lAG, is.na(unique_proj_loc_P))$id |> unique()
## [1] 34758
filter(Y_slAG, is.na(unique_proj_loc_P))$id |> unique()
## [1] 34758
# Drop those ids. They are derived from the data rather than typed in, so the
# step stays correct if the set of affected ids changes. Y_lAG is a subset of
# Y_slAG, so the ids found in Y_slAG cover both.
ids_missing_proj <- unique(Y_slAG$id[is.na(Y_slAG$unique_proj_loc_P)])
Y_lAG <- filter(Y_lAG, !(id %in% ids_missing_proj))
Y_slAG <- filter(Y_slAG, !(id %in% ids_missing_proj))
miss_prop(Y_slAG)
## id
## 0
## T
## 0
## user_id
## 0
## proj_id
## 0
## D
## 0
## F_trans
## 0
## M_trans
## 0
## R_trans
## 0
## F_redem
## 0
## M_redem
## 0
## R_redem
## 0
## F_charg
## 0
## M_charg
## 0
## R_charg
## 0
## F_tip
## 0
## M_tip
## 0
## is_reconcile
## 0
## suspicious_is_promo
## 0
## utm_is_promo
## 0
## TR_Venue.Type...Detail_Bar
## 0
## TR_Venue.Type...Detail_Cafe
## 0
## TR_Venue.Type...Detail_Casual.Dining
## 0
## TR_Venue.Type...Detail_Casual.Fine.Dining
## 0
## TR_Venue.Type...Detail_Coffee.Shop
## 0
## TR_Venue.Type...Detail_Fast.Casual
## 0
## TR_Venue.Type...Detail_Fast.Food
## 0
## TR_Venue.Type...Detail_Fine.Dining
## 0
## TR_Venue.Type...Detail_Market
## 0
## TR_Venue.Type...Detail_Nightclub
## 0
## TR_Check.Average_High
## 0
## TR_Check.Average_Low
## 0
## TR_Check.Average_Mid
## 0
## TR_Check.Average_Very.High
## 0
## TR_Check.Average_Very.Low
## 0
## TR_Service.Type_Fast.Casual
## 0
## TR_Service.Type_Full.Service
## 0
## TR_Service.Type_Ghost.Kitchen
## 0
## TR_Service.Type_QSR
## 0
## TR_transaction_type_0
## 0
## TR_transaction_type_1
## 0
## TR_credit_type_0
## 0
## TR_credit_type_5
## 0
## TR_stripe_brand_American.Express
## 0
## TR_stripe_brand_Discover
## 0
## TR_stripe_brand_MasterCard
## 0
## TR_stripe_brand_Visa
## 0
## TR_stripe_brand_Other
## 0
## T_acct
## 0
## user_app_android
## 0
## user_app_ios
## 0
## unique_proj_loc_P
## 0
## max_is_featured
## 0
## mean_longitude
## 0
## mean_latitude
## 0
## mean_rating
## 0
## sum_review_num
## 0
## mean_price_level
## 0
## yelp_tag_Acne.Treatment
## 0
## yelp_tag_Afghan
## 0
## yelp_tag_Airport.Terminals
## 0
## yelp_tag_American..New.
## 0
## yelp_tag_American..Traditional.
## 0
## yelp_tag_Arabic
## 0
## yelp_tag_Argentine
## 0
## yelp_tag_Art.Museums
## 0
## yelp_tag_Asian.Fusion
## 0
## yelp_tag_Australian
## 0
## yelp_tag_Bagels
## 0
## yelp_tag_Bakeries
## 0
## yelp_tag_Bangladeshi
## 0
## yelp_tag_Barbeque
## 0
## yelp_tag_Bars
## 0
## yelp_tag_Basque
## 0
## yelp_tag_Beer
## 0
## yelp_tag_Beer.Bar
## 0
## yelp_tag_Beer.Gardens
## 0
## yelp_tag_Belgian
## 0
## yelp_tag_Bikes
## 0
## yelp_tag_Brasseries
## 0
## yelp_tag_Breakfast...Brunch
## 0
## yelp_tag_Breweries
## 0
## yelp_tag_Brewpubs
## 0
## yelp_tag_British
## 0
## yelp_tag_Bubble.Tea
## 0
## yelp_tag_Burgers
## 0
## yelp_tag_Butcher
## 0
## yelp_tag_CSA
## 0
## yelp_tag_Cafes
## 0
## yelp_tag_Cajun.Creole
## 0
## yelp_tag_Cambodian
## 0
## yelp_tag_Cannabis.Clinics
## 0
## yelp_tag_Cannabis.Dispensaries
## 0
## yelp_tag_Cantonese
## 0
## yelp_tag_Caribbean
## 0
## yelp_tag_Caterers
## 0
## yelp_tag_Champagne.Bars
## 0
## yelp_tag_Cheesesteaks
## 0
## yelp_tag_Chicken.Shop
## 0
## yelp_tag_Chicken.Wings
## 0
## yelp_tag_Chinese
## 0
## yelp_tag_Chocolatiers...Shops
## 0
## yelp_tag_Cideries
## 0
## yelp_tag_Cocktail.Bars
## 0
## yelp_tag_Coffee...Tea
## 0
## yelp_tag_Coffee.Roasteries
## 0
## yelp_tag_Colleges...Universities
## 0
## yelp_tag_Colombian
## 0
## yelp_tag_Comfort.Food
## 0
## yelp_tag_Comic.Books
## 0
## yelp_tag_Cooking.Schools
## 0
## yelp_tag_Cosmetic.Dentists
## 0
## yelp_tag_Cuban
## 0
## yelp_tag_Dance.Clubs
## 0
## yelp_tag_Delis
## 0
## yelp_tag_Desserts
## 0
## yelp_tag_Dim.Sum
## 0
## yelp_tag_Diners
## 0
## yelp_tag_Donuts
## 0
## yelp_tag_Empanadas
## 0
## yelp_tag_Ethiopian
## 0
## yelp_tag_Falafel
## 0
## yelp_tag_Fast.Food
## 0
## yelp_tag_Floral.Designers
## 0
## yelp_tag_Florists
## 0
## yelp_tag_Food.Court
## 0
## yelp_tag_Food.Delivery.Services
## 0
## yelp_tag_Food.Stands
## 0
## yelp_tag_Food.Trucks
## 0
## yelp_tag_French
## 0
## yelp_tag_Furniture.Stores
## 0
## yelp_tag_Gastropubs
## 0
## yelp_tag_General.Dentistry
## 0
## yelp_tag_German
## 0
## yelp_tag_Gift.Shops
## 0
## yelp_tag_Gluten.Free
## 0
## yelp_tag_Greek
## 0
## yelp_tag_Grocery
## 0
## yelp_tag_Halal
## 0
## yelp_tag_Health.Markets
## 0
## yelp_tag_Home.Decor
## 0
## yelp_tag_Hookah.Bars
## 0
## yelp_tag_Hospitals
## 0
## yelp_tag_Hot.Dogs
## 0
## yelp_tag_Hotels
## 0
## yelp_tag_Ice.Cream...Frozen.Yogurt
## 0
## yelp_tag_Indian
## 0
## yelp_tag_Italian
## 0
## yelp_tag_Izakaya
## 0
## yelp_tag_Japanese
## 0
## yelp_tag_Jewelry
## 0
## yelp_tag_Juice.Bars...Smoothies
## 0
## yelp_tag_Knife.Sharpening
## 0
## yelp_tag_Korean
## 0
## yelp_tag_Laser.Hair.Removal
## 0
## yelp_tag_Latin.American
## 0
## yelp_tag_Lebanese
## 0
## yelp_tag_Limos
## 0
## yelp_tag_Live.Raw.Food
## 0
## yelp_tag_Local.Flavor
## 0
## yelp_tag_Lounges
## 0
## yelp_tag_Massage
## 0
## yelp_tag_Massage.Therapy
## 0
## yelp_tag_Meat.Shops
## 0
## yelp_tag_Mediterranean
## 0
## yelp_tag_Mexican
## 0
## yelp_tag_Middle.Eastern
## 0
## yelp_tag_Middle.Schools...High.Schools
## 0
## yelp_tag_Modern.European
## 0
## yelp_tag_Music.Venues
## 0
## yelp_tag_New.Mexican.Cuisine
## 0
## yelp_tag_Noodles
## 0
## yelp_tag_Nutritionists
## 0
## yelp_tag_Organic.Stores
## 0
## yelp_tag_Pan.Asian
## 0
## yelp_tag_Party...Event.Planning
## 0
## yelp_tag_Pasta.Shops
## 0
## yelp_tag_Patisserie.Cake.Shop
## 0
## yelp_tag_Personal.Chefs
## 0
## yelp_tag_Peruvian
## 0
## yelp_tag_Pet.Boarding
## 0
## yelp_tag_Pet.Groomers
## 0
## yelp_tag_Pet.Training
## 0
## yelp_tag_Pizza
## 0
## yelp_tag_Poke
## 0
## yelp_tag_Pop.Up.Restaurants
## 0
## yelp_tag_Professional.Sports.Teams
## 0
## yelp_tag_Pubs
## 0
## yelp_tag_Ramen
## 0
## yelp_tag_Restaurants
## 0
## yelp_tag_Salad
## 0
## yelp_tag_Sandwiches
## 0
## yelp_tag_Seafood
## 0
## yelp_tag_Skin.Care
## 0
## yelp_tag_Smokehouse
## 0
## yelp_tag_Soul.Food
## 0
## yelp_tag_Soup
## 0
## yelp_tag_Southern
## 0
## yelp_tag_Spanish
## 0
## yelp_tag_Speakeasies
## 0
## yelp_tag_Specialty.Food
## 0
## yelp_tag_Sports.Bars
## 0
## yelp_tag_Stadiums...Arenas
## 0
## yelp_tag_Steakhouses
## 0
## yelp_tag_Street.Vendors
## 0
## yelp_tag_Sushi.Bars
## 0
## yelp_tag_Tacos
## 0
## yelp_tag_Taiwanese
## 0
## yelp_tag_Tapas.Bars
## 0
## yelp_tag_Tapas.Small.Plates
## 0
## yelp_tag_Tex.Mex
## 0
## yelp_tag_Thai
## 0
## yelp_tag_Tiki.Bars
## 0
## yelp_tag_Train.Stations
## 0
## yelp_tag_Turkish
## 0
## yelp_tag_Used
## 0
## yelp_tag_Vegan
## 0
## yelp_tag_Vegetarian
## 0
## yelp_tag_Venues...Event.Spaces
## 0
## yelp_tag_Vietnamese
## 0
## yelp_tag_Vintage...Consignment
## 0
## yelp_tag_Waffles
## 0
## yelp_tag_Whiskey.Bars
## 0
## yelp_tag_Wholesalers
## 0
## yelp_tag_Wine...Spirits
## 0
## yelp_tag_Wine.Bars
## 0
## yelp_tag_Wine.Tours
## 0
## yelp_tag_Wineries
## 0
## yelp_tag_Wraps
## 0
#' Log-transform a non-negative numeric vector as log(1 + x).
#'
#' Uses `log1p()`, which computes log(1 + x) accurately even when `x` is very
#' close to 0, where forming `x + 1` first would round the input away.
#'
#' @param x A numeric vector.
#' @return A numeric vector of the same length as `x`.
log_transf <- function(x) {
  log1p(x)
}
#' Rescale a numeric vector linearly onto the interval [0, 10].
#'
#' The minimum of `x` maps to 0 and the maximum to 10. When all values are
#' equal there is no spread to rescale, and the function returns 0 for every
#' element instead of dividing by zero. Missing values are not removed: if
#' `x` contains `NA`, every element of the result is `NA`.
#'
#' @param x A numeric vector.
#' @return A numeric vector of the same length as `x`.
adjusted_minmax <- function(x) {
  bounds <- range(x)
  span <- bounds[2] - bounds[1]
  # isTRUE() keeps an NA span (NA in x) on the ordinary path below.
  if (isTRUE(span == 0)) {
    return(x * 0)
  }
  10 * ((x - bounds[1]) / span)
}
# Y ----
# The outcome columns sit in positions 6:16 of Y (F_trans through M_tip).
outcome_cols <- colnames(Y)[6:16]

# Log-transform and then rescale to [0, 10] every column named in `cols`,
# returning the modified copy of `df`. The rescaling uses the minimum and
# maximum of the data set it is applied to.
transform_outcomes <- function(df, cols) {
  df[cols] <- lapply(df[cols], function(v) adjusted_minmax(log_transf(v)))
  df
}

Y_tf <- transform_outcomes(Y, outcome_cols)

# Y_slAG ----
Y_slAG_tf <- transform_outcomes(Y_slAG, outcome_cols)

# Y_lAG ----
Y_lAG_tf <- transform_outcomes(Y_lAG, outcome_cols)
dim(Y_lAG_tf)
## [1] 819500 228
dim(Y_slAG_tf)
## [1] 1229030 228
head(Y_slAG_tf)
## id T user_id proj_id D F_trans M_trans R_trans F_redem M_redem R_redem
## 1 4 1 100005 692 0 0 0 4.725949 1.167918 4.020763 8.182816
## 2 4 2 100005 692 0 0 0 4.725949 1.167918 4.020763 8.182816
## 3 4 3 100005 692 0 0 0 4.725949 1.167918 4.020763 8.182816
## 4 4 4 100005 692 0 0 0 4.725949 1.167918 4.020763 8.182816
## 5 4 5 100005 692 0 0 0 4.725949 1.167918 4.020763 8.182816
## 6 4 6 100005 692 0 0 0 4.725949 1.167918 4.020763 8.182816
## F_charg M_charg R_charg F_tip M_tip is_reconcile suspicious_is_promo
## 1 0 0 4.725949 1.16844 3.098038 0 1
## 2 0 0 4.725949 1.16844 3.098038 0 1
## 3 0 0 4.725949 1.16844 3.098038 0 1
## 4 0 0 4.725949 1.16844 3.098038 0 1
## 5 0 0 4.725949 1.16844 3.098038 0 1
## 6 0 0 4.725949 1.16844 3.098038 0 1
## utm_is_promo TR_Venue.Type...Detail_Bar TR_Venue.Type...Detail_Cafe
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## TR_Venue.Type...Detail_Casual.Dining
## 1 1
## 2 1
## 3 1
## 4 1
## 5 1
## 6 1
## TR_Venue.Type...Detail_Casual.Fine.Dining TR_Venue.Type...Detail_Coffee.Shop
## 1 0 0
## 2 0 0
## 3 0 0
## 4 0 0
## 5 0 0
## 6 0 0
## TR_Venue.Type...Detail_Fast.Casual TR_Venue.Type...Detail_Fast.Food
## 1 0 0
## 2 0 0
## 3 0 0
## 4 0 0
## 5 0 0
## 6 0 0
## TR_Venue.Type...Detail_Fine.Dining TR_Venue.Type...Detail_Market
## 1 0 0
## 2 0 0
## 3 0 0
## 4 0 0
## 5 0 0
## 6 0 0
## TR_Venue.Type...Detail_Nightclub TR_Check.Average_High TR_Check.Average_Low
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## TR_Check.Average_Mid TR_Check.Average_Very.High TR_Check.Average_Very.Low
## 1 1 0 0
## 2 1 0 0
## 3 1 0 0
## 4 1 0 0
## 5 1 0 0
## 6 1 0 0
## TR_Service.Type_Fast.Casual TR_Service.Type_Full.Service
## 1 0 1
## 2 0 1
## 3 0 1
## 4 0 1
## 5 0 1
## 6 0 1
## TR_Service.Type_Ghost.Kitchen TR_Service.Type_QSR TR_transaction_type_0
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## TR_transaction_type_1 TR_credit_type_0 TR_credit_type_5
## 1 1 1 0
## 2 1 1 0
## 3 1 1 0
## 4 1 1 0
## 5 1 1 0
## 6 1 1 0
## TR_stripe_brand_American.Express TR_stripe_brand_Discover
## 1 0 0
## 2 0 0
## 3 0 0
## 4 0 0
## 5 0 0
## 6 0 0
## TR_stripe_brand_MasterCard TR_stripe_brand_Visa TR_stripe_brand_Other T_acct
## 1 0 1 1 35
## 2 0 1 1 35
## 3 0 1 1 35
## 4 0 1 1 35
## 5 0 1 1 35
## 6 0 1 1 35
## user_app_android user_app_ios unique_proj_loc_P max_is_featured
## 1 0 1 31 1
## 2 0 1 31 1
## 3 0 1 31 1
## 4 0 1 31 1
## 5 0 1 31 1
## 6 0 1 31 1
## mean_longitude mean_latitude mean_rating sum_review_num mean_price_level
## 1 -84.02809 39.06675 4 13319 2.137242
## 2 -84.02809 39.06675 4 13319 2.137242
## 3 -84.02809 39.06675 4 13319 2.137242
## 4 -84.02809 39.06675 4 13319 2.137242
## 5 -84.02809 39.06675 4 13319 2.137242
## 6 -84.02809 39.06675 4 13319 2.137242
## yelp_tag_Acne.Treatment yelp_tag_Afghan yelp_tag_Airport.Terminals
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## yelp_tag_American..New. yelp_tag_American..Traditional. yelp_tag_Arabic
## 1 1 1 1
## 2 1 1 1
## 3 1 1 1
## 4 1 1 1
## 5 1 1 1
## 6 1 1 1
## yelp_tag_Argentine yelp_tag_Art.Museums yelp_tag_Asian.Fusion
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## yelp_tag_Australian yelp_tag_Bagels yelp_tag_Bakeries yelp_tag_Bangladeshi
## 1 0 0 1 0
## 2 0 0 1 0
## 3 0 0 1 0
## 4 0 0 1 0
## 5 0 0 1 0
## 6 0 0 1 0
## yelp_tag_Barbeque yelp_tag_Bars yelp_tag_Basque yelp_tag_Beer
## 1 0 1 0 0
## 2 0 1 0 0
## 3 0 1 0 0
## 4 0 1 0 0
## 5 0 1 0 0
## 6 0 1 0 0
## yelp_tag_Beer.Bar yelp_tag_Beer.Gardens yelp_tag_Belgian yelp_tag_Bikes
## 1 0 1 0 0
## 2 0 1 0 0
## 3 0 1 0 0
## 4 0 1 0 0
## 5 0 1 0 0
## 6 0 1 0 0
## yelp_tag_Brasseries yelp_tag_Breakfast...Brunch yelp_tag_Breweries
## 1 0 1 0
## 2 0 1 0
## 3 0 1 0
## 4 0 1 0
## 5 0 1 0
## 6 0 1 0
## yelp_tag_Brewpubs yelp_tag_British yelp_tag_Bubble.Tea yelp_tag_Burgers
## 1 0 0 0 1
## 2 0 0 0 1
## 3 0 0 0 1
## 4 0 0 0 1
## 5 0 0 0 1
## 6 0 0 0 1
## yelp_tag_Butcher yelp_tag_CSA yelp_tag_Cafes yelp_tag_Cajun.Creole
## 1 0 0 1 0
## 2 0 0 1 0
## 3 0 0 1 0
## 4 0 0 1 0
## 5 0 0 1 0
## 6 0 0 1 0
## yelp_tag_Cambodian yelp_tag_Cannabis.Clinics yelp_tag_Cannabis.Dispensaries
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## yelp_tag_Cantonese yelp_tag_Caribbean yelp_tag_Caterers
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## yelp_tag_Champagne.Bars yelp_tag_Cheesesteaks yelp_tag_Chicken.Shop
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## yelp_tag_Chicken.Wings yelp_tag_Chinese yelp_tag_Chocolatiers...Shops
## 1 0 1 0
## 2 0 1 0
## 3 0 1 0
## 4 0 1 0
## 5 0 1 0
## 6 0 1 0
## yelp_tag_Cideries yelp_tag_Cocktail.Bars yelp_tag_Coffee...Tea
## 1 0 1 1
## 2 0 1 1
## 3 0 1 1
## 4 0 1 1
## 5 0 1 1
## 6 0 1 1
## yelp_tag_Coffee.Roasteries yelp_tag_Colleges...Universities
## 1 0 0
## 2 0 0
## 3 0 0
## 4 0 0
## 5 0 0
## 6 0 0
## yelp_tag_Colombian yelp_tag_Comfort.Food yelp_tag_Comic.Books
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## yelp_tag_Cooking.Schools yelp_tag_Cosmetic.Dentists yelp_tag_Cuban
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## yelp_tag_Dance.Clubs yelp_tag_Delis yelp_tag_Desserts yelp_tag_Dim.Sum
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## yelp_tag_Diners yelp_tag_Donuts yelp_tag_Empanadas yelp_tag_Ethiopian
## 1 1 0 0 0
## 2 1 0 0 0
## 3 1 0 0 0
## 4 1 0 0 0
## 5 1 0 0 0
## 6 1 0 0 0
## yelp_tag_Falafel yelp_tag_Fast.Food yelp_tag_Floral.Designers
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## yelp_tag_Florists yelp_tag_Food.Court yelp_tag_Food.Delivery.Services
## 1 0 1 0
## 2 0 1 0
## 3 0 1 0
## 4 0 1 0
## 5 0 1 0
## 6 0 1 0
## yelp_tag_Food.Stands yelp_tag_Food.Trucks yelp_tag_French
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## yelp_tag_Furniture.Stores yelp_tag_Gastropubs yelp_tag_General.Dentistry
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## yelp_tag_German yelp_tag_Gift.Shops yelp_tag_Gluten.Free yelp_tag_Greek
## 1 1 0 0 0
## 2 1 0 0 0
## 3 1 0 0 0
## 4 1 0 0 0
## 5 1 0 0 0
## 6 1 0 0 0
## yelp_tag_Grocery yelp_tag_Halal yelp_tag_Health.Markets yelp_tag_Home.Decor
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## yelp_tag_Hookah.Bars yelp_tag_Hospitals yelp_tag_Hot.Dogs yelp_tag_Hotels
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## yelp_tag_Ice.Cream...Frozen.Yogurt yelp_tag_Indian yelp_tag_Italian
## 1 0 0 1
## 2 0 0 1
## 3 0 0 1
## 4 0 0 1
## 5 0 0 1
## 6 0 0 1
## yelp_tag_Izakaya yelp_tag_Japanese yelp_tag_Jewelry
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## yelp_tag_Juice.Bars...Smoothies yelp_tag_Knife.Sharpening yelp_tag_Korean
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## yelp_tag_Laser.Hair.Removal yelp_tag_Latin.American yelp_tag_Lebanese
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## yelp_tag_Limos yelp_tag_Live.Raw.Food yelp_tag_Local.Flavor yelp_tag_Lounges
## 1 1 0 0 1
## 2 1 0 0 1
## 3 1 0 0 1
## 4 1 0 0 1
## 5 1 0 0 1
## 6 1 0 0 1
## yelp_tag_Massage yelp_tag_Massage.Therapy yelp_tag_Meat.Shops
## 1 1 1 0
## 2 1 1 0
## 3 1 1 0
## 4 1 1 0
## 5 1 1 0
## 6 1 1 0
## yelp_tag_Mediterranean yelp_tag_Mexican yelp_tag_Middle.Eastern
## 1 0 1 1
## 2 0 1 1
## 3 0 1 1
## 4 0 1 1
## 5 0 1 1
## 6 0 1 1
## yelp_tag_Middle.Schools...High.Schools yelp_tag_Modern.European
## 1 0 0
## 2 0 0
## 3 0 0
## 4 0 0
## 5 0 0
## 6 0 0
## yelp_tag_Music.Venues yelp_tag_New.Mexican.Cuisine yelp_tag_Noodles
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## yelp_tag_Nutritionists yelp_tag_Organic.Stores yelp_tag_Pan.Asian
## 1 1 0 0
## 2 1 0 0
## 3 1 0 0
## 4 1 0 0
## 5 1 0 0
## 6 1 0 0
## yelp_tag_Party...Event.Planning yelp_tag_Pasta.Shops
## 1 0 0
## 2 0 0
## 3 0 0
## 4 0 0
## 5 0 0
## 6 0 0
## yelp_tag_Patisserie.Cake.Shop yelp_tag_Personal.Chefs yelp_tag_Peruvian
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## yelp_tag_Pet.Boarding yelp_tag_Pet.Groomers yelp_tag_Pet.Training
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## yelp_tag_Pizza yelp_tag_Poke yelp_tag_Pop.Up.Restaurants
## 1 1 0 0
## 2 1 0 0
## 3 1 0 0
## 4 1 0 0
## 5 1 0 0
## 6 1 0 0
## yelp_tag_Professional.Sports.Teams yelp_tag_Pubs yelp_tag_Ramen
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## yelp_tag_Restaurants yelp_tag_Salad yelp_tag_Sandwiches yelp_tag_Seafood
## 1 0 1 1 0
## 2 0 1 1 0
## 3 0 1 1 0
## 4 0 1 1 0
## 5 0 1 1 0
## 6 0 1 1 0
## yelp_tag_Skin.Care yelp_tag_Smokehouse yelp_tag_Soul.Food yelp_tag_Soup
## 1 0 0 1 1
## 2 0 0 1 1
## 3 0 0 1 1
## 4 0 0 1 1
## 5 0 0 1 1
## 6 0 0 1 1
## yelp_tag_Southern yelp_tag_Spanish yelp_tag_Speakeasies
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## yelp_tag_Specialty.Food yelp_tag_Sports.Bars yelp_tag_Stadiums...Arenas
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## yelp_tag_Steakhouses yelp_tag_Street.Vendors yelp_tag_Sushi.Bars
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## yelp_tag_Tacos yelp_tag_Taiwanese yelp_tag_Tapas.Bars
## 1 1 0 0
## 2 1 0 0
## 3 1 0 0
## 4 1 0 0
## 5 1 0 0
## 6 1 0 0
## yelp_tag_Tapas.Small.Plates yelp_tag_Tex.Mex yelp_tag_Thai yelp_tag_Tiki.Bars
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## yelp_tag_Train.Stations yelp_tag_Turkish yelp_tag_Used yelp_tag_Vegan
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## yelp_tag_Vegetarian yelp_tag_Venues...Event.Spaces yelp_tag_Vietnamese
## 1 1 1 1
## 2 1 1 1
## 3 1 1 1
## 4 1 1 1
## 5 1 1 1
## 6 1 1 1
## yelp_tag_Vintage...Consignment yelp_tag_Waffles yelp_tag_Whiskey.Bars
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## yelp_tag_Wholesalers yelp_tag_Wine...Spirits yelp_tag_Wine.Bars
## 1 1 0 1
## 2 1 0 1
## 3 1 0 1
## 4 1 0 1
## 5 1 0 1
## 6 1 0 1
## yelp_tag_Wine.Tours yelp_tag_Wineries yelp_tag_Wraps
## 1 1 1 0
## 2 1 1 0
## 3 1 1 0
## 4 1 1 0
## 5 1 1 0
## 6 1 1 0