## [1] 17240
## [1] 17240 18
## # A tibble: 6 × 18
## id conta…¹ compa…² busin…³ busin…⁴ region route agent phone…⁵ phone…⁶
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 1630930942… "Annt" NICE "Vendo… Other West … Arua… Jan … 077038… ""
## 2 1630942307… "Arthu… NICE "ARTHU… Retail… Mid W… Kyan… Kjam… 077375… ""
## 3 1631037299… "Muzam… NICE "Hope … Busine… West … Rout… Jan … 078297… "0123"
## 4 1631037356… "Majid" NICE "Majid… Busine… West … Rout… Jan … 078414… "0123"
## 5 1631037597… "Ayume" NICE "Ayume" Busine… West … Rout… Jan … 077278… "0123"
## 6 1631086297… "kabwo… NICE "insti… Busine… South… Rout… Mutu… 776327… "0123"
## # … with 8 more variables: district <chr>, parish <chr>, village <chr>,
## # location_description <chr>, gps <chr>, registered_date <date>,
## # registered_by <chr>, last_updated_by <chr>, and abbreviated variable names
## # ¹contact_name, ²company_name, ³business_name, ⁴business_type,
## # ⁵phone_number, ⁶phone_number_2
This dataset contains the sample metrics for all the active customers
Dimensions of the Dataset
## [1] 13220
## [1] 13140
## Rows: 13,140
## Columns: 66
## $ outlets_category <chr> "Duuka", "Duuka", "Duuka…
## $ region <chr> "East", "East", "East", …
## $ sub_areas <chr> "Jinja", "Jinja", "Jinja…
## $ outlets_visited <chr> "Mukyala Ssemakula", "Na…
## $ phone_number <chr> "0706862052", "075668577…
## $ outlet_type <chr> "Roadside", "Existing", …
## $ outlet_location <chr> "Buwenge Market street",…
## $ permanent_landmark <chr> "Central market", "Centr…
## $ qty_of_nice_shino_pens_purchased <dbl> NA, NA, 1, NA, NA, NA, 5…
## $ amount_paid_on_shino_pens <dbl> NA, NA, 17500, NA, NA, N…
## $ qty_of_nice_touch_pens_purchased <dbl> NA, NA, NA, NA, NA, NA, …
## $ amount_paid_on_nice_touch_pens <dbl> NA, NA, NA, NA, NA, NA, …
## $ qty_of_nice_clear_pens_purchased <dbl> NA, NA, NA, NA, NA, NA, …
## $ amount_paid_on_clear_pens <dbl> NA, NA, NA, NA, NA, NA, …
## $ qty_of_cleanzy_extra_toothbrushes_purchased <chr> NA, NA, NA, NA, NA, NA, …
## $ amount_paid_on_cleanzy_extra_toothbrushes <dbl> NA, NA, NA, NA, NA, NA, …
## $ qty_of_cleanzy_stripes_toothbrushes_purchased <lgl> NA, NA, NA, NA, NA, NA, …
## $ amount_paid_on_cleanzy_stripes_toothbrushes <lgl> NA, NA, NA, NA, NA, NA, …
## $ qty_of_classic_toothbrushes_purchased <dbl> 2, NA, 2, NA, 3, 2, NA, …
## $ amount_paid_on_classic_toothbrushes <dbl> 9000, NA, 9000, NA, 1350…
## $ qty_of_buckets_purchased <dbl> 18, 24, 18, 12, 20, 24, …
## $ total_amount_paid_on_buckets <dbl> 91800, 122400, 91800, 61…
## $ qty_of_basins_purchased <dbl> NA, NA, NA, NA, NA, NA, …
## $ total_amount_paid_on_basins <dbl> NA, NA, NA, NA, NA, NA, …
## $ qty_of_tumblers_purchased_sets <dbl> NA, NA, NA, NA, NA, NA, …
## $ total_amount_paid_on_tumblers <dbl> NA, NA, NA, NA, NA, NA, …
## $ qty_of_chairs_purchased <dbl> NA, NA, NA, NA, NA, NA, …
## $ total_amount_paid_on_chairs <dbl> NA, NA, NA, NA, NA, NA, …
## $ qty_of_apc_purchased <dbl> NA, NA, NA, NA, NA, NA, …
## $ total_amount_paid_on_apc <dbl> NA, NA, NA, NA, NA, NA, …
## $ qty_of_cups_purchased <dbl> NA, NA, NA, NA, NA, NA, …
## $ total_amount_paid_on_cups <dbl> NA, NA, NA, NA, NA, NA, …
## $ qty_of_plates_purchased <dbl> NA, NA, NA, NA, NA, NA, …
## $ total_amount_paid_on_plates <dbl> NA, NA, NA, NA, NA, NA, …
## $ qty_of_jerrycans_purchased <dbl> NA, NA, NA, NA, NA, NA, …
## $ total_amount_paid_on_jerrycans <dbl> NA, NA, NA, NA, NA, NA, …
## $ qty_of_sato_purchased <lgl> NA, NA, NA, NA, NA, NA, …
## $ total_amount_paid_on_sato <lgl> NA, NA, NA, NA, NA, NA, …
## $ qty_of_cookware_purchased <dbl> NA, NA, NA, NA, NA, NA, …
## $ total_amount_paid_on_cookware <dbl> NA, NA, NA, NA, NA, NA, …
## $ qty_of_clear_mugs_purchased <dbl> NA, NA, NA, NA, NA, NA, …
## $ total_amount_paid_on_clear_mugs <dbl> NA, NA, NA, NA, NA, NA, …
## $ qty_of_watering_cans_purchased <dbl> 2, NA, NA, NA, NA, 1, NA…
## $ total_amount_paid_on_watering_cans <dbl> 13600, NA, NA, NA, NA, 1…
## $ qty_of_nice_plus_purchased <dbl> 1, NA, NA, NA, NA, NA, N…
## $ total_amount_paid_on_nice_plus <dbl> 6000, NA, NA, NA, NA, NA…
## $ qty_of_paint_bucket_purchased <dbl> NA, NA, NA, NA, NA, NA, …
## $ total_amount_paid_on_paint_bucket <dbl> NA, NA, NA, NA, NA, NA, …
## $ qty_of_katasa_purchased <dbl> NA, NA, NA, NA, NA, NA, …
## $ amount_paid_on_katasa <dbl> NA, NA, NA, NA, NA, NA, …
## $ qty_of_tables_purchased <lgl> NA, NA, NA, NA, NA, NA, …
## $ total_amount_paid_on_tables <lgl> NA, NA, NA, NA, NA, NA, …
## $ qty_of_trays_64_33_purchased <dbl> NA, NA, NA, NA, NA, NA, …
## $ total_amount_paid_on_trays_64_33 <dbl> NA, NA, NA, NA, NA, NA, …
## $ qty_of_serving_trays_purchased <lgl> NA, NA, NA, NA, NA, NA, …
## $ total_amount_paid_on_serving_trays <lgl> NA, NA, NA, NA, NA, NA, …
## $ amount_paid_on_others <dbl> 120400, 122400, 100800, …
## $ supervisor <chr> "Kiringa Lilian", "Kirin…
## $ sales_rep <chr> "Namugabo Dina", "Namuga…
## $ date_reported <chr> "January 12th, 2023", "J…
## $ week <chr> "Jan.Week2", "Jan.Week2"…
## $ month <chr> "January", "January", "J…
## $ longitude <dbl> 33.24162, 33.24162, 33.2…
## $ latitude <dbl> 0.4661182, 0.4661182, 0.…
## $ year <dbl> 2023, 2023, 2023, 2023, …
## $ phone <chr> "706862052", "756685779"…
Sample of the Project Strickers Dataset
## # A tibble: 6 × 66
## outle…¹ region sub_a…² outle…³ phone…⁴ outle…⁵ outle…⁶ perma…⁷ qty_o…⁸ amoun…⁹
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 Duuka East Jinja Mukyal… 070686… Roadsi… Buweng… Centra… NA NA
## 2 Duuka East Jinja Namate… 075668… Existi… Market… Centra… NA NA
## 3 Duuka East Jinja Hellen 075492… Roadsi… Market… Centra… 1 17500
## 4 Duuka East Jinja Babirye 075836… Roadsi… Market… Centra… NA NA
## 5 Duuka East Jinja Nankya 070239… Roadsi… Market… Centra… NA NA
## 6 Duuka East Jinja Muteka… 078420… Roadsi… Market… Centra… NA NA
## # … with 56 more variables: qty_of_nice_touch_pens_purchased <dbl>,
## # amount_paid_on_nice_touch_pens <dbl>,
## # qty_of_nice_clear_pens_purchased <dbl>, amount_paid_on_clear_pens <dbl>,
## # qty_of_cleanzy_extra_toothbrushes_purchased <chr>,
## # amount_paid_on_cleanzy_extra_toothbrushes <dbl>,
## # qty_of_cleanzy_stripes_toothbrushes_purchased <lgl>,
## # amount_paid_on_cleanzy_stripes_toothbrushes <lgl>, …
That is the data project strickers kicked off
customers_after_date <-
customers_df %>%
# customers registered after date
filter(registered_date > "2022-12-01") %>%
# filter for 256 and +256 phone numbers
filter(str_detect(phone_number, "^\\+256|^256")) %>%
# replace +256 with 256 in the phone number string
mutate(phone_number = str_replace(phone_number, "^\\+256", "256")) %>%
# replace the starting 256 with 0 in the phone number string
mutate(phone_number = str_replace(phone_number, "^256", "0")) %>%
# replace all the spaces in the phone number string
mutate(phone_number = str_replace_all(phone_number, " ", ""))
head(customers_after_date)
## # A tibble: 6 × 18
## id conta…¹ compa…² busin…³ busin…⁴ region route agent phone…⁵ phone…⁶
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 1675329647… Adam NICE Adams … Wholes… West … Yumb… Joke… 077862… ""
## 2 1704718559… malga … NICE malga … Wholes… Centr… Gaya… Vesom 077251… ""
## 3 1693921473… Layisi NICE Layisi… Wholes… Centr… Banda Vesom 020097… ""
## 4 1704718473… NS who… NICE NS who… Wholes… Centr… Gaya… Vesom 070418… ""
## 5 1701145325… irene NICE irene Retail… Centr… Banda Vesom 070131… ""
## 6 1681712325… Elepha… NICE Elepha… Wholes… North Town… MeWa… 078231… ""
## # … with 8 more variables: district <chr>, parish <chr>, village <chr>,
## # location_description <chr>, gps <chr>, registered_date <date>,
## # registered_by <chr>, last_updated_by <chr>, and abbreviated variable names
## # ¹contact_name, ²company_name, ³business_name, ⁴business_type,
## # ⁵phone_number, ⁶phone_number_2
strikers_customers_df <-
customers_after_date %>%
left_join(customers_tbl2, by = c("phone_number" = "phone")) %>%
# left join with customer_metrics_df where id = customer_id
left_join(customer_metrics_df, by = c("id" = "customer_id")) %>%
# mutate avg_units_ordered to a number, and replance NA with 0
mutate(avg_units_ordered = as.numeric(avg_units_ordered)) %>%
# mutate life_time_visits to a number, and replance NA with 0
mutate(life_time_visits = as.numeric(life_time_visits)) %>%
# mutate is_active to a boolean
mutate(is_active = ifelse(is_active == "true", TRUE, FALSE)) %>%
# mutate avg_order_val to a number, and replance NA with 0
mutate(avg_order_val = as.numeric(avg_order_val)) %>%
# sort the data by avg_order_val desc
arrange(desc(avg_order_val))
nrow(strikers_customers_df)
## [1] 54
head(strikers_customers_df)
## # A tibble: 6 × 89
## id conta…¹ compa…² busin…³ busin…⁴ regio…⁵ route agent phone…⁶ phone…⁷
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 170039077… "Orono" NICE Orona … Wholes… East Toro… Jesa… 077353… ""
## 2 168781134… "Ritah" NICE Ritah … Instit… Centra… Kibu… Faid… 077138… ""
## 3 169329211… "namwa… NICE namwag… Wholes… East Igan… Jesa… 075543… ""
## 4 169719706… "Dunca… NICE Kabale… School Mid We… Paka… Geva… 070466… ""
## 5 169046958… "kasal… NICE kasali… Wholes… East Igan… Jesa… 070503… ""
## 6 168366309… "maha" NICE maha i… Wholes… East Toro… Anna… 075224… ""
## # … with 79 more variables: district <chr>, parish <chr>, village <chr>,
## # location_description <chr>, gps <chr>, registered_date <date>,
## # registered_by <chr>, last_updated_by <chr>, outlets_category <chr>,
## # region.y <chr>, sub_areas <chr>, outlets_visited <chr>,
## # phone_number.y <chr>, outlet_type <chr>, outlet_location <chr>,
## # permanent_landmark <chr>, qty_of_nice_shino_pens_purchased <dbl>,
## # amount_paid_on_shino_pens <dbl>, qty_of_nice_touch_pens_purchased <dbl>, …
strikers_customers_df %>%
mutate(month = format(registered_date, "%Y-%m")) %>%
count(month, business_type) %>%
ggplot(aes(x = month, y = n, fill = business_type)) +
geom_col() +
labs(title = "Number of customers registered by month", x = "Month", y = "Number of customers")
strikers_customers_df %>%
count(is_active) %>%
ggplot(aes(x = "", y = n, fill = is_active)) +
geom_bar(stat = "identity") +
coord_polar("y") +
labs(title = "Active vs Inactive customers", x = "", y = "") +
theme_minimal()
# Map of customers_df using gps column, extract lat and lon with
leaflet
# customers_df
strikers_customers_df %>%
# glimpse()
# extract lat and lon from gps column
separate(gps, c("lat", "lon"), sep = ",") %>%
# convert lat and lon to numeric
mutate(lat = as.numeric(lat), lon = as.numeric(lon)) %>%
# filter for customers with lat and lon
filter(!is.na(lat), !is.na(lon)) %>%
# plot these points using leaflet
leaflet() %>%
addTiles() %>%
# # color based on is_active, if true then green else red
addCircleMarkers(lng = ~lon, lat = ~lat, color = ~ifelse(is_active, "green", "red")) %>%
addMarkers(lng = ~lon, lat = ~lat)