library(readr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ purrr 1.0.2
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
cleanedtreesdf2 <- read_csv("cleanedtreesdf2.csv")
## New names:Rows: 22102 Columns: 10── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): Type, Date, Num, Ship_To_Address2, Ship_Zip, Item, Sales_Price
## dbl (3): ...1, Qty, Amount
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
trees <- cleanedtreesdf2
#View(cleanedtreesdf2)
getwd()
## [1] "/cloud/project"
write.csv(trees, "df2.csv")
str(trees)
## spc_tbl_ [22,102 × 10] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ ...1 : num [1:22102] 1 2 3 4 5 6 7 8 9 10 ...
## $ Type : chr [1:22102] "Invoice" "Invoice" "Invoice" "Invoice" ...
## $ Date : chr [1:22102] "7/29/2023" "7/29/2023" "5/11/2023" "5/11/2023" ...
## $ Num : chr [1:22102] "9845" "9845" "SI-1560R" "SI-1560R" ...
## $ Ship_To_Address2: chr [1:22102] "MacTavish Court" "MacTavish Court" "McDarmid Ave" "McDarmid Ave" ...
## $ Ship_Zip : chr [1:22102] "46703" "46703" "46703" "46703" ...
## $ Item : chr [1:22102] "Trees:Spruce:6' Fat Albert Blue Spruce" "Trip Charge" "Trees:Arborvitae:8' Emerald Green Arborvitae" "Replacement Discount" ...
## $ Qty : num [1:22102] 2 1 4 NA 1 NA 1 NA 1 NA ...
## $ Sales_Price : chr [1:22102] "525" "160" "450" "-1800" ...
## $ Amount : num [1:22102] 1050 160 1800 -1800 450 -450 924 -924 975 -975 ...
## - attr(*, "spec")=
## .. cols(
## .. ...1 = col_double(),
## .. Type = col_character(),
## .. Date = col_character(),
## .. Num = col_character(),
## .. Ship_To_Address2 = col_character(),
## .. Ship_Zip = col_character(),
## .. Item = col_character(),
## .. Qty = col_double(),
## .. Sales_Price = col_character(),
## .. Amount = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
install.packages("lubridate")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.0'
## (as 'lib' is unspecified)
library(lubridate)
library(glue)
trees
## # A tibble: 22,102 × 10
## ...1 Type Date Num Ship_To_Address2 Ship_Zip Item Qty Sales_Price
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <chr>
## 1 1 Invoice 7/29/2… 9845 MacTavish Court 46703 "Tre… 2 525
## 2 2 Invoice 7/29/2… 9845 MacTavish Court 46703 "Tri… 1 160
## 3 3 Invoice 5/11/2… SI-1… McDarmid Ave 46703 "Tre… 4 450
## 4 4 Invoice 5/11/2… SI-1… McDarmid Ave 46703 "Rep… NA -1800
## 5 5 Invoice 7/11/2… SI-1… McDarmid Ave 46703 "Tre… 1 450
## 6 6 Invoice 7/11/2… SI-1… McDarmid Ave 46703 "Rep… NA -450
## 7 7 Invoice 4/12/2… 7662R Menza Drive 46706 "Tre… 1 924
## 8 8 Invoice 4/12/2… 7662R Menza Drive 46706 "Rep… NA -924
## 9 9 Invoice 5/19/2… 8378R Cascina Lane 46706 "Tre… 1 975
## 10 10 Invoice 5/19/2… 8378R Cascina Lane 46706 "Rep… NA -975
## # ℹ 22,092 more rows
## # ℹ 1 more variable: Amount <dbl>
fixed_date <- trees %>%
mutate(Date = mdy(Date))%>%
mutate(Sales_Price = as.numeric(Sales_Price))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Sales_Price = as.numeric(Sales_Price)`.
## Caused by warning:
## ! NAs introduced by coercion
fixed_date
## # A tibble: 22,102 × 10
## ...1 Type Date Num Ship_To_Address2 Ship_Zip Item Qty
## <dbl> <chr> <date> <chr> <chr> <chr> <chr> <dbl>
## 1 1 Invoice 2023-07-29 9845 MacTavish Court 46703 "Trees:Sp… 2
## 2 2 Invoice 2023-07-29 9845 MacTavish Court 46703 "Trip Cha… 1
## 3 3 Invoice 2023-05-11 SI-1560R McDarmid Ave 46703 "Trees:Ar… 4
## 4 4 Invoice 2023-05-11 SI-1560R McDarmid Ave 46703 "Replacem… NA
## 5 5 Invoice 2023-07-11 SI-1560R2 McDarmid Ave 46703 "Trees:Ar… 1
## 6 6 Invoice 2023-07-11 SI-1560R2 McDarmid Ave 46703 "Replacem… NA
## 7 7 Invoice 2023-04-12 7662R Menza Drive 46706 "Trees:Pi… 1
## 8 8 Invoice 2023-04-12 7662R Menza Drive 46706 "Replacem… NA
## 9 9 Invoice 2023-05-19 8378R Cascina Lane 46706 "Trees:Tu… 1
## 10 10 Invoice 2023-05-19 8378R Cascina Lane 46706 "Replacem… NA
## # ℹ 22,092 more rows
## # ℹ 2 more variables: Sales_Price <dbl>, Amount <dbl>
fixed_date %>%
select(Sales_Price)%>%
count(Sales_Price)%>%
arrange(desc(n))
## # A tibble: 2,788 × 2
## Sales_Price n
## <dbl> <int>
## 1 35 2807
## 2 85 496
## 3 375 457
## 4 175 375
## 5 325 349
## 6 0 325
## 7 395 317
## 8 475 292
## 9 425 289
## 10 495 276
## # ℹ 2,778 more rows
glimpse(trees)
## Rows: 22,102
## Columns: 10
## $ ...1 <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16…
## $ Type <chr> "Invoice", "Invoice", "Invoice", "Invoice", "Invoice"…
## $ Date <chr> "7/29/2023", "7/29/2023", "5/11/2023", "5/11/2023", "…
## $ Num <chr> "9845", "9845", "SI-1560R", "SI-1560R", "SI-1560R2", …
## $ Ship_To_Address2 <chr> "MacTavish Court", "MacTavish Court", "McDarmid Ave",…
## $ Ship_Zip <chr> "46703", "46703", "46703", "46703", "46703", "46703",…
## $ Item <chr> "Trees:Spruce:6' Fat Albert Blue Spruce", "Trip Charg…
## $ Qty <dbl> 2, 1, 4, NA, 1, NA, 1, NA, 1, NA, -1, 3, 1, 3, NA, 1,…
## $ Sales_Price <chr> "525", "160", "450", "-1800", "450", "-450", "924", "…
## $ Amount <dbl> 1050.00, 160.00, 1800.00, -1800.00, 450.00, -450.00, …
trees%>%
count(Item == "Replacement Discount")
## # A tibble: 2 × 2
## `Item == "Replacement Discount"` n
## <lgl> <int>
## 1 FALSE 21050
## 2 TRUE 1052
They have replaced 1052 out of 21050 trees.
trees%>%
count(Type)
## # A tibble: 2 × 2
## Type n
## <chr> <int>
## 1 Credit Memo 403
## 2 Invoice 21699
They had 21699 invoices and 403 credit memos.
trees%>%
count(Item, sort = TRUE)
## # A tibble: 1,309 × 2
## Item n
## <chr> <int>
## 1 Installation 2952
## 2 Extended Warranty 1382
## 3 Tree Staking 1299
## 4 Replacement Discount 1052
## 5 Trip Charge 684
## 6 Fertilization 482
## 7 Trees:Arborvitae:6' Emerald Green Arborvitae 415
## 8 Delivery Fee 391
## 9 Trees:Spruce:6' Norway Spruce 384
## 10 Invoice Balancer 288
## # ℹ 1,299 more rows
trees%>%
count(Ship_To_Address2, sort = TRUE)
## # A tibble: 237 × 2
## Ship_To_Address2 n
## <chr> <int>
## 1 <NA> 21351
## 2 Dunton Rd. 16
## 3 Garman Road 12
## 4 E. 600 N 10
## 5 Pollock Rd 8
## 6 E. 400 N 7
## 7 E. North G 7
## 8 Fox Knoll Cove 7
## 9 Linden Grove Drive 7
## 10 Lost Valley Drive 7
## # ℹ 227 more rows
Garman Road has good sales.
ggplot(fixed_date, aes(x = Date , fill = Item)) +
geom_bar() +
guides(fill = "none") +
labs(title = "Dates",
subtitle = "Items",
x= NULL, y= "Sold Each Day")
ggplot(fixed_date, aes(x = Date, fill = Item)) +
geom_histogram() +
guides(fill = "none") +
labs(title = "Dates",
subtitle = "Items",
x= NULL, y= "Sold Each Day")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.