R Markdown

library(readr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ purrr     1.0.2
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
cleanedtreesdf2 <- read_csv("cleanedtreesdf2.csv")
## New names:Rows: 22102 Columns: 10── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): Type, Date, Num, Ship_To_Address2, Ship_Zip, Item, Sales_Price
## dbl (3): ...1, Qty, Amount
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
trees <- cleanedtreesdf2
#View(cleanedtreesdf2)
getwd()
## [1] "/cloud/project"
write.csv(trees, "df2.csv")
str(trees)
## spc_tbl_ [22,102 × 10] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ ...1            : num [1:22102] 1 2 3 4 5 6 7 8 9 10 ...
##  $ Type            : chr [1:22102] "Invoice" "Invoice" "Invoice" "Invoice" ...
##  $ Date            : chr [1:22102] "7/29/2023" "7/29/2023" "5/11/2023" "5/11/2023" ...
##  $ Num             : chr [1:22102] "9845" "9845" "SI-1560R" "SI-1560R" ...
##  $ Ship_To_Address2: chr [1:22102] "MacTavish Court" "MacTavish Court" "McDarmid Ave" "McDarmid Ave" ...
##  $ Ship_Zip        : chr [1:22102] "46703" "46703" "46703" "46703" ...
##  $ Item            : chr [1:22102] "Trees:Spruce:6' Fat Albert Blue Spruce" "Trip Charge" "Trees:Arborvitae:8' Emerald Green Arborvitae" "Replacement Discount" ...
##  $ Qty             : num [1:22102] 2 1 4 NA 1 NA 1 NA 1 NA ...
##  $ Sales_Price     : chr [1:22102] "525" "160" "450" "-1800" ...
##  $ Amount          : num [1:22102] 1050 160 1800 -1800 450 -450 924 -924 975 -975 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   ...1 = col_double(),
##   ..   Type = col_character(),
##   ..   Date = col_character(),
##   ..   Num = col_character(),
##   ..   Ship_To_Address2 = col_character(),
##   ..   Ship_Zip = col_character(),
##   ..   Item = col_character(),
##   ..   Qty = col_double(),
##   ..   Sales_Price = col_character(),
##   ..   Amount = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
install.packages("lubridate")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.0'
## (as 'lib' is unspecified)
library(lubridate)
library(glue)
trees
## # A tibble: 22,102 × 10
##     ...1 Type    Date    Num   Ship_To_Address2 Ship_Zip Item    Qty Sales_Price
##    <dbl> <chr>   <chr>   <chr> <chr>            <chr>    <chr> <dbl> <chr>      
##  1     1 Invoice 7/29/2… 9845  MacTavish Court  46703    "Tre…     2 525        
##  2     2 Invoice 7/29/2… 9845  MacTavish Court  46703    "Tri…     1 160        
##  3     3 Invoice 5/11/2… SI-1… McDarmid Ave     46703    "Tre…     4 450        
##  4     4 Invoice 5/11/2… SI-1… McDarmid Ave     46703    "Rep…    NA -1800      
##  5     5 Invoice 7/11/2… SI-1… McDarmid Ave     46703    "Tre…     1 450        
##  6     6 Invoice 7/11/2… SI-1… McDarmid Ave     46703    "Rep…    NA -450       
##  7     7 Invoice 4/12/2… 7662R Menza Drive      46706    "Tre…     1 924        
##  8     8 Invoice 4/12/2… 7662R Menza Drive      46706    "Rep…    NA -924       
##  9     9 Invoice 5/19/2… 8378R Cascina Lane     46706    "Tre…     1 975        
## 10    10 Invoice 5/19/2… 8378R Cascina Lane     46706    "Rep…    NA -975       
## # ℹ 22,092 more rows
## # ℹ 1 more variable: Amount <dbl>
fixed_date <- trees %>%
  mutate(Date = mdy(Date))%>%
  mutate(Sales_Price = as.numeric(Sales_Price))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Sales_Price = as.numeric(Sales_Price)`.
## Caused by warning:
## ! NAs introduced by coercion
fixed_date
## # A tibble: 22,102 × 10
##     ...1 Type    Date       Num       Ship_To_Address2 Ship_Zip Item         Qty
##    <dbl> <chr>   <date>     <chr>     <chr>            <chr>    <chr>      <dbl>
##  1     1 Invoice 2023-07-29 9845      MacTavish Court  46703    "Trees:Sp…     2
##  2     2 Invoice 2023-07-29 9845      MacTavish Court  46703    "Trip Cha…     1
##  3     3 Invoice 2023-05-11 SI-1560R  McDarmid Ave     46703    "Trees:Ar…     4
##  4     4 Invoice 2023-05-11 SI-1560R  McDarmid Ave     46703    "Replacem…    NA
##  5     5 Invoice 2023-07-11 SI-1560R2 McDarmid Ave     46703    "Trees:Ar…     1
##  6     6 Invoice 2023-07-11 SI-1560R2 McDarmid Ave     46703    "Replacem…    NA
##  7     7 Invoice 2023-04-12 7662R     Menza Drive      46706    "Trees:Pi…     1
##  8     8 Invoice 2023-04-12 7662R     Menza Drive      46706    "Replacem…    NA
##  9     9 Invoice 2023-05-19 8378R     Cascina Lane     46706    "Trees:Tu…     1
## 10    10 Invoice 2023-05-19 8378R     Cascina Lane     46706    "Replacem…    NA
## # ℹ 22,092 more rows
## # ℹ 2 more variables: Sales_Price <dbl>, Amount <dbl>
fixed_date %>%
  select(Sales_Price)%>%
  count(Sales_Price)%>%
  arrange(desc(n))
## # A tibble: 2,788 × 2
##    Sales_Price     n
##          <dbl> <int>
##  1          35  2807
##  2          85   496
##  3         375   457
##  4         175   375
##  5         325   349
##  6           0   325
##  7         395   317
##  8         475   292
##  9         425   289
## 10         495   276
## # ℹ 2,778 more rows
glimpse(trees)
## Rows: 22,102
## Columns: 10
## $ ...1             <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16…
## $ Type             <chr> "Invoice", "Invoice", "Invoice", "Invoice", "Invoice"…
## $ Date             <chr> "7/29/2023", "7/29/2023", "5/11/2023", "5/11/2023", "…
## $ Num              <chr> "9845", "9845", "SI-1560R", "SI-1560R", "SI-1560R2", …
## $ Ship_To_Address2 <chr> "MacTavish Court", "MacTavish Court", "McDarmid Ave",…
## $ Ship_Zip         <chr> "46703", "46703", "46703", "46703", "46703", "46703",…
## $ Item             <chr> "Trees:Spruce:6' Fat Albert Blue Spruce", "Trip Charg…
## $ Qty              <dbl> 2, 1, 4, NA, 1, NA, 1, NA, 1, NA, -1, 3, 1, 3, NA, 1,…
## $ Sales_Price      <chr> "525", "160", "450", "-1800", "450", "-450", "924", "…
## $ Amount           <dbl> 1050.00, 160.00, 1800.00, -1800.00, 450.00, -450.00, …
trees%>%
  count(Item == "Replacement Discount")
## # A tibble: 2 × 2
##   `Item == "Replacement Discount"`     n
##   <lgl>                            <int>
## 1 FALSE                            21050
## 2 TRUE                              1052

They have replaced 1052 out of 21050 trees.

trees%>%
  count(Type)
## # A tibble: 2 × 2
##   Type            n
##   <chr>       <int>
## 1 Credit Memo   403
## 2 Invoice     21699

They had 21699 invoices and 403 credit memos.

trees%>%
  count(Item, sort = TRUE)
## # A tibble: 1,309 × 2
##    Item                                             n
##    <chr>                                        <int>
##  1 Installation                                  2952
##  2 Extended Warranty                             1382
##  3 Tree Staking                                  1299
##  4 Replacement Discount                          1052
##  5 Trip Charge                                    684
##  6 Fertilization                                  482
##  7 Trees:Arborvitae:6' Emerald Green Arborvitae   415
##  8 Delivery Fee                                   391
##  9 Trees:Spruce:6' Norway Spruce                  384
## 10 Invoice Balancer                               288
## # ℹ 1,299 more rows
trees%>%
  count(Ship_To_Address2, sort = TRUE)
## # A tibble: 237 × 2
##    Ship_To_Address2       n
##    <chr>              <int>
##  1 <NA>               21351
##  2 Dunton Rd.            16
##  3 Garman Road           12
##  4 E. 600 N              10
##  5 Pollock Rd             8
##  6 E. 400 N               7
##  7 E. North G             7
##  8 Fox Knoll Cove         7
##  9 Linden Grove Drive     7
## 10 Lost Valley Drive      7
## # ℹ 227 more rows

Garman Road has good sales.

ggplot(fixed_date, aes(x = Date , fill = Item)) +
  geom_bar() +
  guides(fill = "none") +
  labs(title = "Dates",
       subtitle = "Items",
       x= NULL, y= "Sold Each Day")

ggplot(fixed_date, aes(x = Date, fill = Item)) +
  geom_histogram() +
  guides(fill = "none") +
  labs(title = "Dates",
       subtitle = "Items",
       x= NULL, y= "Sold Each Day")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.