Trees R us

#install.packages("lubridate")
library(readr)

## Warning: replacing previous import 'lifecycle::last_warnings' by
## 'rlang::last_warnings' when loading 'tibble'

## Warning: replacing previous import 'ellipsis::check_dots_unnamed' by
## 'rlang::check_dots_unnamed' when loading 'tibble'

## Warning: replacing previous import 'ellipsis::check_dots_used' by
## 'rlang::check_dots_used' when loading 'tibble'

## Warning: replacing previous import 'ellipsis::check_dots_empty' by
## 'rlang::check_dots_empty' when loading 'tibble'

## Warning: replacing previous import 'lifecycle::last_warnings' by
## 'rlang::last_warnings' when loading 'pillar'

## Warning: replacing previous import 'ellipsis::check_dots_unnamed' by
## 'rlang::check_dots_unnamed' when loading 'pillar'

## Warning: replacing previous import 'ellipsis::check_dots_used' by
## 'rlang::check_dots_used' when loading 'pillar'

## Warning: replacing previous import 'ellipsis::check_dots_empty' by
## 'rlang::check_dots_empty' when loading 'pillar'

library(janitor)

## 
## Attaching package: 'janitor'

## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test

library(lubridate)

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──

## ✓ ggplot2 3.3.2     ✓ dplyr   1.0.2
## ✓ tibble  3.0.4     ✓ stringr 1.4.0
## ✓ tidyr   1.1.2     ✓ forcats 0.5.0
## ✓ purrr   0.3.4

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x lubridate::as.difftime() masks base::as.difftime()
## x lubridate::date()        masks base::date()
## x dplyr::filter()          masks stats::filter()
## x lubridate::intersect()   masks base::intersect()
## x dplyr::lag()             masks stats::lag()
## x lubridate::setdiff()     masks base::setdiff()
## x lubridate::union()       masks base::union()

# Load cleanedtreesdf2.csv from the working directory; no column types are
# supplied, so readr guesses them and reports the guessed specification.
cleanedtreesdf2 <- read_csv(file = "cleanedtreesdf2.csv")

## Warning: Missing column names filled in: 'X1' [1]

## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   X1 = col_double(),
##   Type = col_character(),
##   Date = col_character(),
##   Num = col_character(),
##   Ship_To_Address2 = col_character(),
##   Ship_Zip = col_character(),
##   Item = col_character(),
##   Qty = col_double(),
##   Sales_Price = col_character(),
##   Amount = col_double()
## )

# Commented-out call to the interactive data viewer.
#View(cleanedtreesdf2)
# Auto-print the imported tibble to preview its first rows and column types.
cleanedtreesdf2

## # A tibble: 22,102 x 10
##       X1 Type  Date  Num   Ship_To_Address2 Ship_Zip Item    Qty Sales_Price
##    <dbl> <chr> <chr> <chr> <chr>            <chr>    <chr> <dbl> <chr>      
##  1     1 Invo… 7/29… 9845  MacTavish Court  46703    "Tre…     2 525        
##  2     2 Invo… 7/29… 9845  MacTavish Court  46703    "Tri…     1 160        
##  3     3 Invo… 5/11… SI-1… McDarmid Ave     46703    "Tre…     4 450        
##  4     4 Invo… 5/11… SI-1… McDarmid Ave     46703    "Rep…    NA -1800      
##  5     5 Invo… 7/11… SI-1… McDarmid Ave     46703    "Tre…     1 450        
##  6     6 Invo… 7/11… SI-1… McDarmid Ave     46703    "Rep…    NA -450       
##  7     7 Invo… 4/12… 7662R Menza Drive      46706    "Tre…     1 924        
##  8     8 Invo… 4/12… 7662R Menza Drive      46706    "Rep…    NA -924       
##  9     9 Invo… 5/19… 8378R Cascina Lane     46706    "Tre…     1 975        
## 10    10 Invo… 5/19… 8378R Cascina Lane     46706    "Rep…    NA -975       
## # … with 22,092 more rows, and 1 more variable: Amount <dbl>

# Build the analysis table: parse the month/day/year text in Date into real
# dates and coerce Sales_Price from text to numeric in a single mutate() call.
# NOTE(review): Sales_Price entries that are not plain numbers become NA here
# (R warns "NAs introduced by coercion") - confirm that is acceptable.
fixed_date <- cleanedtreesdf2 %>%
  mutate(
    Date = mdy(Date),
    Sales_Price = as.numeric(Sales_Price)
  )

## Warning: Problem with `mutate()` input `Sales_Price`.
## i NAs introduced by coercion
## i Input `Sales_Price` is `as.numeric(Sales_Price)`.

## Warning in mask$eval_all_mutate(dots[[i]]): NAs introduced by coercion

# Tally how often each numeric sales price occurs, most frequent first.
# count() already returns only the counted column plus `n`, so no select() is
# needed beforehand, and sort = TRUE orders the result by descending n.
fixed_date %>%
  count(Sales_Price, sort = TRUE)

## # A tibble: 2,788 x 2
##    Sales_Price     n
##          <dbl> <int>
##  1          35  2807
##  2          85   496
##  3         375   457
##  4         175   375
##  5         325   349
##  6           0   325
##  7         395   317
##  8         475   292
##  9         425   289
## 10         495   276
## # … with 2,778 more rows

Your total sales data

# Number of rows recorded on each date, ordered from the busiest date down.
count_dates <- count(fixed_date, Date, sort = TRUE)
count_dates

## # A tibble: 1,948 x 2
##    Date           n
##    <date>     <int>
##  1 2021-04-17    79
##  2 2020-05-16    73
##  3 2023-04-29    72
##  4 2019-05-11    71
##  5 2020-05-27    70
##  6 2020-09-19    69
##  7 2020-05-02    63
##  8 2020-06-13    61
##  9 2021-04-24    60
## 10 2019-10-09    57
## # … with 1,938 more rows

# Daily sales line chart.
# The table has one row per invoice line, so a date can appear many times;
# drawing geom_line() over those raw rows produces vertical zig-zags rather
# than a daily series. Total Qty and Amount per date first so the line has
# exactly one point per day, matching the "each day" title.
# na.rm = TRUE skips rows whose Qty or Amount is missing.
fixed_date %>%
  group_by(Date) %>%
  summarise(
    Qty = sum(Qty, na.rm = TRUE),
    Amount = sum(Amount, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  ggplot(aes(x = Date, y = Qty, group = 1, colour = Amount)) +
  geom_line() +
  labs(
    title = "Number of sales each day",
    subtitle = "Date and Quantity",
    x = "Date Sold",
    y = "Number of Items Sold"
  )

# Line chart of the number of rows recorded per date.
count_dates %>%
  ggplot(aes(x = Date, y = n, group = 1)) +
  geom_line()

When your sales peaked

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

# Mean quantity at each distinct Sales_Price, ignoring missing quantities.
# NOTE(review): this reads the raw import, where Sales_Price is still text, so
# prices are grouped and ordered as strings - confirm that is intended.
summarise(
  group_by(cleanedtreesdf2, Sales_Price),
  Data = mean(Qty, na.rm = TRUE)
)

## `summarise()` ungrouping output (override with `.groups` argument)

## # A tibble: 2,788 x 2
##    Sales_Price  Data
##    <chr>       <dbl>
##  1 -0.01       0.968
##  2 -0.02       1    
##  3 -0.03       1    
##  4 -0.04       1    
##  5 -0.05       1    
##  6 -0.06       1    
##  7 -0.07       1    
##  8 -0.08       1    
##  9 -0.09       1    
## 10 -0.1        1    
## # … with 2,778 more rows

# Compact column-by-column overview (name, type, first values) of the raw import.
cleanedtreesdf2 %>%
  glimpse()

## Rows: 22,102
## Columns: 10
## $ X1               <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1…
## $ Type             <chr> "Invoice", "Invoice", "Invoice", "Invoice", "Invoice…
## $ Date             <chr> "7/29/2023", "7/29/2023", "5/11/2023", "5/11/2023", …
## $ Num              <chr> "9845", "9845", "SI-1560R", "SI-1560R", "SI-1560R2",…
## $ Ship_To_Address2 <chr> "MacTavish Court", "MacTavish Court", "McDarmid Ave"…
## $ Ship_Zip         <chr> "46703", "46703", "46703", "46703", "46703", "46703"…
## $ Item             <chr> "Trees:Spruce:6' Fat Albert Blue Spruce", "Trip Char…
## $ Qty              <dbl> 2, 1, 4, NA, 1, NA, 1, NA, 1, NA, -1, 3, 1, 3, NA, 1…
## $ Sales_Price      <chr> "525", "160", "450", "-1800", "450", "-450", "924", …
## $ Amount           <dbl> 1050.00, 160.00, 1800.00, -1800.00, 450.00, -450.00,…

# Smoothed trend of Qty across Date, with Amount mapped to colour; the
# smoothing method is left to geom_smooth()'s default choice.
ggplot(fixed_date, aes(x = Date, y = Qty, group = 1, colour = Amount)) +
  geom_smooth() +
  ggtitle("Number of sales each day", subtitle = "Date and Quantity") +
  xlab("Date Sold") +
  ylab("Number of Items Sold")

## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

## Warning: Removed 1328 rows containing non-finite values (stat_smooth).

Trees R us

2023-11-28