# Install lubridate only when it is not already available, so that re-running
# or knitting this script does not reinstall the package on every execution.
if (!requireNamespace("lubridate", quietly = TRUE)) {
  install.packages("lubridate")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.0'
## (as 'lib' is unspecified)
library(tidyverse)
## Warning: replacing previous import 'lifecycle::last_warnings' by
## 'rlang::last_warnings' when loading 'tibble'
## Warning: replacing previous import 'ellipsis::check_dots_unnamed' by
## 'rlang::check_dots_unnamed' when loading 'tibble'
## Warning: replacing previous import 'ellipsis::check_dots_used' by
## 'rlang::check_dots_used' when loading 'tibble'
## Warning: replacing previous import 'ellipsis::check_dots_empty' by
## 'rlang::check_dots_empty' when loading 'tibble'
## Warning: replacing previous import 'lifecycle::last_warnings' by
## 'rlang::last_warnings' when loading 'pillar'
## Warning: replacing previous import 'ellipsis::check_dots_unnamed' by
## 'rlang::check_dots_unnamed' when loading 'pillar'
## Warning: replacing previous import 'ellipsis::check_dots_used' by
## 'rlang::check_dots_used' when loading 'pillar'
## Warning: replacing previous import 'ellipsis::check_dots_empty' by
## 'rlang::check_dots_empty' when loading 'pillar'
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.4     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(glue)
## 
## Attaching package: 'glue'
## The following object is masked from 'package:dplyr':
## 
##     collapse
library(readr)
# Load the cleaned sales export. The first column of the CSV has no header,
# so readr fills in a name for it (see the warning below); Date and
# Sales_Price arrive as character and are converted in a later step.
cleanedtreesdf2 <- read_csv("cleanedtreesdf2.csv")
## Warning: Missing column names filled in: 'X1' [1]
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   X1 = col_double(),
##   Type = col_character(),
##   Date = col_character(),
##   Num = col_character(),
##   Ship_To_Address2 = col_character(),
##   Ship_Zip = col_character(),
##   Item = col_character(),
##   Qty = col_double(),
##   Sales_Price = col_character(),
##   Amount = col_double()
## )
# Open the spreadsheet viewer only in an interactive session. A quoted
# "View(...)" is just a string literal that prints itself, and a bare View()
# call is not usable when the script is knitted or run non-interactively.
if (interactive()) {
  View(cleanedtreesdf2)
}
# Print a preview of the loaded tibble.
cleanedtreesdf2
## # A tibble: 22,102 x 10
##       X1 Type  Date  Num   Ship_To_Address2 Ship_Zip Item    Qty Sales_Price
##    <dbl> <chr> <chr> <chr> <chr>            <chr>    <chr> <dbl> <chr>      
##  1     1 Invo… 7/29… 9845  MacTavish Court  46703    "Tre…     2 525        
##  2     2 Invo… 7/29… 9845  MacTavish Court  46703    "Tri…     1 160        
##  3     3 Invo… 5/11… SI-1… McDarmid Ave     46703    "Tre…     4 450        
##  4     4 Invo… 5/11… SI-1… McDarmid Ave     46703    "Rep…    NA -1800      
##  5     5 Invo… 7/11… SI-1… McDarmid Ave     46703    "Tre…     1 450        
##  6     6 Invo… 7/11… SI-1… McDarmid Ave     46703    "Rep…    NA -450       
##  7     7 Invo… 4/12… 7662R Menza Drive      46706    "Tre…     1 924        
##  8     8 Invo… 4/12… 7662R Menza Drive      46706    "Rep…    NA -924       
##  9     9 Invo… 5/19… 8378R Cascina Lane     46706    "Tre…     1 975        
## 10    10 Invo… 5/19… 8378R Cascina Lane     46706    "Rep…    NA -975       
## # … with 22,092 more rows, and 1 more variable: Amount <dbl>
# Convert the text columns to proper types in a single mutate():
#   Date        -> Date, parsed as month/day/year by lubridate::mdy()
#   Sales_Price -> numeric; entries that are not plain numbers become NA
#                  (as.numeric() emits a coercion warning when that happens)
fixed_date <- mutate(
  cleanedtreesdf2,
  Date = mdy(Date),
  Sales_Price = as.numeric(Sales_Price)
)
## Warning: Problem with `mutate()` input `Sales_Price`.
## i NAs introduced by coercion
## i Input `Sales_Price` is `as.numeric(Sales_Price)`.
## Warning in mask$eval_all_mutate(dots[[i]]): NAs introduced by coercion
# Frequency table of Sales_Price values, most frequent first.
# count(sort = TRUE) orders the result by descending n.
count(fixed_date, Sales_Price, sort = TRUE)
## # A tibble: 2,788 x 2
##    Sales_Price     n
##          <dbl> <int>
##  1          35  2807
##  2          85   496
##  3         375   457
##  4         175   375
##  5         325   349
##  6           0   325
##  7         395   317
##  8         475   292
##  9         425   289
## 10         495   276
## # … with 2,778 more rows
# Number of rows (line items) recorded on each date, busiest dates first.
count_dates <- count(fixed_date, Date, sort = TRUE)
count_dates
## # A tibble: 1,948 x 2
##    Date           n
##    <date>     <int>
##  1 2021-04-17    79
##  2 2020-05-16    73
##  3 2023-04-29    72
##  4 2019-05-11    71
##  5 2020-05-27    70
##  6 2020-09-19    69
##  7 2020-05-02    63
##  8 2020-06-13    61
##  9 2021-04-24    60
## 10 2019-10-09    57
## # … with 1,938 more rows
# Line chart of per-row Qty against Date, with the line coloured by Amount.
# group = 1 joins every observation into a single path.
ggplot(
  data = fixed_date,
  mapping = aes(x = Date, y = Qty, group = 1, colour = Amount)
) +
  geom_line() +
  labs(
    title = "Number of Sales",
    subtitle = "Date & Quantity",
    x = "Date Sold",
    y = "Number of Items Sold"
  )

# Line chart of the number of rows recorded per date (n from count_dates).
count_dates %>%
  ggplot(mapping = aes(x = Date, y = n, group = 1)) +
  geom_line()

# Smoothed trend of per-row Qty over Date.
# The previous `colour = Amount` mapping was removed: stat_smooth() fits one
# curve per group and drops aesthetics that vary within a group, so with
# group = 1 the mapping had no visible effect (and newer ggplot2 versions warn
# about the dropped aesthetic).
# Rows with a missing Qty are still removed by stat_smooth() with a warning.
fixed_date %>%
  ggplot(aes(x = Date, y = Qty, group = 1)) +
  geom_smooth() +
  labs(
    title = "Number of Sales",
    subtitle = "Date & Quantity",
    x = "Date Sold",
    y = "Number of Items Sold"
  )
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1328 rows containing non-finite values (stat_smooth).