# Install lubridate only when it is not already available, so that re-running
# or knitting this script does not re-download the package every time.
if (!requireNamespace("lubridate", quietly = TRUE)) {
  install.packages("lubridate")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.0'
## (as 'lib' is unspecified)
library(tidyverse)
## Warning: replacing previous import 'lifecycle::last_warnings' by
## 'rlang::last_warnings' when loading 'tibble'
## Warning: replacing previous import 'ellipsis::check_dots_unnamed' by
## 'rlang::check_dots_unnamed' when loading 'tibble'
## Warning: replacing previous import 'ellipsis::check_dots_used' by
## 'rlang::check_dots_used' when loading 'tibble'
## Warning: replacing previous import 'ellipsis::check_dots_empty' by
## 'rlang::check_dots_empty' when loading 'tibble'
## Warning: replacing previous import 'lifecycle::last_warnings' by
## 'rlang::last_warnings' when loading 'pillar'
## Warning: replacing previous import 'ellipsis::check_dots_unnamed' by
## 'rlang::check_dots_unnamed' when loading 'pillar'
## Warning: replacing previous import 'ellipsis::check_dots_used' by
## 'rlang::check_dots_used' when loading 'pillar'
## Warning: replacing previous import 'ellipsis::check_dots_empty' by
## 'rlang::check_dots_empty' when loading 'pillar'
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.4 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(glue)
##
## Attaching package: 'glue'
## The following object is masked from 'package:dplyr':
##
## collapse
library(readr)
# Load the cleaned tree-sales export. The first column of the CSV has no
# header, so readr names it `X1` (see the warning below).
cleanedtreesdf2 <- read_csv("cleanedtreesdf2.csv")
## Warning: Missing column names filled in: 'X1' [1]
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## X1 = col_double(),
## Type = col_character(),
## Date = col_character(),
## Num = col_character(),
## Ship_To_Address2 = col_character(),
## Ship_Zip = col_character(),
## Item = col_character(),
## Qty = col_double(),
## Sales_Price = col_character(),
## Amount = col_double()
## )
# Open the spreadsheet viewer only in an interactive session; when the script
# is knitted or run in batch mode there is no viewer to open. (This replaces a
# quoted "View(...)" string, which did nothing except echo itself.)
if (interactive()) {
  View(cleanedtreesdf2)
}
# Print a preview of the loaded tibble.
cleanedtreesdf2
## # A tibble: 22,102 x 10
## X1 Type Date Num Ship_To_Address2 Ship_Zip Item Qty Sales_Price
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <chr>
## 1 1 Invo… 7/29… 9845 MacTavish Court 46703 "Tre… 2 525
## 2 2 Invo… 7/29… 9845 MacTavish Court 46703 "Tri… 1 160
## 3 3 Invo… 5/11… SI-1… McDarmid Ave 46703 "Tre… 4 450
## 4 4 Invo… 5/11… SI-1… McDarmid Ave 46703 "Rep… NA -1800
## 5 5 Invo… 7/11… SI-1… McDarmid Ave 46703 "Tre… 1 450
## 6 6 Invo… 7/11… SI-1… McDarmid Ave 46703 "Rep… NA -450
## 7 7 Invo… 4/12… 7662R Menza Drive 46706 "Tre… 1 924
## 8 8 Invo… 4/12… 7662R Menza Drive 46706 "Rep… NA -924
## 9 9 Invo… 5/19… 8378R Cascina Lane 46706 "Tre… 1 975
## 10 10 Invo… 5/19… 8378R Cascina Lane 46706 "Rep… NA -975
## # … with 22,092 more rows, and 1 more variable: Amount <dbl>
# Convert the text columns read from the CSV into proper types:
#   * Date        -> Date, parsed as month/day/year by lubridate::mdy().
#   * Sales_Price -> numeric. The column was read as character, and plain
#     as.numeric() turned some entries into NA by coercion. parse_number()
#     ignores grouping marks and surrounding non-numeric characters (e.g.
#     "1,800.00" or "$450"), so those values are kept.
#     NOTE(review): the offending raw values are not visible here; presumably
#     they contain thousands separators or currency symbols -- confirm with
#     readr::problems() if any parsing failures are still reported.
fixed_date <- cleanedtreesdf2 %>%
  mutate(
    Date = mdy(Date),
    Sales_Price = parse_number(Sales_Price)
  )
# Frequency table of sales prices, most common price first.
fixed_date %>%
  count(Sales_Price) %>%
  arrange(desc(n))
## # A tibble: 2,788 x 2
## Sales_Price n
## <dbl> <int>
## 1 35 2807
## 2 85 496
## 3 375 457
## 4 175 375
## 5 325 349
## 6 0 325
## 7 395 317
## 8 475 292
## 9 425 289
## 10 495 276
## # … with 2,778 more rows
# Number of rows recorded for each date, busiest dates first.
count_dates <- count(fixed_date, Date, sort = TRUE)
# Print the table.
count_dates
## # A tibble: 1,948 x 2
## Date n
## <date> <int>
## 1 2021-04-17 79
## 2 2020-05-16 73
## 3 2023-04-29 72
## 4 2019-05-11 71
## 5 2020-05-27 70
## 6 2020-09-19 69
## 7 2020-05-02 63
## 8 2020-06-13 61
## 9 2021-04-24 60
## 10 2019-10-09 57
## # … with 1,938 more rows
# Line chart of quantity against date, with the line coloured by Amount.
# group = 1 makes ggplot draw a single connected line across all rows.
ggplot(
  data = fixed_date,
  mapping = aes(x = Date, y = Qty, group = 1, colour = Amount)
) +
  geom_line() +
  labs(
    title = "Number of Sales",
    subtitle = "Date & Quantity",
    x = "Date Sold",
    y = "Number of Items Sold"
  )

# Line chart of the number of rows per date (the `n` column of count_dates).
count_dates %>%
  ggplot(aes(x = Date, y = n, group = 1)) +
  geom_line()

# Smoothed trend of quantity against date.
#
# Rows with a missing Date or a non-finite Qty are dropped explicitly before
# plotting; previously stat_smooth() discarded them itself and emitted a
# "Removed ... rows containing non-finite values" warning.
#
# The former `colour = Amount` mapping is removed: Amount is continuous and
# varies within the single group (group = 1), so ggplot2 drops it during the
# smoothing computation and it never affected the fitted line.
fixed_date %>%
  filter(!is.na(Date), is.finite(Qty)) %>%
  ggplot(aes(x = Date, y = Qty, group = 1)) +
  geom_smooth() +
  labs(
    title = "Number of Sales",
    subtitle = "Date & Quantity",
    x = "Date Sold",
    y = "Number of Items Sold"
  )
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
