#install.packages("lubridate")
library(readr)
## Warning: replacing previous import 'lifecycle::last_warnings' by
## 'rlang::last_warnings' when loading 'tibble'
## Warning: replacing previous import 'ellipsis::check_dots_unnamed' by
## 'rlang::check_dots_unnamed' when loading 'tibble'
## Warning: replacing previous import 'ellipsis::check_dots_used' by
## 'rlang::check_dots_used' when loading 'tibble'
## Warning: replacing previous import 'ellipsis::check_dots_empty' by
## 'rlang::check_dots_empty' when loading 'tibble'
## Warning: replacing previous import 'lifecycle::last_warnings' by
## 'rlang::last_warnings' when loading 'pillar'
## Warning: replacing previous import 'ellipsis::check_dots_unnamed' by
## 'rlang::check_dots_unnamed' when loading 'pillar'
## Warning: replacing previous import 'ellipsis::check_dots_used' by
## 'rlang::check_dots_used' when loading 'pillar'
## Warning: replacing previous import 'ellipsis::check_dots_empty' by
## 'rlang::check_dots_empty' when loading 'pillar'
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ dplyr 1.0.2
## ✓ tibble 3.0.4 ✓ stringr 1.4.0
## ✓ tidyr 1.1.2 ✓ forcats 0.5.0
## ✓ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x lubridate::as.difftime() masks base::as.difftime()
## x lubridate::date() masks base::date()
## x dplyr::filter() masks stats::filter()
## x lubridate::intersect() masks base::intersect()
## x dplyr::lag() masks stats::lag()
## x lubridate::setdiff() masks base::setdiff()
## x lubridate::union() masks base::union()
# Load the cleaned tree-sales export from the working directory. The first
# column has no header, so readr assigns it a name (see the warning below);
# the remaining column types are guessed by readr.
cleanedtreesdf2 <- read_csv(file = "cleanedtreesdf2.csv")
## Warning: Missing column names filled in: 'X1' [1]
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## X1 = col_double(),
## Type = col_character(),
## Date = col_character(),
## Num = col_character(),
## Ship_To_Address2 = col_character(),
## Ship_Zip = col_character(),
## Item = col_character(),
## Qty = col_double(),
## Sales_Price = col_character(),
## Amount = col_double()
## )
#View(cleanedtreesdf2)
# Preview the imported table (auto-printed; the tibble shows its first rows).
cleanedtreesdf2
## # A tibble: 22,102 x 10
## X1 Type Date Num Ship_To_Address2 Ship_Zip Item Qty Sales_Price
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <chr>
## 1 1 Invo… 7/29… 9845 MacTavish Court 46703 "Tre… 2 525
## 2 2 Invo… 7/29… 9845 MacTavish Court 46703 "Tri… 1 160
## 3 3 Invo… 5/11… SI-1… McDarmid Ave 46703 "Tre… 4 450
## 4 4 Invo… 5/11… SI-1… McDarmid Ave 46703 "Rep… NA -1800
## 5 5 Invo… 7/11… SI-1… McDarmid Ave 46703 "Tre… 1 450
## 6 6 Invo… 7/11… SI-1… McDarmid Ave 46703 "Rep… NA -450
## 7 7 Invo… 4/12… 7662R Menza Drive 46706 "Tre… 1 924
## 8 8 Invo… 4/12… 7662R Menza Drive 46706 "Rep… NA -924
## 9 9 Invo… 5/19… 8378R Cascina Lane 46706 "Tre… 1 975
## 10 10 Invo… 5/19… 8378R Cascina Lane 46706 "Rep… NA -975
## # … with 22,092 more rows, and 1 more variable: Amount <dbl>
# Build the analysis table from the raw import:
#   * `Date` is parsed from month/day/year text into Date values.
#   * `Sales_Price` is converted from text to numeric; entries that are not
#     plain numbers become NA (see the coercion warning below).
fixed_date <- mutate(
  cleanedtreesdf2,
  Date = mdy(Date),
  Sales_Price = as.numeric(Sales_Price)
)
## Warning: Problem with `mutate()` input `Sales_Price`.
## i NAs introduced by coercion
## i Input `Sales_Price` is `as.numeric(Sales_Price)`.
## Warning in mask$eval_all_mutate(dots[[i]]): NAs introduced by coercion
# Frequency table of sale prices, most common price first.
count(fixed_date, Sales_Price, sort = TRUE)
## # A tibble: 2,788 x 2
## Sales_Price n
## <dbl> <int>
## 1 35 2807
## 2 85 496
## 3 375 457
## 4 175 375
## 5 325 349
## 6 0 325
## 7 395 317
## 8 475 292
## 9 425 289
## 10 495 276
## # … with 2,778 more rows
Overview of your total sales data
# Tally how many rows fall on each date, busiest dates first, then show it.
count_dates <- count(fixed_date, Date, sort = TRUE)
count_dates
## # A tibble: 1,948 x 2
## Date n
## <date> <int>
## 1 2021-04-17 79
## 2 2020-05-16 73
## 3 2023-04-29 72
## 4 2019-05-11 71
## 5 2020-05-27 70
## 6 2020-09-19 69
## 7 2020-05-02 63
## 8 2020-06-13 61
## 9 2021-04-24 60
## 10 2019-10-09 57
## # … with 1,938 more rows
# Daily sales volume over time.
# `fixed_date` holds many rows per date (one per invoice line), so plotting it
# directly makes geom_line() zig-zag through every row that shares a date.
# Collapse to one row per day first so the line matches the title and axis
# labels: `Qty` becomes the day's total quantity and `Amount` the day's total
# amount. Missing values are ignored when summing.
fixed_date %>%
  group_by(Date) %>%
  summarise(
    Qty = sum(Qty, na.rm = TRUE),
    Amount = sum(Amount, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  ggplot(aes(x = Date, y = Qty, group = 1, colour = Amount)) +
  geom_line() +
  labs(
    title = "Number of sales each day",
    subtitle = "Date and Quantity",
    x = "Date Sold",
    y = "Number of Items Sold",
    colour = "Total Amount"
  )
# Line chart of how many rows were recorded on each date.
count_dates %>%
  ggplot(aes(x = Date, y = n, group = 1)) +
  geom_line()
When your sales peaked
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
# Average quantity per row at each sale price.
# Group on `fixed_date`, where `Sales_Price` is numeric. The raw import keeps
# it as text, so the groups came out in string order ("-0.01", "-0.02", ...,
# "-0.1") instead of numeric order.
# NOTE: the pasted output below predates this change; re-run to refresh it.
fixed_date %>%
  group_by(Sales_Price) %>%
  summarize(
    Data = mean(Qty, na.rm = TRUE),
    .groups = "drop"
  )
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2,788 x 2
## Sales_Price Data
## <chr> <dbl>
## 1 -0.01 0.968
## 2 -0.02 1
## 3 -0.03 1
## 4 -0.04 1
## 5 -0.05 1
## 6 -0.06 1
## 7 -0.07 1
## 8 -0.08 1
## 9 -0.09 1
## 10 -0.1 1
## # … with 2,778 more rows
# Column-wise overview of the raw import: one line per column with its type
# and first few values.
cleanedtreesdf2 %>%
  glimpse()
## Rows: 22,102
## Columns: 10
## $ X1 <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1…
## $ Type <chr> "Invoice", "Invoice", "Invoice", "Invoice", "Invoice…
## $ Date <chr> "7/29/2023", "7/29/2023", "5/11/2023", "5/11/2023", …
## $ Num <chr> "9845", "9845", "SI-1560R", "SI-1560R", "SI-1560R2",…
## $ Ship_To_Address2 <chr> "MacTavish Court", "MacTavish Court", "McDarmid Ave"…
## $ Ship_Zip <chr> "46703", "46703", "46703", "46703", "46703", "46703"…
## $ Item <chr> "Trees:Spruce:6' Fat Albert Blue Spruce", "Trip Char…
## $ Qty <dbl> 2, 1, 4, NA, 1, NA, 1, NA, 1, NA, -1, 3, 1, 3, NA, 1…
## $ Sales_Price <chr> "525", "160", "450", "-1800", "450", "-450", "924", …
## $ Amount <dbl> 1050.00, 160.00, 1800.00, -1800.00, 450.00, -450.00,…
# Smoothed trend of `Qty` against `Date`. geom_smooth() chooses its own
# fitting method and skips rows with non-finite values (see the message and
# warning below).
ggplot(fixed_date, aes(x = Date, y = Qty, group = 1, colour = Amount)) +
  geom_smooth() +
  labs(
    title = "Number of sales each day",
    subtitle = "Date and Quantity",
    x = "Date Sold",
    y = "Number of Items Sold"
  )
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1328 rows containing non-finite values (stat_smooth).