This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE
parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
library(readxl) # Load up libraries:pacman if (!require(“pacman”)) install.packages(“pacman”)
library(tidyverse) # library(tidyquant) library(readxl) # library(writexl)
readxl::read_excel() ?read_excel
path1 = “./bikes.xlsx”
bikes_tbl <- read_excel(path1) bikeshop_tbl <- read_excel(path = “./bikeshops.xlsx”) orderlines_tbl <- read_excel(path = “./orderlines.xlsx”) bike_orderlines_tbl <- read_excel(path = “./bike_orderlines.xlsx”)
bikes_tbl str(bikes_tbl) glimpse(bikes_tbl)
bike_orderlines_tbl %>% select(starts_with(“order_”))
bike_orderlines_tbl %>% select(bikeshop_name:state, everything())
bike_orderlines_tbl %>% pull(total_price) %>% mean() ## select_if() —- bike_orderlines_tbl %>% select_if(is.numeric)
bikes_tbl %>% select(model, price) %>% arrange(desc(price))
bikes_tbl %>% select(model, price) %>% filter(price > mean(price))
bikes_tbl %>% select(model, price) %>% filter((price > 5000) | (price < 1000)) %>% arrange(desc(price)) %>% View()
bikes_tbl %>% select(model, price) %>% filter((price > 6000), model %>% str_detect(“Supersix”)) %>% View()
bike_orderlines_tbl %>% filter(category_2 == “Over Mountain”) %>% View()
bikes_tbl %>% arrange(price) %>% slice(1:5) %>% View()
bike_orderlines_tbl %>% distinct(category_1, category_2) %>% View()
bike_orderlines_tbl %>% select(order_date, model, quantity, price) %>% mutate(total_price = quantity * price) %>% View()
bike_orderlines_tbl %>% mutate(total_price_binned = ntile(total_price, 3)) %>% View() ## case_when() —- ### Numeric to categorical bike_orderlines_tbl %>% mutate(total_price_binned = ntile(total_price, 3)) %>% mutate(total_price_binned2 = case_when( total_price > quantile(total_price, 0.75) ~ “High”, total_price > quantile(total_price, 0.25) ~ “Medium”, TRUE ~ “Low” )) %>% View()
?left_join bike_order <- left_join(orderlines_tbl, bikes_tbl, by = c(“product.id” = “bike.id”))
bike_order_1 <- bike_order %>% separate(description, into = c(“category 1”, “category 2”, “frame_material”), sep = ” - “) %>% left_join(bikeshop_tbl, by = c(”customer.id” = “bikeshop.id”)) %>% separate(location, into = c(“city”, “state”), sep = “,”) %>% mutate(total_price = quantity * price) %>% select(-1) %>% select(order.date, order.id, order.line, everything()) %>% select(1:3, quantity, price, total_price, everything()) %>% select(1:6, model:state)
bike_orderlines_tbl %>% summarise( revenue = sum(total_price) )
bike_orderlines_tbl %>% group_by(category_1, category_2, frame_material) %>% summarise( revenue = sum(total_price) ) %>% ungroup() %>% arrange(desc(revenue))
bike_orderlines_tbl %>% group_by(category_1, category_2) %>% summarise( count = n(), avg = mean(total_price), sd = sd(total_price) )
bike_orderlines_missing <- bike_orderlines_tbl %>% mutate(total_price = c(rep(NA, 4), total_price[5:nrow(.)] ))
bike_orderlines_missing %>% summarize_all(~ sum(is.na(.)) / length(.))
bike_order_1 %>% rename(order_date = order.date) %>%
rename(Bikeshop name
= bikeshop.name) %>% View() #
set_names(): change all column names at once bike_order_1 %>%
set_names(names(.) %>% str_replace(“_“,”.”)) %>% View()
bikeshop_revenue_tbl <- bike_orderlines_tbl %>% select(bikeshop_name, category_1, total_price) %>% group_by(bikeshop_name, category_1) %>% summarise(sales = sum(total_price)) %>% ungroup() %>% arrange(desc(sales))
bikeshop_revenue_formatted <- bikeshop_revenue_tbl %>% spread(key = category_1, value = sales)
bikeshop_revenue_formatted %>% gather(key = “category_1”, value = “sales”, Mountain, Road)
bike_orderlines_tbl %>% select(-contains(“order”)) %>% bind_cols(bike_orderlines_tbl %>% select(order_id)) %>% View()
df <- data.frame(points=c(10, NA, 15, 15, 14, 16), assists=c(4, NA, 4, NA, 9, 3), rebounds=c(NA, 5, 10, 7, 7, NA))
df
df %>% drop_na()
df %>% drop_na(rebounds)
df %>% replace_na(list(rebounds = 0)) %>% str() df %>% replace_na(list(rebounds = 0L)) %>% str()
library(lubridate)
order_date_tbl <- bike_orderlines_tbl %>% select(order_date)
order_date_tbl %>% pull(order_date) %>% class()
order_date_tbl %>% mutate(order_date_chr = as.character(order_date)) %>% mutate(order_date_chr2 = order_date_chr %>% str_c(” 00:00:00”)) %>% mutate(order_date_date = order_date_chr %>% ymd()) %>% mutate(order_date_dttm = order_date_chr2 %>% ymd_hms())
“06/01/18” %>% mdy()
“06/01/18 12:30:15” %>% mdy_hms()
“2011-01-01” %>% ymd() %>% year()
ddays(1)
bike_sales_y_tbl <- bike_orderlines_tbl %>% select(order_date, total_price) %>% mutate(year = year(order_date)) %>% group_by(year) %>% summarize(sales = sum(total_price)) %>% ungroup()
bike_sales_m_tbl <- bike_orderlines_tbl %>% select(order_date, total_price) %>% mutate(order_date = ymd(order_date)) %>% mutate(year = year(order_date), month = month(order_date, label = TRUE)) %>% group_by(year, month) %>% summarize(sales = sum(total_price))
bike_orderlines_tbl %>% select(order_date, total_price) %>% mutate(order_date = ymd(order_date)) %>% mutate(year_month = floor_date(order_date, unit = “month”)) %>% group_by(year_month) %>% summarize(sales = sum(total_price))
bike_orderlines_tbl %>% mutate(order_date = ymd(order_date)) %>% filter(year(order_date) %in% c(2012, 2013))