library(tidyverse)
library(tidyquant)
library(parsnip)
library(plotly)
library(DT)
library(gt)
library(fontawesome)
library(htmltools)
Pivoting
stock_data_tbl <- c("AAPL", "GOOG", "NFLX", "NVDA") %>%
tq_get(from = "2010-01-01", to = "2019-12-31") %>%
select(symbol, date, adjusted)
stock_data_tbl %>%
pivot_table(
.rows = c(~symbol, ~ MONTH(date, label = TRUE)),
.columns = ~ YEAR(date),
.values = ~ MEDIAN(adjusted)
) %>%
rename_at(.vars = 1:2,
~ c("Symbol", "Month")) %>%
DT::datatable()
stock_data_tbl %>%
pivot_table(
.rows = c(~ YEAR(date), ~ MONTH(date, label=TRUE)),
.columns = ~ symbol,
.values = ~ MEDIAN(adjusted)
) %>%
rename_at(
.vars = 1:2,
~ c("YEAR", "MONTH")
) %>%
DT::datatable()
stock_data_tbl %>%
pivot_table(
.rows = c(~ YEAR(date), ~symbol),
.columns = ~ MONTH(date) %>% as.factor(),
.values = ~ AVERAGE(adjusted)
) %>%
rename_at(
.vars = 1:2,
~ c("YEAR", "Symbol")
) %>%
DT::datatable()
stock_data_tbl %>%
pivot_table(
.rows = ~YEAR(date),
.columns = ~symbol,
.values = ~ PCT_CHANGE_FIRSTLAST(adjusted)
) %>%
rename(YEAR =1)
mpg %>%
group_by(manufacturer) %>%
count(class, name ="n") %>%
pivot_wider(names_from = class, #columns
values_from = n,
values_fill = 0) %>%
ungroup() -> my_pivot_wider
my_pivot_wider
my_pivot_wider %>%
pivot_longer(
cols = compact:subcompact,
names_to = "class",
values_to = "value"
)
my_pivot_wider %>%
pivot_longer(
cols = compact:subcompact,
names_to = "class",
values_to = "value"
) %>%
ggplot(aes(class, manufacturer, fill = value))+
geom_tile()+
geom_label(aes(label = value), color = "white")+
scale_fill_viridis_c()+
theme_minimal()

mpg %>%
pivot_table(
.rows = class,
.values = ~ list(lm(hwy ~ displ + cyl - 1))
)
Simple Linear with group_by()
- manufactuere group_by
- 그룹별 lm listing
- map_dfr 을 활용한 dataframe
- 각 식의 값에 대한 star_rating 생성
mpg %>%
mutate(manufacturer = as_factor(manufacturer)) %>%
group_by(manufacturer) %>%
group_split() %>%
map_dfr( .f = function(df) {
lm(hwy ~ cty + displ + cyl, data = df) %>%
glance() %>%
add_column(Brand = unique(df$manufacturer), .before = 1)
}) -> linear_sample
linear_sample
## star rating
star_rating <-function(rating, max_rate = 5) {
rating <- floor(rating + 0.5)
stars <- lapply(seq_len(max_rate), function(i) {
if(i <= rating) {
fontawesome::fa("star", fill="orange")
}else {
fontawesome::fa("star", fill="grey")
}
})
label <- sprintf("%s out of %s", rating, max_rate)
div_out <- div(title = label , "aria-label"=label, role = "img", stars)
as.character(div_out) %>%
gt::html()
}
linear_sample %>%
select(Brand, nobs, r.squared, adj.r.squared, p.value) %>%
mutate(Brand = str_to_title(Brand)) %>%
mutate(Rating = cut_number(r.squared, n=5) %>% as.numeric()) %>%
mutate(Rating = map(Rating, star_rating)) %>%
arrange(desc(adj.r.squared))
read.csv("free_r_tips-master/011_group_split_linear_regression/kbo.csv", header = T, stringsAsFactors = TRUE) -> base_ball
base_ball %>%
select(-연도) %>%
group_by(구단) %>%
group_split() %>%
map_dfr(.f = function(df) {
lm(타석당득점 ~ 출루율 + 장타력 + OPS + wOBA, data= df) %>%
glance() %>%
add_column(team_name = unique(df$구단), .before = 1)
}) %>%
select(team_name, nobs, r.squared, adj.r.squared, p.value) -> linear_baseball
linear_baseball
linear_baseball %>%
filter(nobs >= 5) %>%
mutate(team_name = as.factor(team_name) %>%
str_to_title()) %>%
mutate(Rates = cut_number(r.squared, n=5) %>% as.numeric()) %>%
mutate(Rates = map(Rates, star_rating)) %>%
set_names(names(.) %>% str_to_title()) %>%
arrange(desc(R.squared)) %>%
DT::datatable()
# bikeshop_revenue_tbl %>%
# set_names(names(.) %>% str_replace("_", " ") %>% str_to_title())
ModelTime + ARIMA
library(modeltime)
library(tidymodels)
library(timetk)
library(tidyverse)
walmart_sales_weekly
walmart_sales_weekly %>%
group_by(id) %>%
plot_time_series(Date, Weekly_Sales,
.facet_ncol = 2)
#Nesting = 101-R week 5 Again
#nesting = Create a "nested" data frame structure. The sturucue is a reshaped data frame with subsets of data contained indsie list
walmart_sales_weekly %>%
select(id, Date, Weekly_Sales) %>%
nest(nested_column = - id) -> data_nested
#id 기준 listing 형으로 묶어줌
#data_nested
data_nested$nested_column
## [[1]]
## # A tibble: 143 x 2
## Date Weekly_Sales
## <date> <dbl>
## 1 2010-02-05 24924.
## 2 2010-02-12 46039.
## 3 2010-02-19 41596.
## 4 2010-02-26 19404.
## 5 2010-03-05 21828.
## 6 2010-03-12 21043.
## 7 2010-03-19 22137.
## 8 2010-03-26 26229.
## 9 2010-04-02 57258.
## 10 2010-04-09 42961.
## # ... with 133 more rows
##
## [[2]]
## # A tibble: 143 x 2
## Date Weekly_Sales
## <date> <dbl>
## 1 2010-02-05 13740.
## 2 2010-02-12 10888.
## 3 2010-02-19 11523.
## 4 2010-02-26 11135.
## 5 2010-03-05 12276.
## 6 2010-03-12 10123.
## 7 2010-03-19 9001.
## 8 2010-03-26 10367.
## 9 2010-04-02 11157.
## 10 2010-04-09 10179.
## # ... with 133 more rows
##
## [[3]]
## # A tibble: 143 x 2
## Date Weekly_Sales
## <date> <dbl>
## 1 2010-02-05 40129.
## 2 2010-02-12 37335.
## 3 2010-02-19 38718.
## 4 2010-02-26 35318.
## 5 2010-03-05 38776.
## 6 2010-03-12 34343.
## 7 2010-03-19 34189.
## 8 2010-03-26 33582.
## 9 2010-04-02 38152.
## 10 2010-04-09 34146.
## # ... with 133 more rows
##
## [[4]]
## # A tibble: 143 x 2
## Date Weekly_Sales
## <date> <dbl>
## 1 2010-02-05 41969.
## 2 2010-02-12 36476.
## 3 2010-02-19 37858.
## 4 2010-02-26 37467.
## 5 2010-03-05 40424.
## 6 2010-03-12 35833.
## 7 2010-03-19 36807.
## 8 2010-03-26 35432.
## 9 2010-04-02 38105.
## 10 2010-04-09 37638.
## # ... with 133 more rows
##
## [[5]]
## # A tibble: 143 x 2
## Date Weekly_Sales
## <date> <dbl>
## 1 2010-02-05 115564.
## 2 2010-02-12 94136.
## 3 2010-02-19 98673.
## 4 2010-02-26 92756.
## 5 2010-03-05 108283.
## 6 2010-03-12 98175.
## 7 2010-03-19 71166.
## 8 2010-03-26 95855.
## 9 2010-04-02 101567.
## 10 2010-04-09 98689.
## # ... with 133 more rows
##
## [[6]]
## # A tibble: 143 x 2
## Date Weekly_Sales
## <date> <dbl>
## 1 2010-02-05 64495.
## 2 2010-02-12 70202.
## 3 2010-02-19 62582.
## 4 2010-02-26 57630.
## 5 2010-03-05 63550.
## 6 2010-03-12 63593.
## 7 2010-03-19 62781.
## 8 2010-03-26 55082.
## 9 2010-04-02 63245
## 10 2010-04-09 66796.
## # ... with 133 more rows
##
## [[7]]
## # A tibble: 143 x 2
## Date Weekly_Sales
## <date> <dbl>
## 1 2010-02-05 106690.
## 2 2010-02-12 111390.
## 3 2010-02-19 107952.
## 4 2010-02-26 103653.
## 5 2010-03-05 112808.
## 6 2010-03-12 112048.
## 7 2010-03-19 117716.
## 8 2010-03-26 113117.
## 9 2010-04-02 111466.
## 10 2010-04-09 116771.
## # ... with 133 more rows
data_nested %>%
unnest(nested_column)
data_nested %>%
mutate(fitted_model = map(nested_column, .f = function(df) {
arima_reg(seasonal_period = 52) %>%
set_engine("auto_arima") %>%
fit(Weekly_Sales ~ Date, data=df)
})) %>%
mutate(nested_forecast = map2(fitted_model,
nested_column,
.f = function(arima_model, df) {
modeltime_table(
arima_model
) %>%
modeltime_forecast(
h =52,
actual_data = df
)
})) -> model_table
model_table %>%
select(id, nested_forecast) %>%
unnest(nested_forecast) %>%
group_by(id) %>%
plot_modeltime_forecast(.facet_ncol =2)
MAP- ggplot
#install.packages(c("maps", "mapproj"))
library(maps)
library(mapproj)
map_data("world") %>%
tibble() -> world_data
# 세계 지도 완성
world_data %>%
ggplot() +
geom_map(
aes(long, lat, map_id = region),
map = world_data,
color = "grey80", fill = "gray30", size= 0.3
)

world_data %>%
ggplot() +
geom_map(
aes(long, lat, map_id = region),
map = world_data,
color = "grey80", fill = "gray30", size= 0.3
)+
coord_map("ortho",
orientation = c(39,-98, 0))

map_data("world", region = c("China","Japan","North Korea","South Korea")) %>%
tibble() -> ASIA
ASIA %>%
ggplot(aes(long, lat, map_id = region)) +
geom_map(
map = ASIA,
color = "grey80", fill= "grey30", size = 0.3
) +
coord_map("ortho",
orientation= c(20, 130, 0))

usa_tbl <- map_data("state") %>% as_tibble()
maps::votes.repub %>%
as.tibble(rownames = "state") %>%
select(state, '1976') %>%
rename(repub_prop = '1976') %>%
mutate(repub_prop = repub_prop/100) %>%
mutate(state = str_to_lower(state)) -> VOTE_1976
usa_tbl %>%
left_join(VOTE_1976, by = c("region" = "state")) -> USE_VOTE_1976
USE_VOTE_1976 %>%
ggplot(aes(long, lat, group = subregion)) +
geom_map(
aes(map_id = region),
map = usa_tbl,
color = "gray80", fill = "gray30", size = 0.3
) +
coord_map("ortho", orientation = c(39, -98, 0)) +
geom_polygon(aes(group = group,
fill = repub_prop), color = "black")+
scale_fill_gradient2(low = "blue", mid = "white", high = "red",
midpoint = 0.5, labels = scales::percent) +
theme_minimal()
