library(tidyverse)
library(tidyquant)
library(parsnip)
library(plotly)
library(DT)
library(gt)
library(fontawesome)
library(htmltools)

Pivoting

stock_data_tbl <- c("AAPL", "GOOG", "NFLX", "NVDA") %>%
    tq_get(from = "2010-01-01", to = "2019-12-31") %>%
    select(symbol, date, adjusted)


stock_data_tbl %>%  
  
  pivot_table(
    .rows    = c(~symbol, ~ MONTH(date, label = TRUE)), 
    .columns =  ~ YEAR(date), 
    .values =  ~ MEDIAN(adjusted)
  ) %>%  
  
  rename_at(.vars = 1:2, 
            ~ c("Symbol", "Month")) %>%  
  
  DT::datatable()

stock_data_tbl %>%  
  
  pivot_table(
    
    .rows    = c(~ YEAR(date), ~ MONTH(date, label=TRUE)), 
    .columns = ~ symbol, 
    .values =  ~ MEDIAN(adjusted)
    
  ) %>%  
  rename_at(
    .vars = 1:2, 
    ~ c("YEAR", "MONTH")
  ) %>%  
  
  DT::datatable()

stock_data_tbl %>%  
  
  pivot_table(
    
    .rows = c(~ YEAR(date), ~symbol), 
    .columns = ~ MONTH(date) %>%  as.factor(), 
    .values = ~ AVERAGE(adjusted)
    
  ) %>%  
  
  rename_at(
    .vars = 1:2, 
    ~ c("YEAR", "Symbol")
  ) %>%  
  

  
  DT::datatable()

stock_data_tbl %>%  
  
  pivot_table(
    
    .rows =  ~YEAR(date), 
    .columns = ~symbol, 
    .values = ~ PCT_CHANGE_FIRSTLAST(adjusted)
    
    
  ) %>%  
  
  rename(YEAR =1)

mpg %>%  
  
  group_by(manufacturer) %>%  
  count(class, name ="n") %>% 
  
  
  pivot_wider(names_from = class,   #columns
              values_from = n, 
              values_fill = 0) %>% 
  ungroup() -> my_pivot_wider

my_pivot_wider

my_pivot_wider %>%  
  
  pivot_longer(
    
    cols = compact:subcompact, 
    names_to = "class", 
    values_to = "value"
  )

my_pivot_wider %>%  
  
  pivot_longer(
    
    cols = compact:subcompact, 
    names_to = "class", 
    values_to = "value"
  ) %>%  
  
  ggplot(aes(class, manufacturer, fill = value))+
  
  geom_tile()+
  geom_label(aes(label = value), color = "white")+ 
  
  scale_fill_viridis_c()+
        
  theme_minimal()

mpg %>%  
  
  pivot_table(
    
    .rows = class, 
    .values = ~ list(lm(hwy ~ displ + cyl - 1))
    
    
  )

Simple Linear with group_by()

manufactuere group_by
그룹별 lm listing
map_dfr 을 활용한 dataframe
각 식의 값에 대한 star_rating 생성

mpg %>%  
  
  mutate(manufacturer = as_factor(manufacturer)) %>%  
  
  group_by(manufacturer) %>%  
  group_split() %>%  
  
  
  
  map_dfr( .f = function(df) {
    
    lm(hwy ~ cty + displ + cyl, data = df) %>% 
      glance() %>%  
      
      add_column(Brand  = unique(df$manufacturer), .before = 1)
    
  }) -> linear_sample

linear_sample

## star rating 


star_rating <-function(rating, max_rate = 5) {
  
  
  rating <- floor(rating + 0.5)
  
  stars <- lapply(seq_len(max_rate), function(i) {
    
    if(i <= rating) {
      fontawesome::fa("star",  fill="orange")
    }else {
      
      fontawesome::fa("star", fill="grey")
    }
  })
  
  
  label <- sprintf("%s out of %s", rating, max_rate)
  div_out <- div(title = label , "aria-label"=label, role = "img", stars)
  
  as.character(div_out) %>%  
    gt::html()
  
}

linear_sample %>% 
  
  select(Brand, nobs, r.squared, adj.r.squared, p.value) %>%  
  
  mutate(Brand = str_to_title(Brand)) %>%  
  mutate(Rating = cut_number(r.squared, n=5) %>%  as.numeric()) %>% 
  mutate(Rating = map(Rating, star_rating)) %>% 
  
  arrange(desc(adj.r.squared))

 read.csv("free_r_tips-master/011_group_split_linear_regression/kbo.csv", header = T, stringsAsFactors = TRUE) -> base_ball




base_ball %>%  
  select(-연도) %>%  
  
  group_by(구단) %>%  
  group_split() %>%  
  
  
  map_dfr(.f = function(df) {
    
    lm(타석당득점 ~ 출루율 + 장타력 + OPS + wOBA, data= df) %>% 
      glance() %>% 
      
      add_column(team_name = unique(df$구단), .before = 1)
    
    
  }) %>%  
  select(team_name, nobs, r.squared, adj.r.squared, p.value) -> linear_baseball


linear_baseball

linear_baseball %>%  
  filter(nobs >= 5) %>% 
  
  mutate(team_name = as.factor(team_name) %>%  
           str_to_title()) %>%  
  
  mutate(Rates = cut_number(r.squared, n=5) %>% as.numeric()) %>% 
  mutate(Rates = map(Rates, star_rating)) %>%  
  set_names(names(.) %>%  str_to_title()) %>% 
  arrange(desc(R.squared)) %>% 
  
  DT::datatable()

# bikeshop_revenue_tbl %>%  
#       set_names(names(.) %>%  str_replace("_", " ") %>%  str_to_title())

ModelTime + ARIMA

library(modeltime)
library(tidymodels)
library(timetk)
library(tidyverse)

walmart_sales_weekly

walmart_sales_weekly %>% 
  
  group_by(id) %>%  
  plot_time_series(Date, Weekly_Sales,
                   .facet_ncol = 2)

#Nesting = 101-R week 5 Again 

#nesting = Create a "nested" data frame structure. The sturucue is a reshaped data frame with subsets of data contained indsie list 

walmart_sales_weekly %>%  
  select(id, Date, Weekly_Sales) %>% 
  nest(nested_column = - id) -> data_nested

#id 기준 listing 형으로 묶어줌 
#data_nested

data_nested$nested_column

## [[1]]
## # A tibble: 143 x 2
##    Date       Weekly_Sales
##    <date>            <dbl>
##  1 2010-02-05       24924.
##  2 2010-02-12       46039.
##  3 2010-02-19       41596.
##  4 2010-02-26       19404.
##  5 2010-03-05       21828.
##  6 2010-03-12       21043.
##  7 2010-03-19       22137.
##  8 2010-03-26       26229.
##  9 2010-04-02       57258.
## 10 2010-04-09       42961.
## # ... with 133 more rows
## 
## [[2]]
## # A tibble: 143 x 2
##    Date       Weekly_Sales
##    <date>            <dbl>
##  1 2010-02-05       13740.
##  2 2010-02-12       10888.
##  3 2010-02-19       11523.
##  4 2010-02-26       11135.
##  5 2010-03-05       12276.
##  6 2010-03-12       10123.
##  7 2010-03-19        9001.
##  8 2010-03-26       10367.
##  9 2010-04-02       11157.
## 10 2010-04-09       10179.
## # ... with 133 more rows
## 
## [[3]]
## # A tibble: 143 x 2
##    Date       Weekly_Sales
##    <date>            <dbl>
##  1 2010-02-05       40129.
##  2 2010-02-12       37335.
##  3 2010-02-19       38718.
##  4 2010-02-26       35318.
##  5 2010-03-05       38776.
##  6 2010-03-12       34343.
##  7 2010-03-19       34189.
##  8 2010-03-26       33582.
##  9 2010-04-02       38152.
## 10 2010-04-09       34146.
## # ... with 133 more rows
## 
## [[4]]
## # A tibble: 143 x 2
##    Date       Weekly_Sales
##    <date>            <dbl>
##  1 2010-02-05       41969.
##  2 2010-02-12       36476.
##  3 2010-02-19       37858.
##  4 2010-02-26       37467.
##  5 2010-03-05       40424.
##  6 2010-03-12       35833.
##  7 2010-03-19       36807.
##  8 2010-03-26       35432.
##  9 2010-04-02       38105.
## 10 2010-04-09       37638.
## # ... with 133 more rows
## 
## [[5]]
## # A tibble: 143 x 2
##    Date       Weekly_Sales
##    <date>            <dbl>
##  1 2010-02-05      115564.
##  2 2010-02-12       94136.
##  3 2010-02-19       98673.
##  4 2010-02-26       92756.
##  5 2010-03-05      108283.
##  6 2010-03-12       98175.
##  7 2010-03-19       71166.
##  8 2010-03-26       95855.
##  9 2010-04-02      101567.
## 10 2010-04-09       98689.
## # ... with 133 more rows
## 
## [[6]]
## # A tibble: 143 x 2
##    Date       Weekly_Sales
##    <date>            <dbl>
##  1 2010-02-05       64495.
##  2 2010-02-12       70202.
##  3 2010-02-19       62582.
##  4 2010-02-26       57630.
##  5 2010-03-05       63550.
##  6 2010-03-12       63593.
##  7 2010-03-19       62781.
##  8 2010-03-26       55082.
##  9 2010-04-02       63245 
## 10 2010-04-09       66796.
## # ... with 133 more rows
## 
## [[7]]
## # A tibble: 143 x 2
##    Date       Weekly_Sales
##    <date>            <dbl>
##  1 2010-02-05      106690.
##  2 2010-02-12      111390.
##  3 2010-02-19      107952.
##  4 2010-02-26      103653.
##  5 2010-03-05      112808.
##  6 2010-03-12      112048.
##  7 2010-03-19      117716.
##  8 2010-03-26      113117.
##  9 2010-04-02      111466.
## 10 2010-04-09      116771.
## # ... with 133 more rows

data_nested %>%  
  unnest(nested_column)

data_nested %>%  
  
  mutate(fitted_model = map(nested_column, .f = function(df) {
    
    arima_reg(seasonal_period = 52) %>% 
      set_engine("auto_arima") %>%  
      fit(Weekly_Sales ~ Date, data=df)
  })) %>%  
  
  
  mutate(nested_forecast = map2(fitted_model, 
                                nested_column, 
                                .f = function(arima_model, df) {
                                  
                                  modeltime_table(
                                    arima_model
                                  ) %>%  
                                    modeltime_forecast(
                                      h =52, 
                                      actual_data = df
                                    )
                                })) -> model_table
  


model_table %>%  
  select(id, nested_forecast) %>%  
  unnest(nested_forecast) %>%  
  
  group_by(id) %>%  
  plot_modeltime_forecast(.facet_ncol =2)

MAP- ggplot

#install.packages(c("maps", "mapproj"))

library(maps)
library(mapproj)



map_data("world") %>%  
  tibble() -> world_data


# 세계 지도 완성 

world_data %>%  
  
  ggplot() + 
  geom_map(
    aes(long, lat, map_id = region), 
    map = world_data, 
    color = "grey80", fill = "gray30", size= 0.3
  )

world_data %>%  
  
  ggplot() + 
  geom_map(
    aes(long, lat, map_id = region), 
    map = world_data, 
    color = "grey80", fill = "gray30", size= 0.3
  )+ 
  
  coord_map("ortho", 
            orientation = c(39,-98, 0))

map_data("world", region = c("China","Japan","North Korea","South Korea")) %>% 
  tibble() -> ASIA

ASIA %>%  
  ggplot(aes(long, lat, map_id = region)) +
  
  geom_map(
    
    map = ASIA, 
    color = "grey80", fill= "grey30", size = 0.3
  ) + 
  
  coord_map("ortho", 
            orientation= c(20, 130, 0))

usa_tbl <- map_data("state") %>% as_tibble()


maps::votes.repub %>%  
  as.tibble(rownames = "state") %>%  
  
  select(state, '1976') %>%  
  
  rename(repub_prop = '1976') %>% 
  
  
  mutate(repub_prop = repub_prop/100) %>%  
  mutate(state = str_to_lower(state))  -> VOTE_1976


usa_tbl %>%  
  
  left_join(VOTE_1976, by = c("region" = "state")) -> USE_VOTE_1976


USE_VOTE_1976 %>%  
  
  ggplot(aes(long, lat, group = subregion)) +
  
  geom_map(
    
    aes(map_id = region), 
    map = usa_tbl, 
    color = "gray80", fill = "gray30", size = 0.3
  ) + 
  
   coord_map("ortho", orientation = c(39, -98, 0)) +
  
  geom_polygon(aes(group = group, 
                   fill = repub_prop), color = "black")+
   scale_fill_gradient2(low = "blue", mid = "white", high = "red",
                         midpoint = 0.5, labels = scales::percent) +
    theme_minimal()

R-codes package

doeun

2021 6 26

Pivoting

Simple Linear with group_by()

ModelTime + ARIMA

MAP- ggplot