plotly_ch2

chap 2. Plotly로 시각화하기

데이터 전처리를 위한 패키지 설치 및 로딩

## Attach tidyverse; if it cannot be attached, install it and then attach it.
if (!require(tidyverse)) {
  install.packages("tidyverse")
  library(tidyverse)
}

Loading required package: tidyverse

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.1.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

## Attach readxl; if it cannot be attached, install it and then attach it.
if (!require(readxl)) {
  install.packages("readxl")
  library(readxl)
}

Loading required package: readxl

## Attach readr; if it cannot be attached, install it and then attach it.
if (!require(readr)) {
  install.packages("readr")
  library(readr)
}

## Same for lubridate. install.packages() and library() must be on separate
## lines (or separated by `;`) inside the braces, otherwise R fails to parse.
if (!require(lubridate)) {
  install.packages("lubridate")
  library(lubridate)
}

1. covid19 원본 데이터셋 로딩

covid19 데이터 로딩(파일을 다운로드받은 경우)

## Set the working directory; the relative data paths in this script are
## resolved against it.
setwd("/Users/hyunjhinlee/Desktop/R/25_urop/plotly_ex")

## Load the COVID-19 data from the downloaded CSV, parsing `date` as a Date.
df_covid19 <- read_csv(
  file = "./owid-covid-data.csv",
  col_types = cols(
    date = col_date(format = "%Y-%m-%d")
  )
)

## Alternative: load the data directly from the web instead of a local file.
## NOTE(review): this commented-out call spells the column `Date`, while the
## active call above uses `date` -- confirm the column name before enabling.
# df_covid19 <- read_csv(
#   file = "https://covid.ourworldindata.org/data/owid-covid-data.csv",
#   col_types = cols(Date = col_date(format = "%Y-%m-%d"))
# )

2. 전체 데이터셋 중 최근 100일간의 데이터를 필터링한 df_covid19_100 생성

df_covid19_100 <- df_covid19 |>
  ## Keep only the rows for South Korea and the continent-level OWID codes.
  filter(
    iso_code %in% c(
      "KOR", "OWID_ASI", "OWID_EUR", "OWID_OCE",
      "OWID_NAM", "OWID_SAM", "OWID_AFR"
    )
  ) |>
  ## Keep rows dated no earlier than 100 days before the latest date.
  ## This stays a separate filter() on purpose: max(date) is evaluated on the
  ## rows that survived the previous step, not on the full dataset.
  filter(date >= max(date) - 100) |>
  mutate(
    ## Translate the location names into Korean; unmatched names become NA.
    location = case_when(
      location == "South Korea" ~ "한국",
      location == "Asia" ~ "아시아",
      location == "Europe" ~ "유럽",
      location == "Oceania" ~ "오세아니아",
      location == "North America" ~ "북미",
      location == "South America" ~ "남미",
      location == "Africa" ~ "아프리카"
    ),
    ## Convert to a factor with an explicit level order.
    location = fct_relevel(
      location,
      "한국", "아시아", "유럽", "북미", "남미", "아프리카", "오세아니아"
    )
  ) |>
  ## Sort by date.
  arrange(date)

3. df_covid19_100을 한국과 각 대륙별 열로 배치한 넓은 형태의 데이터프레임으로 변환

df_covid19_100_wide <- df_covid19_100 |>
  ## Keep date, location, new cases and the fully-vaccinated-per-hundred
  ## column, renaming the two measures to Korean names in the same step.
  select(
    date,
    location,
    확진자 = new_cases,
    백신접종완료자 = people_fully_vaccinated_per_hundred
  ) |>
  ## Reshape to wide form: one row per date, one column per measure/location.
  pivot_wider(
    id_cols = date,
    names_from = location,
    values_from = c(확진자, 백신접종완료자)
  ) |>
  ## Sort by date.
  arrange(date)

4. covid19 데이터를 국가별로 요약한 df_covid19_stat 생성

## Maximum of `x` ignoring missing values.
## Returns NA when `x` has no non-missing values (including when it is empty),
## instead of the -Inf plus warning that max(x, na.rm = TRUE) would give.
safe_max <- function(x) {
  observed <- x[!is.na(x)]
  if (length(observed) == 0L) {
    return(NA)
  }
  max(observed)
}
## Per-location summary of the COVID-19 data: one row per
## iso_code / continent / location combination.
df_covid19_stat <- df_covid19 |>
  group_by(iso_code, continent, location) |>
  summarise(
    ## safe_max() yields NA for a group whose values are all missing.
    인구수 = safe_max(population),
    ## Spell out TRUE: `T` is an ordinary variable that can be reassigned.
    전체사망자수 = sum(new_deaths, na.rm = TRUE),
    백신접종자완료자수 = safe_max(people_fully_vaccinated),
    인구백명당백신접종완료율 = safe_max(people_fully_vaccinated_per_hundred),
    인구백명당부스터접종자수 = safe_max(total_boosters_per_hundred)
  ) |>
  ## summarise() removes only the last grouping level (and prints a message
  ## saying so), so drop the remaining grouping explicitly.
  ungroup() |>
  mutate(
    ## Deaths per 100,000 population, rounded to 5 decimal places.
    십만명당사망자수 = round(전체사망자수 / 인구수 * 100000, 5),
    ## Fully vaccinated people as a fraction of the population.
    백신접종완료율 = 백신접종자완료자수 / 인구수
  )

`summarise()` has grouped output by 'iso_code', 'continent'. You can override
using the `.groups` argument.

## Plot margins (top, bottom, left, right) passed to layout(margin = ...).
margins_R <- list(
  t = 50,
  b = 25,
  l = 25,
  r = 25
)

5. 대학 학과 취업률 데이터 로딩

library(readxl)
library(dplyr)
## Read the "학과별" sheet of the employment-statistics workbook.
## Every column is read as text here; the numeric columns are converted
## afterwards.
df_취업률 <- read_excel(
  path = "./2021년 학과별 고등교육기관 취업통계.xlsx",
  sheet = "학과별",
  ## Skip the first 13 rows of the sheet.
  skip = 13,
  ## Use the first row that is read as the column names.
  col_names = TRUE,
  ## A single type is recycled to all columns.
  col_types = "text"
)

## Names of the columns to convert to numeric: columns 10 through 88.
numeric_cols <- colnames(df_취업률)[10:88]

## Convert only those columns from text to numeric.
df_취업률 <- df_취업률 |>
  mutate(across(all_of(numeric_cols), as.numeric))


## Keep columns 1 to 9, every column whose name ends with "계", and the
## "입대자" column; store the result back into df_취업률.
df_취업률 <- select(df_취업률, 1:9, ends_with("계"), "입대자")

## Take the departments with fewer than 500 graduates and keep every 4th row
## (rows 1, 5, 9, ...) as a 25% systematic sample.
df_취업률_500 <- df_취업률 |>
  filter(졸업자_계 < 500) |>
  ## Row id within the filtered data.
  mutate(id = row_number()) |>
  ## Select by position within the filtered rows themselves. The earlier form
  ## built the index from nrow() of the unfiltered frame, which only worked
  ## because that sequence is a superset, and seq() fails on an empty frame.
  filter(row_number() %% 4L == 1L)

## Give columns 10 to 12 shorter names (renamed by position).
names(df_취업률_500) <- replace(
  names(df_취업률_500),
  10:12,
  c("졸업자수", "취업률", "취업자수")
)

6. Plotly 설치

## Try to attach plotly; install the package if that fails.
if (!require(plotly)) {
  install.packages("plotly")
}

Loading required package: plotly


Attaching package: 'plotly'

The following object is masked from 'package:ggplot2':

    last_plot

The following object is masked from 'package:stats':

    filter

The following object is masked from 'package:graphics':

    layout

library(plotly) ## Attach the plotly package.

R에서 Plotly 객체 초기화

## Initialise a plotly object from the data with no trace type or attributes.
plot_ly(df_covid19_100)

Warning: No trace type specified and no positional attributes specified

No trace type specified:
  Based on info supplied, a 'scatter' trace seems appropriate.
  Read more about this trace type -> https://plotly.com/r/reference/#scatter

No scatter mode specifed:
  Setting the mode to markers
  Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode

7. 긴 형태의 100일 코로나19 데이터에서 한국 데이터만을 필터링

## Plot South Korea's daily new cases as a scatter trace drawn with both
## markers and lines.
plot_ly(
  ## Only the South Korea rows of the long-format data.
  data = filter(df_covid19_100, iso_code == "KOR"),
  ## Map variables to the X and Y axes with `~`.
  x = ~date,
  y = ~new_cases,
  type = "scatter",
  mode = "markers+lines",
  ## Marker colour.
  marker = list(color = "#264E86"),
  ## Line colour and dash style.
  line = list(color = "#5E88FC", dash = "dash")
)

## South Korea's daily new cases (markers + dashed line) with a title, axis
## titles and custom margins applied through layout().
df_covid19_100 |>
  filter(iso_code == "KOR") |>
  plot_ly(
    type = "scatter", x = ~date, y = ~new_cases,
    mode = "markers+lines",
    marker = list(color = "#264E86"),
    line = list(color = "#5E88FC", dash = "dash")
  ) |>
  layout(
    ## Overall plot title.
    title = "코로나19 발생 현황",
    ## X axis: title, grid lines off. FALSE is spelled out because `F` is an
    ## ordinary variable that can be reassigned.
    xaxis = list(title = "날짜", showgrid = FALSE),
    ## Y axis: title only.
    yaxis = list(title = "확진자수"),
    ## Margins taken from the margins_R list defined earlier in the script.
    margin = margins_R
  )

Plotly 구조 출력

## Build the South Korea scatter plot and print its plotly structure as JSON
## text (jsonedit = FALSE) instead of rendering the figure.
df_covid19_100 |>
  filter(iso_code == "KOR") |>
  plot_ly(
    x = ~date,
    y = ~new_cases,
    type = "scatter",
    mode = "markers+lines",
    marker = list(color = "#264E86"),
    line = list(color = "#5E88FC", dash = "dash")
  ) |>
  plotly_json(jsonedit = FALSE)

{
  "visdat": {
    "4f6d4b3c5eb0": ["function () ", "plotlyVisDat"]
  },
  "cur_data": "4f6d4b3c5eb0",
  "attrs": {
    "4f6d4b3c5eb0": {
      "x": {},
      "y": {},
      "mode": "markers+lines",
      "marker": {
        "color": "#264E86"
      },
      "line": {
        "color": "#5E88FC",
        "dash": "dash"
      },
      "alpha_stroke": 1,
      "sizes": [10, 100],
      "spans": [1, 20],
      "type": "scatter"
    }
  },
  "layout": {
    "margin": {
      "b": 40,
      "l": 60,
      "t": 25,
      "r": 10
    },
    "xaxis": {
      "domain": [0, 1],
      "automargin": true,
      "title": "date"
    },
    "yaxis": {
      "domain": [0, 1],
      "automargin": true,
      "title": "new_cases"
    },
    "hovermode": "closest",
    "showlegend": false
  },
  "source": "A",
  "config": {
    "modeBarButtonsToAdd": ["hoverclosest", "hovercompare"],
    "showSendToCloud": false
  },
  "data": [
    {
      "x": ["2022-10-03", "2022-10-04", "2022-10-05", "2022-10-06", "2022-10-07", "2022-10-08", "2022-10-09", "2022-10-10", "2022-10-11", "2022-10-12", "2022-10-13", "2022-10-14", "2022-10-15", "2022-10-16", "2022-10-17", "2022-10-18", "2022-10-19", "2022-10-20", "2022-10-21", "2022-10-22", "2022-10-23", "2022-10-24", "2022-10-25", "2022-10-26", "2022-10-27", "2022-10-28", "2022-10-29", "2022-10-30", "2022-10-31", "2022-11-01", "2022-11-02", "2022-11-03", "2022-11-04", "2022-11-05", "2022-11-06", "2022-11-07", "2022-11-08", "2022-11-09", "2022-11-10", "2022-11-11", "2022-11-12", "2022-11-13", "2022-11-14", "2022-11-15", "2022-11-16", "2022-11-17", "2022-11-18", "2022-11-19", "2022-11-20", "2022-11-21", "2022-11-22", "2022-11-23", "2022-11-24", "2022-11-25", "2022-11-26", "2022-11-27", "2022-11-28", "2022-11-29", "2022-11-30", "2022-12-01", "2022-12-02", "2022-12-03", "2022-12-04", "2022-12-05", "2022-12-06", "2022-12-07", "2022-12-08", "2022-12-09", "2022-12-10", "2022-12-11", "2022-12-12", "2022-12-13", "2022-12-14", "2022-12-15", "2022-12-16", "2022-12-17", "2022-12-18", "2022-12-19", "2022-12-20", "2022-12-21", "2022-12-22", "2022-12-23", "2022-12-24", "2022-12-25", "2022-12-26", "2022-12-27", "2022-12-28", "2022-12-29", "2022-12-30", "2022-12-31", "2023-01-01", "2023-01-02", "2023-01-03", "2023-01-04", "2023-01-05", "2023-01-06", "2023-01-07", "2023-01-08", "2023-01-09", "2023-01-10", "2023-01-11"],
      "y": [16423, 34710, 28603, 22259, 19379, 17654, 8981, 15476, 30503, 26928, 23562, 22757, 21469, 11040, 33190, 29482, 25369, 24709, 26823, 26256, 14302, 43714, 40805, 34950, 35887, 37296, 34511, 18510, 58358, 54740, 46870, 43424, 40863, 36675, 18671, 62273, 62472, 55365, 54519, 54225, 48465, 23765, 72883, 66587, 55437, 49418, 50435, 46011, 23091, 72873, 70324, 59089, 53698, 52648, 47028, 22327, 71476, 67415, 57079, 52987, 52726, 46564, 23160, 77604, 74714, 65253, 62734, 62608, 54319, 25667, 86852, 84571, 70154, 66953, 66752, 58862, 26622, 87559, 88172, 75744, 68168, 66049, 58448, 25545, 87596, 87517, 71427, 65207, 62926, 57527, 22735, 81056, 78575, 64106, 56954, 53608, 46766, 19106, 60041, 54343, 43953],
      "mode": "markers+lines",
      "marker": {
        "color": "#264E86",
        "line": {
          "color": "rgba(31,119,180,1)"
        }
      },
      "line": {
        "color": "#5E88FC",
        "dash": "dash"
      },
      "type": "scatter",
      "error_y": {
        "color": "rgba(31,119,180,1)"
      },
      "error_x": {
        "color": "rgba(31,119,180,1)"
      },
      "xaxis": "x",
      "yaxis": "y",
      "frame": null
    }
  ],
  "highlight": {
    "on": "plotly_click",
    "persistent": false,
    "dynamic": false,
    "selectize": false,
    "opacityDim": 0.20000000000000001,
    "selected": {
      "opacity": 1
    },
    "debounce": 0
  },
  "shinyEvents": ["plotly_hover", "plotly_click", "plotly_selected", "plotly_relayout", "plotly_brushed", "plotly_brushing", "plotly_clickannotation", "plotly_doubleclick", "plotly_deselect", "plotly_afterplot", "plotly_sunburstclick"],
  "base_url": "https://plot.ly"
}