## 載入套件與資料
require(readr)
## 載入需要的套件:readr
require(mice)
## 載入需要的套件:mice
## 
## 載入套件:'mice'
## 下列物件被遮斷自 'package:stats':
## 
##     filter
## 下列物件被遮斷自 'package:base':
## 
##     cbind, rbind
require(tidyverse)
## 載入需要的套件:tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.1     ✔ purrr     1.0.1
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks mice::filter(), stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
require(car)
## 載入需要的套件:car
## 載入需要的套件:carData
## 
## 載入套件:'car'
## 
## 下列物件被遮斷自 'package:dplyr':
## 
##     recode
## 
## 下列物件被遮斷自 'package:purrr':
## 
##     some
require(dummy)
## 載入需要的套件:dummy
## dummy 0.1.3
## dummyNews()
require(writexl)
## 載入需要的套件:writexl
library(caret)
## 載入需要的套件:lattice
## 
## 載入套件:'caret'
## 
## 下列物件被遮斷自 'package:purrr':
## 
##     lift
library(data.table)
## 
## 載入套件:'data.table'
## 
## 下列物件被遮斷自 'package:lubridate':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday, week,
##     yday, year
## 
## 下列物件被遮斷自 'package:dplyr':
## 
##     between, first, last
## 
## 下列物件被遮斷自 'package:purrr':
## 
##     transpose
#### 資料處理 #####

# Read the raw monitoring data.
#
# Stations of interest (administrative area: station), as listed by the
# original author:
#   Kaohsiung City: Xiaogang
#   Yunlin County:  Douliu
#   Nantou County:  Zhushan
#   Chiayi City:    Xingang
#   Kinmen County:  Kinmen
# NOTE(review): Xingang is usually listed under Chiayi County rather than
# Chiayi City -- confirm against the data source.

# `pattern` is a regular expression, so the dot is escaped and the match is
# anchored to the end of the name; a bare ".csv" would also match names such
# as "xcsv_notes.txt".
file_list <- list.files(
  path = "C:/Users/USER/Desktop/NTHU_Big_Data/空汙資料/全部_2022/123",
  pattern = "\\.csv$",
  full.names = TRUE
)

# Fail early with a clear message instead of continuing with an empty frame.
if (length(file_list) == 0) {
  stop("No CSV files found in the air-pollution data directory.", call. = FALSE)
}

# Read every file, then bind once. Growing a data frame with rbind() inside a
# loop copies the accumulated rows on every iteration.
# All columns are read as character so that every file has identical column
# types and the bind cannot fail on a per-file type guess; the measurement
# columns are converted to numeric later in the script.
# NOTE(review): the files are decoded as GBK and the resulting station labels
# are garbled; later filters match on those garbled labels, so the encoding is
# left unchanged here. Confirm the real encoding before changing it.
dirty_data <- dplyr::bind_rows(
  lapply(file_list, function(csv_path) {
    read_csv(
      csv_path,
      locale = locale(encoding = "GBK"),
      col_types = cols(.default = col_character())
    )
  })
)
## Rows: 6571 Columns: 27
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (27): 代, ら戳, 代兜, 00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 6571 Columns: 27
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (27): 代, ら戳, 代兜, 00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 6571 Columns: 27
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (27): 代, ら戳, 代兜, 00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 6571 Columns: 27
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (27): 代, ら戳, 代兜, 00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 5491 Columns: 27
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (27): 代, ら戳, 代兜, 00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Work on a copy so the raw import stays untouched.
input_data <- dirty_data

# Rename only the first three columns (station, timestamp, pollutant).
# Assigning a 3-element vector to the names of the whole table would set the
# names of every remaining measurement column to NA; indexing the replacement
# keeps their original names.
# The console output recorded below this statement (names shown as NA) was
# captured before this change.
colnames(input_data)[1:3] <- c("location", "time", "polution")
column_names <- names(input_data)
print(column_names)
##  [1] "location" "time"     "polution" NA         NA         NA        
##  [7] NA         NA         NA         NA         NA         NA        
## [13] NA         NA         NA         NA         NA         NA        
## [19] NA         NA         NA         NA         NA         NA        
## [25] NA         NA         NA
# Distinct station labels present in the imported data.
unique_elements <- unique(input_data$location)

# Keep only the rows of the five stations under study. The keys are the
# station labels exactly as they were read from the files.
d123 <- input_data[
  is.element(input_data$location, c("翠", "ゆせ", "λ", "", "穝翠")),
]

# Parse the timestamp, then keep only its month/day part as text.
d123$time <- format(
  as.POSIXct(d123$time, format = "%Y/%m/%d %H:%M"),
  "%m/%d"
)

### Drop the station, time and pollutant columns; only measurements remain
only_airpolution_num <- d123[-(1:3)]
view(only_airpolution_num)

### Convert every remaining column to numeric
# Entries that cannot be parsed as numbers become NA (with a coercion warning).
numeric_columns <- lapply(only_airpolution_num, as.numeric)
only_airpolution_num1 <- as.data.frame(numeric_columns)
## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA

## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA

## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA

## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA

## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA

## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA

## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA

## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA

## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA

## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA

## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA

## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA

## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA

## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA

## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA

## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA

## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA

## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA

## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA

## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA

## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA

## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA

## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA

## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA
### Sanity check: the first cell of the converted table should be numeric
mode(only_airpolution_num1[[1L]][1L])
## [1] "numeric"
## Row averages

# Mean of the non-missing values of `x`.
#
# Arguments:
#   x            Numeric vector; NA entries are ignored.
#   empty_value  Value returned when `x` has no non-missing entry (including
#                a zero-length `x`). Defaults to 0, the value this script has
#                always used; pass NA_real_ to mark such rows as missing
#                instead of reporting them as a zero average.
#
# Returns a single number: sum of the non-missing values divided by their
# count, or `empty_value` when there is nothing to average.
custom_row_mean <- function(x, empty_value = 0) {
  valid <- !is.na(x)
  if (!any(valid)) {
    return(empty_value)
  }
  # Avoid naming a local `sum`: it would shadow base::sum inside the function.
  sum(x[valid]) / sum(valid)
}

# One average per row of the numeric measurement table.
average_airpolution <- apply(
  X = only_airpolution_num1,
  MARGIN = 1,
  FUN = custom_row_mean
)


view(average_airpolution)

# Identifying columns (station, date, pollutant) of the same rows.
only_airpolution_name <- d123[1:3]

# Put each row's average next to its identifying columns.
together <- cbind(only_airpolution_name, average_airpolution)
view(together)

# Keep only the pollutants of interest.
data_plot <- together

d789 <- data_plot[
  is.element(
    data_plot$polution,
    c("CO", "NOx", "O3", "PM10", "PM2.5", "SO2")
  ),
]

# Split the data by station and replace each garbled station label with its
# readable name. Each station is filtered and relabelled in one step.

# Xiaogang
small_gun <- d789[is.element(d789$location, "翠"), ]
small_gun$location <- "小港"

# Douliu
dosix <- d789[is.element(d789$location, "ゆせ"), ]
dosix$location <- "斗六"

# Zhushan
jusan <- d789[is.element(d789$location, "λ"), ]
jusan$location <- "竹山"

# Kinmen
gold_gate <- d789[is.element(d789$location, ""), ]
gold_gate$location <- "金門"

# Xingang
new_gun <- d789[is.element(d789$location, "穝翠"), ]
new_gun$location <- "新港"

# Stack the relabelled stations back into one table, in the order above.
clear_together <- rbind(small_gun, dosix, jusan, gold_gate, new_gun)



### 資料視覺化####

library(echarts4r)
library(dplyr)
library(ggplot2)
# One table per pollutant, each holding all stations.
clear_together_CO <- clear_together[
  is.element(clear_together$polution, "CO"),
]
clear_together_NOx <- clear_together[
  is.element(clear_together$polution, "NOx"),
]
clear_together_O3 <- clear_together[
  is.element(clear_together$polution, "O3"),
]
clear_together_PM10 <- clear_together[
  is.element(clear_together$polution, "PM10"),
]
clear_together_PM2.5 <- clear_together[
  is.element(clear_together$polution, "PM2.5"),
]
clear_together_SO2 <- clear_together[
  is.element(clear_together$polution, "SO2"),
]



library(echarts4r)
library(dplyr)
library(ggplot2)
#### Chart helpers #####

# Line chart of the averaged readings over time, one series per station.
#
# Arguments:
#   pollutant_data  Table with `location`, `time` and `average_airpolution`
#                   columns for a single pollutant.
#   pollutant       Pollutant name; used as the prefix of the chart title.
#
# Returns the echarts4r chart object (printed when called at top level).
plot_pollutant_lines <- function(pollutant_data, pollutant) {
  pollutant_data |>
    group_by(location) |>
    e_charts(time) |>
    e_line(average_airpolution) |>
    e_title(paste0(pollutant, "空氣汙染物")) |>
    e_datazoom(type = "inside") |>
    e_tooltip("axis")
}

# Animated horizontal bar chart: one timeline step per `time` value, with the
# station bars re-sorted at every step.
#
# Arguments are the same as for plot_pollutant_lines().
#
# Returns the echarts4r chart object (printed when called at top level).
plot_pollutant_ranking <- function(pollutant_data, pollutant) {
  pollutant_data |>
    group_by(time) |>
    e_chart(location, timeline = TRUE) |>
    e_bar(
      average_airpolution,
      realtimeSort = TRUE, # re-sort the bars at every timeline step
      seriesLayoutBy = "column"
    ) |>
    e_flip_coords() |>
    e_legend(show = FALSE) |>
    e_title(paste0(pollutant, "空氣汙染物")) |>
    e_timeline_opts(autoPlay = TRUE, show = TRUE) |>
    e_labels(
      fontSize = 12, # label font size
      fontWeight = "bold", # normal / bold / bolder / lighter
      fontStyle = "normal", # normal / italic / oblique
      fontFamily = "serif", # e.g. 'sans-serif', 'monospace', 'Arial'
      position = "right", # label position relative to the bar
      rotate = 0, # rotation angle
      # Horizontal alignment. The previous value "rigth" was a typo and is
      # not a valid alignment keyword.
      align = "right",
      verticalAlign = "top", # top / middle / bottom
      color = "black" # label text colour
    )
}

#### CO #####
# Line chart. Author's observation: Xiaogang clearly dominates.
plot_pollutant_lines(clear_together_CO, "CO")
# Real-time sorted ranking
plot_pollutant_ranking(clear_together_CO, "CO")

#### NOx #####
# Line chart. Author's observation: Xiaogang still dominates.
plot_pollutant_lines(clear_together_NOx, "NOx")
# Real-time sorted ranking
plot_pollutant_ranking(clear_together_NOx, "NOx")

#### O3 #####
# Line chart. Author's observation: surprisingly, Kinmen is the highest.
plot_pollutant_lines(clear_together_O3, "O3")
# Real-time sorted ranking
plot_pollutant_ranking(clear_together_O3, "O3")

#### PM10 #####
# Line chart. Author's observation: surprisingly, the stations are neck and
# neck.
plot_pollutant_lines(clear_together_PM10, "PM10")
# Real-time sorted ranking
plot_pollutant_ranking(clear_together_PM10, "PM10")

#### PM2.5 #####
# Line chart. Author's observation: Douliu leads by a small margin.
plot_pollutant_lines(clear_together_PM2.5, "PM2.5")
# Real-time sorted ranking
plot_pollutant_ranking(clear_together_PM2.5, "PM2.5")

#### SO2 #####
# Line chart. Author's observation: Xiaogang's SO2 level is strikingly high.
plot_pollutant_lines(clear_together_SO2, "SO2")
# Real-time sorted ranking
plot_pollutant_ranking(clear_together_SO2, "SO2")