## 載入套件與資料
require(readr)
## 載入需要的套件:readr
require(mice)
## 載入需要的套件:mice
##
## 載入套件:'mice'
## 下列物件被遮斷自 'package:stats':
##
## filter
## 下列物件被遮斷自 'package:base':
##
## cbind, rbind
require(tidyverse)
## 載入需要的套件:tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.1 ✔ purrr 1.0.1
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks mice::filter(), stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
require(car)
## 載入需要的套件:car
## 載入需要的套件:carData
##
## 載入套件:'car'
##
## 下列物件被遮斷自 'package:dplyr':
##
## recode
##
## 下列物件被遮斷自 'package:purrr':
##
## some
require(dummy)
## 載入需要的套件:dummy
## dummy 0.1.3
## dummyNews()
require(writexl)
## 載入需要的套件:writexl
library(caret)
## 載入需要的套件:lattice
##
## 載入套件:'caret'
##
## 下列物件被遮斷自 'package:purrr':
##
## lift
library(data.table)
##
## 載入套件:'data.table'
##
## 下列物件被遮斷自 'package:lubridate':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
##
## 下列物件被遮斷自 'package:dplyr':
##
## between, first, last
##
## 下列物件被遮斷自 'package:purrr':
##
## transpose
#### 資料處理 #####
# 讀進
# 高雄市:小港
# 雲林縣:斗六
#
# 南投縣:竹山
#
# 嘉義縣:新港
#
# 金門縣:金門
# Collect every CSV export in the 2022 air-quality folder.
# The pattern is anchored and escaped so only names ending in ".csv" match;
# a bare ".csv" is a regex in which "." matches any character.
file_list <- list.files(
  path = "C:/Users/USER/Desktop/NTHU_Big_Data/空汙資料/全部_2022/123",
  pattern = "\\.csv$",
  full.names = TRUE
)
# Fail early with a clear message instead of erroring later on an empty frame.
if (length(file_list) == 0L) {
  stop("No CSV files found in the air-quality data folder.", call. = FALSE)
}
# Read all files first and bind them once, rather than growing the frame with
# rbind() inside a loop (which re-copies the accumulated rows on every pass).
# NOTE(review): encoding = "GBK" is kept because the station filters further
# down match the strings this decoding produces. The garbled column names in
# the captured read_csv output suggest the files may use another encoding
# (possibly Big5) - confirm, and change the filters together with this.
csv_frames <- lapply(
  file_list,
  function(csv_path) read_csv(csv_path, locale = locale(encoding = "GBK"))
)
dirty_data <- do.call(rbind, csv_frames)
## Rows: 6571 Columns: 27
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (27): 代, ら戳, 代兜, 00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 6571 Columns: 27
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (27): 代, ら戳, 代兜, 00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 6571 Columns: 27
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (27): 代, ら戳, 代兜, 00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 6571 Columns: 27
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (27): 代, ら戳, 代兜, 00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 5491 Columns: 27
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (27): 代, ら戳, 代兜, 00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Work on a copy so the raw import (dirty_data) stays untouched.
input_data <- dirty_data
## Rename only the first three columns (station, timestamp, pollutant).
## Assigning a length-3 vector to colnames() of the whole frame left every
## remaining column named NA; indexing the names keeps the original names of
## the other columns. Captured output below that shows NA names predates this.
names(input_data)[1:3] <- c("location", "time", "polution")
column_names <- names(input_data)
print(column_names)
## [1] "location" "time" "polution" NA NA NA
## [7] NA NA NA NA NA NA
## [13] NA NA NA NA NA NA
## [19] NA NA NA NA NA NA
## [25] NA NA NA
# Distinct station labels present in the import (useful to check the filter).
unique_elements <- unique(input_data$location)
# Keep only the five stations of interest. The labels are the garbled strings
# exactly as they appear in the imported data, so they must stay as they are.
d123 <- input_data[
  is.element(input_data$location, c("翠", "ゆせ", "λ", "", "穝翠")),
]
# Parse the timestamp, then reduce it to a "month/day" label.
d123$time <- format(
  as.POSIXct(d123$time, format = "%Y/%m/%d %H:%M"),
  "%m/%d"
)
### Drop the station, time and pollutant columns, leaving only the readings.
only_airpolution_num <- d123[-seq_len(3)]
view(only_airpolution_num)
### Convert every remaining column to numeric; entries that cannot be parsed
### become NA (each column may raise a coercion warning).
only_airpolution_num1 <- as.data.frame(
  lapply(only_airpolution_num, as.numeric)
)
## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA
## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA
## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA
## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA
## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA
## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA
## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA
## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA
## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA
## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA
## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA
## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA
## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA
## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA
## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA
## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA
## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA
## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA
## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA
## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA
## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA
## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA
## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA
## Warning in lapply(only_airpolution_num, as.numeric): 強制變更過程中產生了 NA
### Sanity check: the first cell should now report mode "numeric".
mode(only_airpolution_num1[[1]][1])
## [1] "numeric"
## Row average that ignores missing values.
#' Mean of the non-missing entries of a vector.
#'
#' @param x A vector of readings; NA (and NaN) entries are skipped.
#' @return The sum of the non-missing entries divided by their count, or 0
#'   when no entry is available.
# NOTE(review): returning 0 for an all-missing row makes "no data" look like
# a zero reading downstream - confirm this is intended.
custom_row_mean <- function(x) {
  observed <- x[!is.na(x)]
  if (length(observed) == 0L) {
    return(0)
  }
  sum(observed) / length(observed)
}
# One value per row: the mean of all reading columns of that record.
average_airpolution <- apply(
  X = only_airpolution_num1,
  MARGIN = 1,
  FUN = custom_row_mean
)
view(average_airpolution)
# Re-attach the identifying columns (station, time, pollutant) to the means.
only_airpolution_name <- d123[1:3]
together <- cbind(only_airpolution_name, average_airpolution)
view(together)
# Keep only the six pollutants of interest.
data_plot <- together
d789 <- data_plot[
  is.element(
    data_plot$polution,
    c("CO", "NOx", "O3", "PM10", "PM2.5", "SO2")
  ),
]
# Split the records by station, matching on the garbled labels found in the
# imported data.
small_gun <- d789[is.element(d789$location, "翠"), ] # Xiaogang (小港)
dosix <- d789[is.element(d789$location, "ゆせ"), ] # Douliu (斗六)
jusan <- d789[is.element(d789$location, "λ"), ] # Zhushan (竹山)
gold_gate <- d789[is.element(d789$location, ""), ] # Kinmen (金門)
new_gun <- d789[is.element(d789$location, "穝翠"), ] # Xingang (新港)
# Replace the garbled labels with the proper station names.
small_gun$location <- "小港"
dosix$location <- "斗六"
jusan$location <- "竹山"
gold_gate$location <- "金門"
new_gun$location <- "新港"
# Stack the relabelled stations back into a single frame.
clear_together <- rbind(small_gun, dosix, jusan, gold_gate, new_gun)
### 資料視覺化####
library(echarts4r)
library(dplyr)
library(ggplot2)
# One frame per pollutant, each feeding its own pair of charts.
clear_together_CO <- clear_together[
  which(clear_together$polution == "CO"),
]
clear_together_NOx <- clear_together[
  which(clear_together$polution == "NOx"),
]
clear_together_O3 <- clear_together[
  which(clear_together$polution == "O3"),
]
clear_together_PM10 <- clear_together[
  which(clear_together$polution == "PM10"),
]
clear_together_PM2.5 <- clear_together[
  which(clear_together$polution == "PM2.5"),
]
clear_together_SO2 <- clear_together[
  which(clear_together$polution == "SO2"),
]
library(echarts4r)
library(dplyr)
library(ggplot2)
#### Per-pollutant charts ####
# The same two charts are drawn for every pollutant, so they live in two
# helpers instead of six copy-pasted pipelines.

#' Line chart of the averaged reading over time, one series per station.
#'
#' @param data Frame with `location`, `time` and `average_airpolution` columns.
#' @param pollutant Pollutant label; prefixed to the chart title.
#' @return The echarts4r chart object (auto-printed when called at top level).
plot_pollutant_line <- function(data, pollutant) {
  data |>
    group_by(location) |>
    e_charts(time) |>
    e_line(average_airpolution) |>
    e_title(paste0(pollutant, "空氣汙染物")) |>
    e_datazoom(type = "inside") |>
    e_tooltip("axis")
}

#' Animated bar chart ranking the stations, one timeline step per `time` value.
#'
#' @param data Frame with `location`, `time` and `average_airpolution` columns.
#' @param pollutant Pollutant label; prefixed to the chart title.
#' @return The echarts4r chart object (auto-printed when called at top level).
plot_pollutant_race <- function(data, pollutant) {
  data |>
    group_by(time) |>
    e_charts(location, timeline = TRUE) |>
    e_bar(
      average_airpolution,
      realtimeSort = TRUE, # re-rank the bars at every timeline step
      seriesLayoutBy = "column"
    ) |>
    e_flip_coords() |>
    e_legend(show = FALSE) |>
    e_title(paste0(pollutant, "空氣汙染物")) |>
    e_timeline_opts(autoPlay = TRUE, show = TRUE) |>
    e_labels(
      fontSize = 12, # label font size
      fontWeight = "bold", # normal / bold / bolder / lighter
      fontStyle = "normal", # normal / italic / oblique
      fontFamily = "serif", # e.g. sans-serif, monospace, Arial, ...
      position = "right", # label position relative to the bar
      rotate = 0, # rotation angle
      # Was misspelled "rigth", which is not a valid alignment value.
      align = "right", # horizontal alignment: left / center / right
      verticalAlign = "top", # vertical alignment: top / middle / bottom
      color = "black" # label text colour
    )
}

#### CO #####
# Author's observation: Xiaogang clearly dominates.
plot_pollutant_line(clear_together_CO, "CO")
plot_pollutant_race(clear_together_CO, "CO")

#### NOx #####
# Author's observation: Xiaogang dominates again.
plot_pollutant_line(clear_together_NOx, "NOx")
plot_pollutant_race(clear_together_NOx, "NOx")

#### O3 #####
# Author's observation: surprisingly, Kinmen is the highest.
plot_pollutant_line(clear_together_O3, "O3")
plot_pollutant_race(clear_together_O3, "O3")

#### PM10 #####
# Author's observation: the stations are roughly level.
plot_pollutant_line(clear_together_PM10, "PM10")
plot_pollutant_race(clear_together_PM10, "PM10")

#### PM2.5 #####
# Author's observation: Douliu leads by a small margin.
plot_pollutant_line(clear_together_PM2.5, "PM2.5")
plot_pollutant_race(clear_together_PM2.5, "PM2.5")

#### SO2 #####
# Author's observation: Xiaogang's SO2 level is strikingly high.
plot_pollutant_line(clear_together_SO2, "SO2")
plot_pollutant_race(clear_together_SO2, "SO2")