Sys.setlocale(category = "LC_ALL", locale = "zh_TW.UTF-8") # 避免中文亂碼
## Warning in Sys.setlocale(category = "LC_ALL", locale = "zh_TW.UTF-8"): 作業系統
## 回報無法實現設定語區為 "zh_TW.UTF-8" 的要求
安裝需要的packages
packages = c("dplyr","ggplot2", "data.table", "scales", "tidytext","wordcloud2","lubridate","sqldf")
existing = as.character(installed.packages()[,1])
for(pkg in packages[!(packages %in% existing)]) install.packages(pkg)
載入需要的packages以及資料
require(dplyr)
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
require(ggplot2)
## Loading required package: ggplot2
require(data.table)
## Loading required package: data.table
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
require(scales)
## Loading required package: scales
require(wordcloud2)
## Loading required package: wordcloud2
require(tidytext)
## Loading required package: tidytext
require(lubridate)
## Loading required package: lubridate
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:data.table':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
require(sqldf)
## Loading required package: sqldf
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
載入資料 將日期轉年月
datacsv <- fread("./data/a4.csv", encoding = "UTF-8")
datacsv$artDate <- ymd(datacsv$date)#將日期轉時間 使用lubridate套件
datacsv$artYear <- paste0(year(datacsv$artDate) , month(datacsv$artDate))#取得年分
datacsv$artYM <- datacsv$artDate %>% format(format = "%Y%m") #取得死亡月份
str(datacsv) #檢視資料型態
## Classes 'data.table' and 'data.frame': 14507 obs. of 31 variables:
## $ V1 : int 1 2 3 4 5 6 7 8 9 10 ...
## $ id : int 87494 87495 87497 87499 87500 70054 70056 70057 70058 70059 ...
## $ ma_way : chr "白河" "白河" "白河" "白河" ...
## $ road_id : int 3 3 3 3 3 3 3 3 3 3 ...
## $ road_way : chr "S" "S" "S" "S" ...
## $ mileage : num 271 331 291 317 324 ...
## $ date : int 20190618 20190618 20190618 20190620 20190620 20160718 20160702 20160703 20160703 20160708 ...
## $ work_type : chr "外路肩" "外路肩" "外車道" "中車道" ...
## $ aminal : chr "喜鵲" "鴿子" "貓" "貓" ...
## $ pre_aminal : chr "樹鵲" "鴿子" "貓" "貓" ...
## $ gate : chr "03F2709S" "03F3392S" "03F2923S" "03F3187S" ...
## $ X : int 600136 653656 622436 635818 644738 502593 407793 407794 418944 363199 ...
## $ dt : int 20190618 20190618 20190618 20190620 20190620 20160718 20160702 20160703 20160703 20160708 ...
## $ road_id2 : int 3 3 3 3 3 3 3 3 3 3 ...
## $ road_way2 : chr "S" "S" "S" "S" ...
## $ mileage2 : chr "270.9" "339.2" "292.3" "318.7" ...
## $ avg_speed_31: chr "35.8208333333333" "24.8" "21.8958333333333" "43.2583333333333" ...
## $ avg_speed_32: chr "36.3833333333333" "24.3652173913043" "21.3666666666667" "42.8869565217391" ...
## $ avg_speed_41: chr "37.7583333333333" "33.7090909090909" "26.1454545454545" "72.3428571428571" ...
## $ avg_speed_42: chr "47.5772727272727" "31.2727272727273" "38.6" "48.1705882352941" ...
## $ avg_speed_5 : chr "49.1368421052632" "42.7954545454545" "60.7913043478261" "53.3133333333333" ...
## $ car_count_31: int 1639 2741 3281 1179 1495 5464 22074 21876 24343 2565 ...
## $ car_count_32: int 607 1132 1435 478 626 2028 5584 4718 5666 555 ...
## $ car_count_41: int 41 29 34 13 11 51 597 514 769 23 ...
## $ car_count_42: int 182 422 414 76 187 557 980 377 273 108 ...
## $ car_count_5 : int 50 451 279 88 227 300 499 218 151 2 ...
## $ start : num 262 331 290 310 321 ...
## $ end : num 271 339 292 319 326 ...
## $ artDate : Date, format: "2019-06-18" "2019-06-18" ...
## $ artYear : chr "20196" "20196" "20196" "20196" ...
## $ artYM : chr "201906" "201906" "201906" "201906" ...
## - attr(*, ".internal.selfref")=<externalptr>
資料欄位
- Column1 :
- id : 編號
- ma_way : 地點
- road_id : 國道
- road_way : 國道方向S=北 N=南
- mileage : 里程
- date : 日期
- work_type : 路段
- aminal : 種類
- pre_aminal : 可能種類
- gate : ETC的編號
- X :
- dt : 日期
- road_id_1 : 國道
- road_way_2 : 國道方向S=北 N=南
- mileage_3 : 里程
- avg_speed_31 : 小客車平均車速
- avg_speed_32 : 小貨車平均車速
- avg_speed_41 : 大客車平均車速
- avg_speed_42 : 大貨車平均車速
- avg_speed_5 : 連結車平均車速
- car_count_31 : 小客車數量
- car_count_32 : 小貨車數量
- car_count_41 : 大客車數量
- car_count_42 : 大貨車數量
- car_count_5 : 連結車數量
- start :
- end :
資料清洗
- 清洗前資料:14507筆
- 清洗後資料:XXXXX筆
- 移除 aminal != NA 資料
- 移除 aminal != 其它資料 資料
- 移除 work_type != NA 資料
- 取代 aminal 有屍的字段
a1s2 <- sqldf("select
id,
ma_way,
road_id,
road_way,
mileage,
date,
work_type,
replace( aminal, '屍','')aminal,
pre_aminal,
gate,
X,
dt,
road_id2,
road_way2,
mileage2,
avg_speed_31,
avg_speed_32,
avg_speed_41,
avg_speed_42,
avg_speed_5,
car_count_31,
car_count_32,
car_count_41,
car_count_42,
car_count_5,
start,
end,
artDate,
artYear,
artYM
from datacsv
where aminal != 'NA'
AND aminal != '其它'
AND work_type != 'NA'
ORDER BY date,ma_way ")
Ch.0 動物死亡在公路段車速是否有關係
Ch.1 動物死亡率是否根據季節是否有關係
Ch.2 動物死亡率根據車流量是否有關係