Desciptive Statistics
2024-08-13
#TABLES
##Read data
<-read.csv("/Users/kennyorielolana/Library/CloudStorage/OneDrive-ChiangMaiUniversity/Dissertation-Kenny’s MacBook Air/Analysis/clean_data.csv")
data
library(ggplot2)
library(ggthemes)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(scales)
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
#Data structure
str(data)
## 'data.frame': 10781 obs. of 12 variables:
## $ ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Year : int 2017 2017 2017 2017 2017 2017 2017 2017 2017 2017 ...
## $ Month : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Date : chr "2017-01" "2017-01" "2017-01" "2017-01" ...
## $ Region : chr "1" "1" "1" "1" ...
## $ Region_name: chr "Region I (Ilocos Region)" "Region I (Ilocos Region)" "Region I (Ilocos Region)" "Region I (Ilocos Region)" ...
## $ Province : chr "DAGUPAN CITY" "ILOCOS NORTE" "ILOCOS SUR" "LA UNION" ...
## $ Location_ID: int 126 143 144 153 188 29 103 149 178 194 ...
## $ Cases : int 22 16 38 71 118 133 8 98 14 15 ...
## $ X.long : num 120 121 121 120 120 ...
## $ Y.lat : num 16.1 18.2 17.2 16.6 16 ...
## $ Population : int 171460 594105 690674 788851 2969273 1203559 135654 1599858 454993 189888 ...
#Summary for all observation
summary(data$Cases)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1 20 58 147 162 8934
#No. of cases per year
<-data %>%
case_per_yeargroup_by(Year) %>%
summarize(Total=sum(Cases), Median=median(Cases), Q3=quantile(Cases, probs=0.75), Q1=quantile(Cases, probs = 0.25), IQR=Q3-Q1)
print(case_per_year)
## # A tibble: 8 × 6
## Year Total Median Q3 Q1 IQR
## <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 2017 152159 46 118 19 99
## 2 2018 249398 78.5 212 31 181
## 3 2019 437089 134 324 49 275
## 4 2020 88636 23 60 7 53
## 5 2021 80919 22 61.2 8 53.2
## 6 2022 264390 88 193 31 162
## 7 2023 216874 89 189. 35 154.
## 8 2024 95723 57 160. 21 139.
#Average annual cases (2017-July 2024)
summary(case_per_year$Total)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 80919 93951 184516 198148 253146 437089
sd(case_per_year$Total)
## [1] 121184.3
ggplot(case_per_year, aes(Year,Total)) +
geom_point(size=4, colour = "blue") +
geom_line() +labs(title = "Dengue Cases in the Philippines, 2017- July 2024",
x = "Year",
y = "Number of Cases") +
scale_y_continuous(labels = comma) + # Format y-axis labels to 6-digit numbers
scale_x_continuous(breaks = 2017:2024) + # Ensure x-axis shows all years
theme_minimal() + # Adding a clean theme
theme(plot.title = element_text(hjust = 0.5)) #Center the plot title
#No. of cases per year per month
<-data %>%
case_by_yearmonthgroup_by(Year, Month) %>%
summarize(Total=sum(Cases), Mean=mean(Cases), Median=median(Cases), Q3=quantile(Cases, probs=0.75), Q1=quantile(Cases, probs = 0.25), IQR=Q3-Q1) %>%
mutate(Month=as.factor(Month))
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.
print(case_by_yearmonth, n=91)
## # A tibble: 91 × 8
## # Groups: Year [8]
## Year Month Total Mean Median Q3 Q1 IQR
## <int> <fct> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2017 1 14217 120. 64.5 154 26.2 128.
## 2 2017 2 9306 78.2 41 96 16.5 79.5
## 3 2017 3 7425 62.9 35 72.8 13 59.8
## 4 2017 4 5157 44.5 25.5 55.5 11 44.5
## 5 2017 5 5547 47.4 26 58 11 47
## 6 2017 6 9800 82.4 43 93 16 77
## 7 2017 7 18278 155. 71.5 169. 29.2 140.
## 8 2017 8 24961 206. 96 241 33 208
## 9 2017 9 17365 147. 64.5 158. 22 136.
## 10 2017 10 15018 126. 59 124. 22 102.
## 11 2017 11 14156 120. 55 116 25 91
## 12 2017 12 10929 91.1 40 88 18 70
## 13 2018 1 14253 118. 66 108 33 75
## 14 2018 2 9727 81.1 41 84.2 20 64.2
## 15 2018 3 7822 64.6 38 72 19 53
## 16 2018 4 5798 48.3 30 59 14.8 44.2
## 17 2018 5 8067 67.8 44 86 20 66
## 18 2018 6 15545 130. 85 171. 30 141.
## 19 2018 7 30555 255. 152. 315 49.5 266.
## 20 2018 8 37919 313. 202 394 66 328
## 21 2018 9 31459 260. 164 310 68 242
## 22 2018 10 33011 273. 161 354 75 279
## 23 2018 11 30238 250. 129 348 56 292
## 24 2018 12 25004 207. 112 317 44 273
## 25 2019 1 32459 270. 164. 378. 79.2 299.
## 26 2019 2 20271 168. 117 229 47 182
## 27 2019 3 14053 117. 79 167 33.8 133.
## 28 2019 4 9729 80.4 45 107 19 88
## 29 2019 5 13946 115. 54 124 22 102
## 30 2019 6 32901 272. 124 328 53 275
## 31 2019 7 78436 648. 295 917 138 779
## 32 2019 8 90284 752. 427 855. 190 665.
## 33 2019 9 61711 510. 293 565 147 418
## 34 2019 10 40988 339. 204 402 96 306
## 35 2019 11 25918 214. 111 261 58 203
## 36 2019 12 16393 135. 67 160 29 131
## 37 2020 1 25307 211. 110. 233 54.8 178.
## 38 2020 2 14657 121. 66 155 33 122
## 39 2020 3 6771 56.9 31 76 12 64
## 40 2020 4 2227 20.6 11.5 29.5 4 25.5
## 41 2020 5 2312 21.0 10 29.5 4 25.5
## 42 2020 6 3672 33.1 16 44 5 39
## 43 2020 7 6124 53.7 23 63.8 6 57.8
## 44 2020 8 6047 51.2 20 50 7.25 42.8
## 45 2020 9 5135 43.9 14 39 6 33
## 46 2020 10 4946 43.0 15 41.5 6 35.5
## 47 2020 11 6130 54.2 18 52 6 46
## 48 2020 12 5308 47.8 12 38 5 33
## 49 2021 1 9124 76.7 23 68 9 59
## 50 2021 2 6962 59 19 56.2 6.25 50
## 51 2021 3 5894 51.3 19 51.5 6 45.5
## 52 2021 4 3803 33.4 13 42 6 36
## 53 2021 5 3883 34.1 13 44 5 39
## 54 2021 6 5564 49.2 17 54 6 48
## 55 2021 7 9601 84.2 31 82.8 10.2 72.5
## 56 2021 8 8847 76.9 27 73.5 10.5 63
## 57 2021 9 5694 48.7 20 54 8 46
## 58 2021 10 7001 61.4 28 68 11.2 56.8
## 59 2021 11 8836 75.5 31 90 16 74
## 60 2021 12 5710 50.1 23 63.5 10 53.5
## 61 2022 1 5561 47.1 29.5 58.8 12 46.8
## 62 2022 2 5945 49.5 30.5 67.5 11 56.5
## 63 2022 3 8169 68.6 40 97 15 82
## 64 2022 4 11698 96.7 57 132 20 112
## 65 2022 5 20927 173. 101 234 33 201
## 66 2022 6 35204 291. 163 384 60 324
## 67 2022 7 49626 410. 170 453 90 363
## 68 2022 8 41986 347. 174 383 88 295
## 69 2022 9 31496 260. 155 289 69 220
## 70 2022 10 21705 179. 110 216 51 165
## 71 2022 11 18977 157. 114 185 48 137
## 72 2022 12 13096 108. 69 132 31 101
## 73 2023 1 13875 116. 76 161. 34.8 126
## 74 2023 2 11099 91.7 61 127 21 106
## 75 2023 3 10124 84.4 54 118. 20 98.5
## 76 2023 4 9264 77.2 47 105 18.8 86.2
## 77 2023 5 13013 109. 57 154. 24 130.
## 78 2023 6 19427 163. 95 223 36.5 186.
## 79 2023 7 23288 192. 119 260 43 217
## 80 2023 8 24022 199. 119 288 48 240
## 81 2023 9 25231 209. 128 313 54 259
## 82 2023 10 26073 215. 144 315 59 256
## 83 2023 11 23011 190. 132 239 55 184
## 84 2023 12 18447 154. 95 212. 49.2 163.
## 85 2024 1 19050 157. 95 210 44 166
## 86 2024 2 15473 129. 82 188. 26.5 162
## 87 2024 3 13986 117. 56.5 166. 24.5 141.
## 88 2024 4 11503 98.3 51 127 23 104
## 89 2024 5 13037 112. 58 168. 24.5 144.
## 90 2024 6 20447 175. 102 281 28 253
## 91 2024 7 2227 21.6 12 28.5 4 24.5
#Plot monthly cases by year
ggplot(case_by_yearmonth, aes(Month,Total)) +
geom_col(width = 1, fill = "darkred") + # plot the count data as columns
theme_minimal()+ # simplify the background panels
labs( # add plot labels, title, etc.
x = "Month",
y = "Dengue cases",
title = "Dengue cases by year-month") +
facet_wrap(~Year) # the facets are created)
#Box plot of cases per month
ggplot(case_by_yearmonth, aes(x=Month, y=Total)) +
geom_boxplot() +
labs(
x = "Month",
y = "Dengue cases",
title = "Dengue cases by Month (2017-July 2024)")
#Reports per Province/city Epidemiology units & percentage of reporting
<-data %>%
reportsgroup_by(Province) %>%
count() %>%
arrange(desc(n)) %>%
mutate(Percent=n/91*100)
print(reports, n=121)
## # A tibble: 121 × 3
## # Groups: Province [121]
## Province n Percent
## <chr> <int> <dbl>
## 1 LEYTE 92 101.
## 2 AGUSAN DEL NORTE 91 100
## 3 AGUSAN DEL SUR 91 100
## 4 ANGELES CITY 91 100
## 5 ANTIQUE 91 100
## 6 AURORA 91 100
## 7 BACOLOD CITY 91 100
## 8 BATAAN 91 100
## 9 BATANGAS 91 100
## 10 BENGUET 91 100
## 11 BOHOL 91 100
## 12 BUKIDNON 91 100
## 13 BULACAN 91 100
## 14 CAGAYAN DE ORO CITY 91 100
## 15 CALOOCAN CITY 91 100
## 16 CAMARINES SUR 91 100
## 17 CAPIZ 91 100
## 18 CAVITE 91 100
## 19 CEBU 91 100
## 20 CEBU CITY 91 100
## 21 CITY OF ISABELA 91 100
## 22 CITY OF MAKATI 91 100
## 23 CITY OF MALABON 91 100
## 24 CITY OF MANDALUYONG 91 100
## 25 CITY OF MANILA 91 100
## 26 CITY OF MARIKINA 91 100
## 27 CITY OF VALENZUELA 91 100
## 28 COTABATO (NORTH COTABATO) 91 100
## 29 COTABATO CITY 91 100
## 30 DAGUPAN CITY 91 100
## 31 DAVAO CITY 91 100
## 32 DAVAO DE ORO 91 100
## 33 DAVAO DEL NORTE 91 100
## 34 DAVAO DEL SUR 91 100
## 35 GENERAL SANTOS CITY (DADIANGAS) 91 100
## 36 IFUGAO 91 100
## 37 ILIGAN CITY 91 100
## 38 ILOCOS NORTE 91 100
## 39 ILOCOS SUR 91 100
## 40 ILOILO 91 100
## 41 ISABELA 91 100
## 42 KALINGA 91 100
## 43 LA UNION 91 100
## 44 LAGUNA 91 100
## 45 LANAO DEL NORTE 91 100
## 46 MANDAUE CITY 91 100
## 47 MISAMIS OCCIDENTAL 91 100
## 48 MISAMIS ORIENTAL 91 100
## 49 NEGROS OCCIDENTAL 91 100
## 50 NEGROS ORIENTAL 91 100
## 51 NUEVA ECIJA 91 100
## 52 NUEVA VIZCAYA 91 100
## 53 OCCIDENTAL MINDORO 91 100
## 54 ORIENTAL MINDORO 91 100
## 55 PALAWAN 91 100
## 56 PAMPANGA 91 100
## 57 PANGASINAN 91 100
## 58 PUERTO PRINCESA CITY 91 100
## 59 QUEZON 91 100
## 60 QUEZON CITY 91 100
## 61 QUIRINO 91 100
## 62 RIZAL 91 100
## 63 ROMBLON 91 100
## 64 SARANGANI 91 100
## 65 SOUTH COTABATO 91 100
## 66 SURIGAO DEL NORTE 91 100
## 67 SURIGAO DEL SUR 91 100
## 68 TARLAC 91 100
## 69 ZAMBOANGA CITY 91 100
## 70 ZAMBOANGA DEL NORTE 91 100
## 71 ZAMBOANGA DEL SUR 91 100
## 72 ZAMBOANGA SIBUGAY 91 100
## 73 APAYAO 90 98.9
## 74 BAGUIO CITY 90 98.9
## 75 CAGAYAN 90 98.9
## 76 CITY OF LAS PINAS 90 98.9
## 77 CITY OF PARANAQUE 90 98.9
## 78 CITY OF PASIG 90 98.9
## 79 DAVAO ORIENTAL 90 98.9
## 80 ILOILO CITY 90 98.9
## 81 NAGA CITY 90 98.9
## 82 ORMOC CITY 90 98.9
## 83 PASAY CITY 90 98.9
## 84 SAMAR (WESTERN SAMAR) 90 98.9
## 85 SULTAN KUDARAT 90 98.9
## 86 TACLOBAN CITY 90 98.9
## 87 ZAMBALES 90 98.9
## 88 BASILAN 89 97.8
## 89 BUTUAN CITY 89 97.8
## 90 LAPU-LAPU CITY (OPON) 89 97.8
## 91 NORTHERN SAMAR 89 97.8
## 92 TAGUIG CITY 89 97.8
## 93 CAMARINES NORTE 88 96.7
## 94 OLONGAPO CITY 88 96.7
## 95 SIQUIJOR 88 96.7
## 96 SOUTHERN LEYTE 88 96.7
## 97 ABRA 87 95.6
## 98 BILIRAN 87 95.6
## 99 CITY OF MUNTINLUPA 87 95.6
## 100 CITY OF NAVOTAS 87 95.6
## 101 CITY OF SAN JUAN 87 95.6
## 102 EASTERN SAMAR 87 95.6
## 103 MOUNTAIN PROVINCE 87 95.6
## 104 AKLAN 86 94.5
## 105 CITY OF SANTIAGO 86 94.5
## 106 GUIMARAS 86 94.5
## 107 LANAO DEL SUR 86 94.5
## 108 MASBATE 86 94.5
## 109 SULU 86 94.5
## 110 ALBAY 84 92.3
## 111 DAVAO OCCIDENTAL 84 92.3
## 112 MAGUINDANAO 84 92.3
## 113 MARINDUQUE 84 92.3
## 114 SORSOGON 84 92.3
## 115 TAWI-TAWI 84 92.3
## 116 CATANDUANES 83 91.2
## 117 CAMIGUIN 82 90.1
## 118 PATEROS 81 89.0
## 119 DINAGAT ISLANDS 78 85.7
## 120 LUCENA CITY 77 84.6
## 121 BATANES 51 56.0
summary(reports$Percent)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 56.04 97.80 100.00 97.91 100.00 101.10
%>% ungroup() %>% count(Percent, sort=TRUE) reports
## # A tibble: 14 × 2
## Percent n
## <dbl> <int>
## 1 100 71
## 2 98.9 15
## 3 95.6 7
## 4 92.3 6
## 5 94.5 6
## 6 97.8 5
## 7 96.7 4
## 8 56.0 1
## 9 84.6 1
## 10 85.7 1
## 11 89.0 1
## 12 90.1 1
## 13 91.2 1
## 14 101. 1
#Highest Recorded cases (>3000)
<-data %>%
highest_casesgroup_by(Province) %>%
arrange(desc(Cases)) %>%
select(Province,Region_name,Cases, Date) %>%
filter(Cases>3000)
print(highest_cases)
## # A tibble: 14 × 4
## # Groups: Province [6]
## Province Region_name Cases Date
## <chr> <chr> <int> <chr>
## 1 ILOILO Region VI (Western Visayas) 8934 2019-07
## 2 LAGUNA Region IV-A (Calabarzon) 6138 2019-08
## 3 BULACAN Region III (Central Luzon) 5434 2022-07
## 4 CAVITE Region IV-A (Calabarzon) 5004 2019-08
## 5 ILOILO Region VI (Western Visayas) 4847 2019-08
## 6 LAGUNA Region IV-A (Calabarzon) 4562 2019-09
## 7 CAVITE Region IV-A (Calabarzon) 4166 2019-09
## 8 CAVITE Region IV-A (Calabarzon) 3817 2019-07
## 9 ILOILO Region VI (Western Visayas) 3492 2019-06
## 10 NUEVA ECIJA Region III (Central Luzon) 3459 2022-07
## 11 BULACAN Region III (Central Luzon) 3419 2022-06
## 12 BATANGAS Region IV-A (Calabarzon) 3305 2019-08
## 13 LAGUNA Region IV-A (Calabarzon) 3088 2019-07
## 14 BULACAN Region III (Central Luzon) 3081 2022-08
#Calculate incidence
<-data %>%
incidencemutate(Incidence = Cases/Population, Incidence_per_100000 = Incidence*100000)
str(incidence)
## 'data.frame': 10781 obs. of 14 variables:
## $ ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Year : int 2017 2017 2017 2017 2017 2017 2017 2017 2017 2017 ...
## $ Month : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Date : chr "2017-01" "2017-01" "2017-01" "2017-01" ...
## $ Region : chr "1" "1" "1" "1" ...
## $ Region_name : chr "Region I (Ilocos Region)" "Region I (Ilocos Region)" "Region I (Ilocos Region)" "Region I (Ilocos Region)" ...
## $ Province : chr "DAGUPAN CITY" "ILOCOS NORTE" "ILOCOS SUR" "LA UNION" ...
## $ Location_ID : int 126 143 144 153 188 29 103 149 178 194 ...
## $ Cases : int 22 16 38 71 118 133 8 98 14 15 ...
## $ X.long : num 120 121 121 120 120 ...
## $ Y.lat : num 16.1 18.2 17.2 16.6 16 ...
## $ Population : int 171460 594105 690674 788851 2969273 1203559 135654 1599858 454993 189888 ...
## $ Incidence : num 1.28e-04 2.69e-05 5.50e-05 9.00e-05 3.97e-05 ...
## $ Incidence_per_100000: num 12.83 2.69 5.5 9 3.97 ...
#Highest Incidence Per 100,000 population (>300)
<-incidence %>%
highest_incidencearrange(desc(Incidence_per_100000)) %>%
select(Province,Region_name,Incidence_per_100000, Date) %>%
filter(Incidence_per_100000>300)
print(highest_incidence)
## Province Region_name Incidence_per_100000
## 1 BATANES Region II (Cagayan Valley) 2717.4225
## 2 BATANES Region II (Cagayan Valley) 1495.7877
## 3 BATANES Region II (Cagayan Valley) 1005.3920
## 4 BATANES Region II (Cagayan Valley) 969.1478
## 5 APAYAO Cordillera Administrative Region (CAR) 540.8763
## 6 GUIMARAS Region VI (Western Visayas) 540.0301
## 7 BATANES Region II (Cagayan Valley) 464.6627
## 8 ILOILO Region VI (Western Visayas) 456.7389
## 9 BATANES Region II (Cagayan Valley) 443.8701
## 10 KALINGA Cordillera Administrative Region (CAR) 442.3214
## 11 APAYAO Cordillera Administrative Region (CAR) 427.3327
## 12 TACLOBAN CITY Region VIII (Eastern Visayas) 418.8546
## 13 KALINGA Cordillera Administrative Region (CAR) 380.6646
## 14 TACLOBAN CITY Region VIII (Eastern Visayas) 354.8048
## 15 APAYAO Cordillera Administrative Region (CAR) 354.5659
## 16 AKLAN Region VI (Western Visayas) 329.3868
## 17 GUIMARAS Region VI (Western Visayas) 310.3641
## Date
## 1 2018-07
## 2 2018-08
## 3 2023-10
## 4 2018-06
## 5 2022-06
## 6 2019-07
## 7 2018-05
## 8 2019-07
## 9 2023-09
## 10 2022-07
## 11 2022-07
## 12 2019-07
## 13 2022-08
## 14 2019-08
## 15 2022-05
## 16 2019-07
## 17 2019-08
# Convert Year and Month to factors
$Year <- as.factor(data$Year)
data$Month <- as.factor(data$Month)
data
# Convert Province to factor
$Province <- as.factor(data$Province)
data
# Create a Date column from Year and Month (keeping it continuous)
$Date <- as.Date(paste(data$Year, data$Month, "01", sep = "-"))
data
str(data)
## 'data.frame': 10781 obs. of 12 variables:
## $ ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Year : Factor w/ 8 levels "2017","2018",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Month : Factor w/ 12 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Date : Date, format: "2017-01-01" "2017-01-01" ...
## $ Region : chr "1" "1" "1" "1" ...
## $ Region_name: chr "Region I (Ilocos Region)" "Region I (Ilocos Region)" "Region I (Ilocos Region)" "Region I (Ilocos Region)" ...
## $ Province : Factor w/ 121 levels "ABRA","AGUSAN DEL NORTE",..: 49 62 63 68 94 22 45 66 87 100 ...
## $ Location_ID: int 126 143 144 153 188 29 103 149 178 194 ...
## $ Cases : int 22 16 38 71 118 133 8 98 14 15 ...
## $ X.long : num 120 121 121 120 120 ...
## $ Y.lat : num 16.1 18.2 17.2 16.6 16 ...
## $ Population : int 171460 594105 690674 788851 2969273 1203559 135654 1599858 454993 189888 ...
#Plot Cases per province
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
<- ggplot(data, aes(x = Date, y = Cases, group = Province, color = Province)) +
p geom_line() +
labs(title = "Philippine Dengue Cases per Province",
x = "Date",
y = "Number of Cases") +
scale_y_continuous(labels = comma) +
scale_x_date(date_labels = "%b %Y", date_breaks = "2 months") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
ggplotly(p) # Make the plot interactive
#Plot incidence per province
# If your Date column is in "YYYY-MM" format, convert it as follows:
$Date <- as.Date(paste0(incidence$Date, "-01"))
incidence
# Then proceed with your ggplot and plotly code
<- ggplot(incidence, aes(x = Date, y = Incidence_per_100000, group = Province, color = Province)) +
p1 geom_line() +
geom_hline(yintercept = 300, linetype = "dashed", color = "black", size = 0.75)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
labs(title = "Dengue Incidence per Province",
x = "Date",
y = "Incidence per 100,000 population") +
scale_y_continuous(labels = scales::comma) +
scale_x_date(date_labels = "%b %Y", date_breaks = "2 months") +
theme_minimal() + # Adding a clean theme
theme(plot.title = element_text(hjust = 0.5), # Center the plot title
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) # Rotate x-axis labels vertically
## NULL
# Convert the ggplot object to plotly
ggplotly(p1)
#incidence per yearmonth
<- incidence %>%
incidence_per_yearmonth group_by(Year, Month) %>%
summarize(Total=sum(Incidence_per_100000, na.rm=TRUE), Mean=mean(Incidence_per_100000, na.rm = TRUE), Median=median(Incidence_per_100000, na.rm=TRUE), Q3=quantile(Incidence_per_100000, probs=0.75, na.rm = TRUE), Q1=quantile(Incidence_per_100000, probs = 0.25, na.rm=TRUE), IQR=Q3-Q1)
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.
print(incidence_per_yearmonth, n=91)
## # A tibble: 91 × 8
## # Groups: Year [8]
## Year Month Total Mean Median Q3 Q1 IQR
## <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2017 1 1818. 15.4 11.2 18.7 6.16 12.5
## 2 2017 2 1146. 9.63 7.11 13.4 3.69 9.68
## 3 2017 3 904. 7.66 5.76 9.71 2.89 6.82
## 4 2017 4 659. 5.68 4.16 7.23 2.15 5.08
## 5 2017 5 695. 5.94 4.14 7.28 2.40 4.89
## 6 2017 6 1185. 9.96 6.42 12.9 3.82 9.03
## 7 2017 7 2190. 18.6 12.7 23.5 6.99 16.5
## 8 2017 8 2846. 23.5 18.8 30.2 6.98 23.3
## 9 2017 9 1967. 16.7 14.5 23.5 6.14 17.4
## 10 2017 10 1708. 14.4 12.0 20.1 4.62 15.5
## 11 2017 11 1558. 13.2 10.6 18.1 4.84 13.3
## 12 2017 12 1169. 9.75 7.93 12.3 3.87 8.39
## 13 2018 1 1727. 14.3 12.2 17.6 6.80 10.8
## 14 2018 2 1171. 9.75 8.23 12.7 4.21 8.47
## 15 2018 3 1068. 8.83 6.66 10.3 3.86 6.45
## 16 2018 4 880. 7.34 4.80 7.55 2.92 4.63
## 17 2018 5 1537. 12.9 6.18 12.3 4.41 7.93
## 18 2018 6 2981. 24.8 12.6 22.1 6.87 15.2
## 19 2018 7 6442. 53.7 22.3 42.1 12.1 30.0
## 20 2018 8 6247. 51.6 29.5 51.5 16.8 34.8
## 21 2018 9 4410. 36.4 29.1 41.2 16.6 24.6
## 22 2018 10 4609. 38.1 29.9 44.6 17.0 27.6
## 23 2018 11 4257. 35.2 25.1 41.4 15.2 26.2
## 24 2018 12 3717. 30.7 18.2 36.4 11.9 24.5
## 25 2019 1 4386. 36.6 29.2 44.8 17.8 26.9
## 26 2019 2 2722. 22.5 19.8 28.1 9.97 18.1
## 27 2019 3 1830. 15.3 13.1 21.5 6.02 15.4
## 28 2019 4 1277. 10.5 7.40 13.0 4.03 8.98
## 29 2019 5 1785. 14.8 9.25 16.6 4.73 11.9
## 30 2019 6 4067. 33.6 20.6 40.7 10.4 30.4
## 31 2019 7 9679. 80.0 52.7 98.9 27.4 71.4
## 32 2019 8 10464. 87.2 78.6 112. 48.9 63.3
## 33 2019 9 7070. 58.4 57.2 76.2 38.2 38.0
## 34 2019 10 5049. 41.7 37.7 55.4 24.3 31.0
## 35 2019 11 3292. 27.2 24.3 36.7 13.7 23.0
## 36 2019 12 2016. 16.7 12.2 22.9 5.84 17.1
## 37 2020 1 3179. 26.5 19.3 33.5 10.5 23.1
## 38 2020 2 1883. 15.6 12.3 20.8 7.18 13.6
## 39 2020 3 920. 7.73 4.77 10.1 2.63 7.48
## 40 2020 4 316. 2.93 1.61 4.49 0.760 3.73
## 41 2020 5 305. 2.78 1.74 3.81 0.585 3.23
## 42 2020 6 455. 4.10 2.50 5.86 1.04 4.82
## 43 2020 7 746. 6.54 3.44 9.12 1.40 7.72
## 44 2020 8 791. 6.70 3.45 6.79 1.50 5.29
## 45 2020 9 687. 5.88 2.31 5.59 1.33 4.26
## 46 2020 10 615. 5.35 2.87 5.51 1.15 4.35
## 47 2020 11 673. 5.96 3.36 6.14 1.30 4.84
## 48 2020 12 521. 4.69 2.22 5.03 1.13 3.90
## 49 2021 1 915. 7.69 4.21 11.2 1.70 9.48
## 50 2021 2 720. 6.10 3.77 8.00 1.37 6.63
## 51 2021 3 651. 5.66 3.25 6.11 1.65 4.46
## 52 2021 4 452. 3.97 2.17 4.20 1.15 3.05
## 53 2021 5 480. 4.21 2.20 3.93 1.07 2.86
## 54 2021 6 739. 6.54 2.76 5.36 1.41 3.95
## 55 2021 7 1256. 11.0 4.89 10.0 2.32 7.71
## 56 2021 8 1106. 9.62 4.56 10.5 2.05 8.43
## 57 2021 9 759. 6.49 3.76 7.46 1.68 5.78
## 58 2021 10 895. 7.85 5.22 10.3 2.33 8.02
## 59 2021 11 1074. 9.18 5.19 13.5 2.55 11.0
## 60 2021 12 734. 6.44 3.82 8.51 1.66 6.85
## 61 2022 1 744. 6.30 4.66 8.70 2.51 6.19
## 62 2022 2 783. 6.52 4.96 8.02 2.58 5.43
## 63 2022 3 1112. 9.34 6.23 11.1 3.85 7.22
## 64 2022 4 1692. 14.0 7.85 17.3 4.67 12.7
## 65 2022 5 3058. 25.3 15.6 29.1 7.74 21.3
## 66 2022 6 5059. 41.8 23.9 46.3 13.8 32.5
## 67 2022 7 6516. 53.9 34.4 51.5 19.5 32.0
## 68 2022 8 5455. 45.1 29.1 47.8 19.0 28.9
## 69 2022 9 4272. 35.3 25.1 39.7 16.3 23.4
## 70 2022 10 3032. 25.1 18.3 27.8 11.9 15.8
## 71 2022 11 2828. 23.4 18.1 27.5 10.6 16.9
## 72 2022 12 1947. 16.1 11.4 19.8 6.79 13.1
## 73 2023 1 2013. 16.8 13.3 22.3 7.42 14.9
## 74 2023 2 1540. 12.7 10.0 17.9 6.05 11.8
## 75 2023 3 1351. 11.3 8.35 14.5 5.44 9.05
## 76 2023 4 1275. 10.6 7.34 13.8 4.70 9.13
## 77 2023 5 1864. 15.7 8.57 18.3 5.08 13.2
## 78 2023 6 2605. 21.9 14.5 26.7 8.03 18.6
## 79 2023 7 3472. 28.7 16.2 32.4 11.5 20.9
## 80 2023 8 3687. 30.5 18.5 37.7 11.9 25.8
## 81 2023 9 4148. 34.3 21.9 40.2 11.4 28.9
## 82 2023 10 4711. 38.9 23.1 38.4 11.8 26.6
## 83 2023 11 3490. 28.8 21.8 36.7 12.5 24.2
## 84 2023 12 2624. 21.9 17.0 27.8 9.75 18.1
## 85 2024 1 2701. 22.3 16.0 30.8 9.86 20.9
## 86 2024 2 2062. 17.2 12.8 23.2 7.06 16.2
## 87 2024 3 1801. 15.0 10.9 22.6 5.33 17.3
## 88 2024 4 1562. 13.3 8.49 19.3 4.57 14.7
## 89 2024 5 1840. 15.9 10.1 22.4 4.23 18.1
## 90 2024 6 3101. 26.5 14.0 30.7 6.03 24.6
## 91 2024 7 339. 3.29 1.45 3.98 0.785 3.20
#incidence per month
<- incidence %>%
incidence_per_month group_by(Month) %>%
summarize(Total=sum(Incidence_per_100000, na.rm=TRUE), Mean=mean(Incidence_per_100000, na.rm = TRUE), Median=median(Incidence_per_100000, na.rm=TRUE), Q3=quantile(Incidence_per_100000, probs=0.75, na.rm = TRUE), Q1=quantile(Incidence_per_100000, probs = 0.25, na.rm=TRUE), IQR=Q3-Q1, SD=sd(Incidence_per_100000, na.rm=TRUE), lower_CI=Mean-SD, Upper_CI=Mean+SD)
print(incidence_per_month, n=12)
## # A tibble: 12 × 10
## Month Total Mean Median Q3 Q1 IQR SD lower_CI Upper_CI
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 17484. 18.3 12.3 23.7 5.80 17.9 20.2 -1.96 38.5
## 2 2 12025. 12.5 8.70 16.5 4.39 12.1 12.4 0.142 24.9
## 3 3 9637. 10.1 6.77 12.8 3.50 9.25 10.8 -0.712 21.0
## 4 4 8113. 8.66 5.26 10.0 2.53 7.50 12.2 -3.52 20.8
## 5 5 11566. 12.3 6.11 14.0 2.79 11.2 24.0 -11.6 36.3
## 6 6 20191. 21.5 10.3 23.4 4.06 19.3 45.8 -24.3 67.3
## 7 7 30639. 32.9 14.0 35.1 4.08 31.0 102. -68.9 135.
## 8 8 30595. 36.6 20.1 46.2 6.32 39.9 67.6 -31.0 104.
## 9 9 23314. 27.9 17.5 37.5 5.03 32.4 35.3 -7.42 63.2
## 10 10 20620. 24.8 16.1 32.9 5.48 27.4 44.0 -19.2 68.8
## 11 11 17172. 20.6 14.7 27.0 5.24 21.7 26.2 -5.57 46.9
## 12 12 12729. 15.4 9.52 18.8 3.69 15.1 21.0 -5.67 36.4
#Plot incidence per month
ggplot(incidence_per_month, aes(x=Month, y=Median)) +
geom_point(size=4, colour = "blue") +
geom_line() +
geom_errorbar(aes(ymin = Q1, ymax = Q3), width = 0.2, colour = "red") + # Add error bars
labs(title = "Monthly Incidence of Dengue in the Philippines, 2017- July 2024",
x = "Month",
y = "Median incidence per 100,000 population") +
theme_minimal() + # Adding a clean theme
theme(plot.title = element_text(hjust = 0.5)) #Center the plot title
#Plot polar coordinates
ggplot(incidence_per_yearmonth, aes(x=Month, y = Mean, group=Year, color=Year)) +
geom_line(aes(linewidth = 0.5)) +
coord_polar(start = -pi/12)
Including Plots
You can also embed plots, for example:
Note that the echo = FALSE
parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.