Desciptive Statistics

2024-08-13

Dr. Kenny Oriel Olana

#TABLES
##Read data
data<-read.csv("/Users/kennyorielolana/Library/CloudStorage/OneDrive-ChiangMaiUniversity/Dissertation-Kenny’s MacBook Air/Analysis/clean_data.csv")

library(ggplot2)
library(ggthemes)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(scales)
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
#Data structure
str(data)
## 'data.frame':    10781 obs. of  12 variables:
##  $ ID         : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Year       : int  2017 2017 2017 2017 2017 2017 2017 2017 2017 2017 ...
##  $ Month      : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Date       : chr  "2017-01" "2017-01" "2017-01" "2017-01" ...
##  $ Region     : chr  "1" "1" "1" "1" ...
##  $ Region_name: chr  "Region I (Ilocos Region)" "Region I (Ilocos Region)" "Region I (Ilocos Region)" "Region I (Ilocos Region)" ...
##  $ Province   : chr  "DAGUPAN CITY" "ILOCOS NORTE" "ILOCOS SUR" "LA UNION" ...
##  $ Location_ID: int  126 143 144 153 188 29 103 149 178 194 ...
##  $ Cases      : int  22 16 38 71 118 133 8 98 14 15 ...
##  $ X.long     : num  120 121 121 120 120 ...
##  $ Y.lat      : num  16.1 18.2 17.2 16.6 16 ...
##  $ Population : int  171460 594105 690674 788851 2969273 1203559 135654 1599858 454993 189888 ...
#Summary for all observation
summary(data$Cases)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       1      20      58     147     162    8934
#No. of cases per year
case_per_year<-data %>%
  group_by(Year) %>%
  summarize(Total=sum(Cases), Median=median(Cases), Q3=quantile(Cases, probs=0.75), Q1=quantile(Cases, probs = 0.25), IQR=Q3-Q1)

print(case_per_year)
## # A tibble: 8 × 6
##    Year  Total Median    Q3    Q1   IQR
##   <int>  <int>  <dbl> <dbl> <dbl> <dbl>
## 1  2017 152159   46   118      19  99  
## 2  2018 249398   78.5 212      31 181  
## 3  2019 437089  134   324      49 275  
## 4  2020  88636   23    60       7  53  
## 5  2021  80919   22    61.2     8  53.2
## 6  2022 264390   88   193      31 162  
## 7  2023 216874   89   189.     35 154. 
## 8  2024  95723   57   160.     21 139.
#Average annual cases (2017-July 2024)
summary(case_per_year$Total)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   80919   93951  184516  198148  253146  437089
sd(case_per_year$Total)
## [1] 121184.3
ggplot(case_per_year, aes(Year,Total)) +
  geom_point(size=4, colour = "blue") +
  geom_line() +labs(title = "Dengue Cases in the Philippines, 2017- July 2024",
     x = "Year",
     y = "Number of Cases") +
  scale_y_continuous(labels = comma) +  # Format y-axis labels to 6-digit numbers
  scale_x_continuous(breaks = 2017:2024) +  # Ensure x-axis shows all years
  theme_minimal() +   # Adding a clean theme
  theme(plot.title = element_text(hjust = 0.5)) #Center the plot title

#No. of cases per year per month
case_by_yearmonth<-data %>%
  group_by(Year, Month) %>%
  summarize(Total=sum(Cases), Mean=mean(Cases), Median=median(Cases), Q3=quantile(Cases, probs=0.75), Q1=quantile(Cases, probs = 0.25), IQR=Q3-Q1) %>%
  mutate(Month=as.factor(Month))
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.
print(case_by_yearmonth, n=91)
## # A tibble: 91 × 8
## # Groups:   Year [8]
##     Year Month Total  Mean Median    Q3     Q1   IQR
##    <int> <fct> <int> <dbl>  <dbl> <dbl>  <dbl> <dbl>
##  1  2017 1     14217 120.    64.5 154    26.2  128. 
##  2  2017 2      9306  78.2   41    96    16.5   79.5
##  3  2017 3      7425  62.9   35    72.8  13     59.8
##  4  2017 4      5157  44.5   25.5  55.5  11     44.5
##  5  2017 5      5547  47.4   26    58    11     47  
##  6  2017 6      9800  82.4   43    93    16     77  
##  7  2017 7     18278 155.    71.5 169.   29.2  140. 
##  8  2017 8     24961 206.    96   241    33    208  
##  9  2017 9     17365 147.    64.5 158.   22    136. 
## 10  2017 10    15018 126.    59   124.   22    102. 
## 11  2017 11    14156 120.    55   116    25     91  
## 12  2017 12    10929  91.1   40    88    18     70  
## 13  2018 1     14253 118.    66   108    33     75  
## 14  2018 2      9727  81.1   41    84.2  20     64.2
## 15  2018 3      7822  64.6   38    72    19     53  
## 16  2018 4      5798  48.3   30    59    14.8   44.2
## 17  2018 5      8067  67.8   44    86    20     66  
## 18  2018 6     15545 130.    85   171.   30    141. 
## 19  2018 7     30555 255.   152.  315    49.5  266. 
## 20  2018 8     37919 313.   202   394    66    328  
## 21  2018 9     31459 260.   164   310    68    242  
## 22  2018 10    33011 273.   161   354    75    279  
## 23  2018 11    30238 250.   129   348    56    292  
## 24  2018 12    25004 207.   112   317    44    273  
## 25  2019 1     32459 270.   164.  378.   79.2  299. 
## 26  2019 2     20271 168.   117   229    47    182  
## 27  2019 3     14053 117.    79   167    33.8  133. 
## 28  2019 4      9729  80.4   45   107    19     88  
## 29  2019 5     13946 115.    54   124    22    102  
## 30  2019 6     32901 272.   124   328    53    275  
## 31  2019 7     78436 648.   295   917   138    779  
## 32  2019 8     90284 752.   427   855.  190    665. 
## 33  2019 9     61711 510.   293   565   147    418  
## 34  2019 10    40988 339.   204   402    96    306  
## 35  2019 11    25918 214.   111   261    58    203  
## 36  2019 12    16393 135.    67   160    29    131  
## 37  2020 1     25307 211.   110.  233    54.8  178. 
## 38  2020 2     14657 121.    66   155    33    122  
## 39  2020 3      6771  56.9   31    76    12     64  
## 40  2020 4      2227  20.6   11.5  29.5   4     25.5
## 41  2020 5      2312  21.0   10    29.5   4     25.5
## 42  2020 6      3672  33.1   16    44     5     39  
## 43  2020 7      6124  53.7   23    63.8   6     57.8
## 44  2020 8      6047  51.2   20    50     7.25  42.8
## 45  2020 9      5135  43.9   14    39     6     33  
## 46  2020 10     4946  43.0   15    41.5   6     35.5
## 47  2020 11     6130  54.2   18    52     6     46  
## 48  2020 12     5308  47.8   12    38     5     33  
## 49  2021 1      9124  76.7   23    68     9     59  
## 50  2021 2      6962  59     19    56.2   6.25  50  
## 51  2021 3      5894  51.3   19    51.5   6     45.5
## 52  2021 4      3803  33.4   13    42     6     36  
## 53  2021 5      3883  34.1   13    44     5     39  
## 54  2021 6      5564  49.2   17    54     6     48  
## 55  2021 7      9601  84.2   31    82.8  10.2   72.5
## 56  2021 8      8847  76.9   27    73.5  10.5   63  
## 57  2021 9      5694  48.7   20    54     8     46  
## 58  2021 10     7001  61.4   28    68    11.2   56.8
## 59  2021 11     8836  75.5   31    90    16     74  
## 60  2021 12     5710  50.1   23    63.5  10     53.5
## 61  2022 1      5561  47.1   29.5  58.8  12     46.8
## 62  2022 2      5945  49.5   30.5  67.5  11     56.5
## 63  2022 3      8169  68.6   40    97    15     82  
## 64  2022 4     11698  96.7   57   132    20    112  
## 65  2022 5     20927 173.   101   234    33    201  
## 66  2022 6     35204 291.   163   384    60    324  
## 67  2022 7     49626 410.   170   453    90    363  
## 68  2022 8     41986 347.   174   383    88    295  
## 69  2022 9     31496 260.   155   289    69    220  
## 70  2022 10    21705 179.   110   216    51    165  
## 71  2022 11    18977 157.   114   185    48    137  
## 72  2022 12    13096 108.    69   132    31    101  
## 73  2023 1     13875 116.    76   161.   34.8  126  
## 74  2023 2     11099  91.7   61   127    21    106  
## 75  2023 3     10124  84.4   54   118.   20     98.5
## 76  2023 4      9264  77.2   47   105    18.8   86.2
## 77  2023 5     13013 109.    57   154.   24    130. 
## 78  2023 6     19427 163.    95   223    36.5  186. 
## 79  2023 7     23288 192.   119   260    43    217  
## 80  2023 8     24022 199.   119   288    48    240  
## 81  2023 9     25231 209.   128   313    54    259  
## 82  2023 10    26073 215.   144   315    59    256  
## 83  2023 11    23011 190.   132   239    55    184  
## 84  2023 12    18447 154.    95   212.   49.2  163. 
## 85  2024 1     19050 157.    95   210    44    166  
## 86  2024 2     15473 129.    82   188.   26.5  162  
## 87  2024 3     13986 117.    56.5 166.   24.5  141. 
## 88  2024 4     11503  98.3   51   127    23    104  
## 89  2024 5     13037 112.    58   168.   24.5  144. 
## 90  2024 6     20447 175.   102   281    28    253  
## 91  2024 7      2227  21.6   12    28.5   4     24.5
#Plot monthly cases by year 
ggplot(case_by_yearmonth, aes(Month,Total)) +
geom_col(width = 1, fill = "darkred") +       # plot the count data as columns
  theme_minimal()+                              # simplify the background panels
  labs(                                         # add plot labels, title, etc.
    x = "Month",
    y = "Dengue cases",
    title = "Dengue cases by year-month") +
  facet_wrap(~Year)                       # the facets are created)

#Box plot of cases per month
ggplot(case_by_yearmonth, aes(x=Month, y=Total)) +
  geom_boxplot() +
  labs(                   
    x = "Month",
    y = "Dengue cases",
    title = "Dengue cases by Month (2017-July 2024)")

#Reports per Province/city Epidemiology units & percentage of reporting
reports<-data %>% 
  group_by(Province) %>%
  count() %>%
  arrange(desc(n)) %>%
  mutate(Percent=n/91*100)

print(reports, n=121)
## # A tibble: 121 × 3
## # Groups:   Province [121]
##     Province                            n Percent
##     <chr>                           <int>   <dbl>
##   1 LEYTE                              92   101. 
##   2 AGUSAN DEL NORTE                   91   100  
##   3 AGUSAN DEL SUR                     91   100  
##   4 ANGELES CITY                       91   100  
##   5 ANTIQUE                            91   100  
##   6 AURORA                             91   100  
##   7 BACOLOD CITY                       91   100  
##   8 BATAAN                             91   100  
##   9 BATANGAS                           91   100  
##  10 BENGUET                            91   100  
##  11 BOHOL                              91   100  
##  12 BUKIDNON                           91   100  
##  13 BULACAN                            91   100  
##  14 CAGAYAN DE ORO CITY                91   100  
##  15 CALOOCAN CITY                      91   100  
##  16 CAMARINES SUR                      91   100  
##  17 CAPIZ                              91   100  
##  18 CAVITE                             91   100  
##  19 CEBU                               91   100  
##  20 CEBU CITY                          91   100  
##  21 CITY OF ISABELA                    91   100  
##  22 CITY OF MAKATI                     91   100  
##  23 CITY OF MALABON                    91   100  
##  24 CITY OF MANDALUYONG                91   100  
##  25 CITY OF MANILA                     91   100  
##  26 CITY OF MARIKINA                   91   100  
##  27 CITY OF VALENZUELA                 91   100  
##  28 COTABATO (NORTH COTABATO)          91   100  
##  29 COTABATO CITY                      91   100  
##  30 DAGUPAN CITY                       91   100  
##  31 DAVAO CITY                         91   100  
##  32 DAVAO DE ORO                       91   100  
##  33 DAVAO DEL NORTE                    91   100  
##  34 DAVAO DEL SUR                      91   100  
##  35 GENERAL SANTOS CITY (DADIANGAS)    91   100  
##  36 IFUGAO                             91   100  
##  37 ILIGAN CITY                        91   100  
##  38 ILOCOS NORTE                       91   100  
##  39 ILOCOS SUR                         91   100  
##  40 ILOILO                             91   100  
##  41 ISABELA                            91   100  
##  42 KALINGA                            91   100  
##  43 LA UNION                           91   100  
##  44 LAGUNA                             91   100  
##  45 LANAO DEL NORTE                    91   100  
##  46 MANDAUE CITY                       91   100  
##  47 MISAMIS OCCIDENTAL                 91   100  
##  48 MISAMIS ORIENTAL                   91   100  
##  49 NEGROS OCCIDENTAL                  91   100  
##  50 NEGROS ORIENTAL                    91   100  
##  51 NUEVA ECIJA                        91   100  
##  52 NUEVA VIZCAYA                      91   100  
##  53 OCCIDENTAL MINDORO                 91   100  
##  54 ORIENTAL MINDORO                   91   100  
##  55 PALAWAN                            91   100  
##  56 PAMPANGA                           91   100  
##  57 PANGASINAN                         91   100  
##  58 PUERTO PRINCESA CITY               91   100  
##  59 QUEZON                             91   100  
##  60 QUEZON CITY                        91   100  
##  61 QUIRINO                            91   100  
##  62 RIZAL                              91   100  
##  63 ROMBLON                            91   100  
##  64 SARANGANI                          91   100  
##  65 SOUTH COTABATO                     91   100  
##  66 SURIGAO DEL NORTE                  91   100  
##  67 SURIGAO DEL SUR                    91   100  
##  68 TARLAC                             91   100  
##  69 ZAMBOANGA CITY                     91   100  
##  70 ZAMBOANGA DEL NORTE                91   100  
##  71 ZAMBOANGA DEL SUR                  91   100  
##  72 ZAMBOANGA SIBUGAY                  91   100  
##  73 APAYAO                             90    98.9
##  74 BAGUIO CITY                        90    98.9
##  75 CAGAYAN                            90    98.9
##  76 CITY OF LAS PINAS                  90    98.9
##  77 CITY OF PARANAQUE                  90    98.9
##  78 CITY OF PASIG                      90    98.9
##  79 DAVAO ORIENTAL                     90    98.9
##  80 ILOILO CITY                        90    98.9
##  81 NAGA CITY                          90    98.9
##  82 ORMOC CITY                         90    98.9
##  83 PASAY CITY                         90    98.9
##  84 SAMAR (WESTERN SAMAR)              90    98.9
##  85 SULTAN KUDARAT                     90    98.9
##  86 TACLOBAN CITY                      90    98.9
##  87 ZAMBALES                           90    98.9
##  88 BASILAN                            89    97.8
##  89 BUTUAN CITY                        89    97.8
##  90 LAPU-LAPU CITY (OPON)              89    97.8
##  91 NORTHERN SAMAR                     89    97.8
##  92 TAGUIG CITY                        89    97.8
##  93 CAMARINES NORTE                    88    96.7
##  94 OLONGAPO CITY                      88    96.7
##  95 SIQUIJOR                           88    96.7
##  96 SOUTHERN LEYTE                     88    96.7
##  97 ABRA                               87    95.6
##  98 BILIRAN                            87    95.6
##  99 CITY OF MUNTINLUPA                 87    95.6
## 100 CITY OF NAVOTAS                    87    95.6
## 101 CITY OF SAN JUAN                   87    95.6
## 102 EASTERN SAMAR                      87    95.6
## 103 MOUNTAIN PROVINCE                  87    95.6
## 104 AKLAN                              86    94.5
## 105 CITY OF SANTIAGO                   86    94.5
## 106 GUIMARAS                           86    94.5
## 107 LANAO DEL SUR                      86    94.5
## 108 MASBATE                            86    94.5
## 109 SULU                               86    94.5
## 110 ALBAY                              84    92.3
## 111 DAVAO OCCIDENTAL                   84    92.3
## 112 MAGUINDANAO                        84    92.3
## 113 MARINDUQUE                         84    92.3
## 114 SORSOGON                           84    92.3
## 115 TAWI-TAWI                          84    92.3
## 116 CATANDUANES                        83    91.2
## 117 CAMIGUIN                           82    90.1
## 118 PATEROS                            81    89.0
## 119 DINAGAT ISLANDS                    78    85.7
## 120 LUCENA CITY                        77    84.6
## 121 BATANES                            51    56.0
summary(reports$Percent) 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   56.04   97.80  100.00   97.91  100.00  101.10
reports %>% ungroup() %>% count(Percent, sort=TRUE)
## # A tibble: 14 × 2
##    Percent     n
##      <dbl> <int>
##  1   100      71
##  2    98.9    15
##  3    95.6     7
##  4    92.3     6
##  5    94.5     6
##  6    97.8     5
##  7    96.7     4
##  8    56.0     1
##  9    84.6     1
## 10    85.7     1
## 11    89.0     1
## 12    90.1     1
## 13    91.2     1
## 14   101.      1
#Highest Recorded cases (>3000)
highest_cases<-data %>%
  group_by(Province) %>%
  arrange(desc(Cases)) %>%
  select(Province,Region_name,Cases, Date) %>%
  filter(Cases>3000)

print(highest_cases)
## # A tibble: 14 × 4
## # Groups:   Province [6]
##    Province    Region_name                 Cases Date   
##    <chr>       <chr>                       <int> <chr>  
##  1 ILOILO      Region VI (Western Visayas)  8934 2019-07
##  2 LAGUNA      Region IV-A (Calabarzon)     6138 2019-08
##  3 BULACAN     Region III (Central Luzon)   5434 2022-07
##  4 CAVITE      Region IV-A (Calabarzon)     5004 2019-08
##  5 ILOILO      Region VI (Western Visayas)  4847 2019-08
##  6 LAGUNA      Region IV-A (Calabarzon)     4562 2019-09
##  7 CAVITE      Region IV-A (Calabarzon)     4166 2019-09
##  8 CAVITE      Region IV-A (Calabarzon)     3817 2019-07
##  9 ILOILO      Region VI (Western Visayas)  3492 2019-06
## 10 NUEVA ECIJA Region III (Central Luzon)   3459 2022-07
## 11 BULACAN     Region III (Central Luzon)   3419 2022-06
## 12 BATANGAS    Region IV-A (Calabarzon)     3305 2019-08
## 13 LAGUNA      Region IV-A (Calabarzon)     3088 2019-07
## 14 BULACAN     Region III (Central Luzon)   3081 2022-08
#Calculate incidence 
incidence<-data %>%
  mutate(Incidence = Cases/Population, Incidence_per_100000 = Incidence*100000)

str(incidence)
## 'data.frame':    10781 obs. of  14 variables:
##  $ ID                  : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Year                : int  2017 2017 2017 2017 2017 2017 2017 2017 2017 2017 ...
##  $ Month               : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Date                : chr  "2017-01" "2017-01" "2017-01" "2017-01" ...
##  $ Region              : chr  "1" "1" "1" "1" ...
##  $ Region_name         : chr  "Region I (Ilocos Region)" "Region I (Ilocos Region)" "Region I (Ilocos Region)" "Region I (Ilocos Region)" ...
##  $ Province            : chr  "DAGUPAN CITY" "ILOCOS NORTE" "ILOCOS SUR" "LA UNION" ...
##  $ Location_ID         : int  126 143 144 153 188 29 103 149 178 194 ...
##  $ Cases               : int  22 16 38 71 118 133 8 98 14 15 ...
##  $ X.long              : num  120 121 121 120 120 ...
##  $ Y.lat               : num  16.1 18.2 17.2 16.6 16 ...
##  $ Population          : int  171460 594105 690674 788851 2969273 1203559 135654 1599858 454993 189888 ...
##  $ Incidence           : num  1.28e-04 2.69e-05 5.50e-05 9.00e-05 3.97e-05 ...
##  $ Incidence_per_100000: num  12.83 2.69 5.5 9 3.97 ...
#Highest Incidence Per 100,000 population (>300) 
highest_incidence<-incidence %>%
  arrange(desc(Incidence_per_100000)) %>%
  select(Province,Region_name,Incidence_per_100000, Date) %>%
  filter(Incidence_per_100000>300)

print(highest_incidence)
##         Province                            Region_name Incidence_per_100000
## 1        BATANES             Region II (Cagayan Valley)            2717.4225
## 2        BATANES             Region II (Cagayan Valley)            1495.7877
## 3        BATANES             Region II (Cagayan Valley)            1005.3920
## 4        BATANES             Region II (Cagayan Valley)             969.1478
## 5         APAYAO Cordillera Administrative Region (CAR)             540.8763
## 6       GUIMARAS            Region VI (Western Visayas)             540.0301
## 7        BATANES             Region II (Cagayan Valley)             464.6627
## 8         ILOILO            Region VI (Western Visayas)             456.7389
## 9        BATANES             Region II (Cagayan Valley)             443.8701
## 10       KALINGA Cordillera Administrative Region (CAR)             442.3214
## 11        APAYAO Cordillera Administrative Region (CAR)             427.3327
## 12 TACLOBAN CITY          Region VIII (Eastern Visayas)             418.8546
## 13       KALINGA Cordillera Administrative Region (CAR)             380.6646
## 14 TACLOBAN CITY          Region VIII (Eastern Visayas)             354.8048
## 15        APAYAO Cordillera Administrative Region (CAR)             354.5659
## 16         AKLAN            Region VI (Western Visayas)             329.3868
## 17      GUIMARAS            Region VI (Western Visayas)             310.3641
##       Date
## 1  2018-07
## 2  2018-08
## 3  2023-10
## 4  2018-06
## 5  2022-06
## 6  2019-07
## 7  2018-05
## 8  2019-07
## 9  2023-09
## 10 2022-07
## 11 2022-07
## 12 2019-07
## 13 2022-08
## 14 2019-08
## 15 2022-05
## 16 2019-07
## 17 2019-08
# Convert Year and Month to factors
data$Year <- as.factor(data$Year)
data$Month <- as.factor(data$Month)

# Convert Province to factor
data$Province <- as.factor(data$Province)

# Create a Date column from Year and Month (keeping it continuous)
data$Date <- as.Date(paste(data$Year, data$Month, "01", sep = "-"))

str(data)
## 'data.frame':    10781 obs. of  12 variables:
##  $ ID         : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Year       : Factor w/ 8 levels "2017","2018",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Month      : Factor w/ 12 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Date       : Date, format: "2017-01-01" "2017-01-01" ...
##  $ Region     : chr  "1" "1" "1" "1" ...
##  $ Region_name: chr  "Region I (Ilocos Region)" "Region I (Ilocos Region)" "Region I (Ilocos Region)" "Region I (Ilocos Region)" ...
##  $ Province   : Factor w/ 121 levels "ABRA","AGUSAN DEL NORTE",..: 49 62 63 68 94 22 45 66 87 100 ...
##  $ Location_ID: int  126 143 144 153 188 29 103 149 178 194 ...
##  $ Cases      : int  22 16 38 71 118 133 8 98 14 15 ...
##  $ X.long     : num  120 121 121 120 120 ...
##  $ Y.lat      : num  16.1 18.2 17.2 16.6 16 ...
##  $ Population : int  171460 594105 690674 788851 2969273 1203559 135654 1599858 454993 189888 ...
#Plot Cases per province
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
p <- ggplot(data, aes(x = Date, y = Cases, group = Province, color = Province)) +
  geom_line() +
  labs(title = "Philippine Dengue Cases per Province",
       x = "Date",
       y = "Number of Cases") +
  scale_y_continuous(labels = comma) +
  scale_x_date(date_labels = "%b %Y", date_breaks = "2 months") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5),
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

ggplotly(p)  # Make the plot interactive
#Plot incidence per province

# If your Date column is in "YYYY-MM" format, convert it as follows:
incidence$Date <- as.Date(paste0(incidence$Date, "-01"))

# Then proceed with your ggplot and plotly code
p1 <- ggplot(incidence, aes(x = Date, y = Incidence_per_100000, group = Province, color = Province)) +
  geom_line() +
  geom_hline(yintercept = 300, linetype = "dashed", color = "black", size = 0.75) 
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
  labs(title = "Dengue Incidence per Province",
       x = "Date",
       y = "Incidence per 100,000 population") +
  scale_y_continuous(labels = scales::comma) +
  scale_x_date(date_labels = "%b %Y", date_breaks = "2 months") +
  theme_minimal() +  # Adding a clean theme
  theme(plot.title = element_text(hjust = 0.5),  # Center the plot title
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))  # Rotate x-axis labels vertically
## NULL
# Convert the ggplot object to plotly
ggplotly(p1)
#incidence per yearmonth
incidence_per_yearmonth <- incidence %>%
  group_by(Year, Month) %>%
  summarize(Total=sum(Incidence_per_100000, na.rm=TRUE), Mean=mean(Incidence_per_100000, na.rm = TRUE), Median=median(Incidence_per_100000, na.rm=TRUE), Q3=quantile(Incidence_per_100000, probs=0.75, na.rm = TRUE), Q1=quantile(Incidence_per_100000, probs = 0.25, na.rm=TRUE), IQR=Q3-Q1) 
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.
print(incidence_per_yearmonth, n=91)
## # A tibble: 91 × 8
## # Groups:   Year [8]
##     Year Month  Total  Mean Median     Q3     Q1   IQR
##    <int> <int>  <dbl> <dbl>  <dbl>  <dbl>  <dbl> <dbl>
##  1  2017     1  1818. 15.4   11.2   18.7   6.16  12.5 
##  2  2017     2  1146.  9.63   7.11  13.4   3.69   9.68
##  3  2017     3   904.  7.66   5.76   9.71  2.89   6.82
##  4  2017     4   659.  5.68   4.16   7.23  2.15   5.08
##  5  2017     5   695.  5.94   4.14   7.28  2.40   4.89
##  6  2017     6  1185.  9.96   6.42  12.9   3.82   9.03
##  7  2017     7  2190. 18.6   12.7   23.5   6.99  16.5 
##  8  2017     8  2846. 23.5   18.8   30.2   6.98  23.3 
##  9  2017     9  1967. 16.7   14.5   23.5   6.14  17.4 
## 10  2017    10  1708. 14.4   12.0   20.1   4.62  15.5 
## 11  2017    11  1558. 13.2   10.6   18.1   4.84  13.3 
## 12  2017    12  1169.  9.75   7.93  12.3   3.87   8.39
## 13  2018     1  1727. 14.3   12.2   17.6   6.80  10.8 
## 14  2018     2  1171.  9.75   8.23  12.7   4.21   8.47
## 15  2018     3  1068.  8.83   6.66  10.3   3.86   6.45
## 16  2018     4   880.  7.34   4.80   7.55  2.92   4.63
## 17  2018     5  1537. 12.9    6.18  12.3   4.41   7.93
## 18  2018     6  2981. 24.8   12.6   22.1   6.87  15.2 
## 19  2018     7  6442. 53.7   22.3   42.1  12.1   30.0 
## 20  2018     8  6247. 51.6   29.5   51.5  16.8   34.8 
## 21  2018     9  4410. 36.4   29.1   41.2  16.6   24.6 
## 22  2018    10  4609. 38.1   29.9   44.6  17.0   27.6 
## 23  2018    11  4257. 35.2   25.1   41.4  15.2   26.2 
## 24  2018    12  3717. 30.7   18.2   36.4  11.9   24.5 
## 25  2019     1  4386. 36.6   29.2   44.8  17.8   26.9 
## 26  2019     2  2722. 22.5   19.8   28.1   9.97  18.1 
## 27  2019     3  1830. 15.3   13.1   21.5   6.02  15.4 
## 28  2019     4  1277. 10.5    7.40  13.0   4.03   8.98
## 29  2019     5  1785. 14.8    9.25  16.6   4.73  11.9 
## 30  2019     6  4067. 33.6   20.6   40.7  10.4   30.4 
## 31  2019     7  9679. 80.0   52.7   98.9  27.4   71.4 
## 32  2019     8 10464. 87.2   78.6  112.   48.9   63.3 
## 33  2019     9  7070. 58.4   57.2   76.2  38.2   38.0 
## 34  2019    10  5049. 41.7   37.7   55.4  24.3   31.0 
## 35  2019    11  3292. 27.2   24.3   36.7  13.7   23.0 
## 36  2019    12  2016. 16.7   12.2   22.9   5.84  17.1 
## 37  2020     1  3179. 26.5   19.3   33.5  10.5   23.1 
## 38  2020     2  1883. 15.6   12.3   20.8   7.18  13.6 
## 39  2020     3   920.  7.73   4.77  10.1   2.63   7.48
## 40  2020     4   316.  2.93   1.61   4.49  0.760  3.73
## 41  2020     5   305.  2.78   1.74   3.81  0.585  3.23
## 42  2020     6   455.  4.10   2.50   5.86  1.04   4.82
## 43  2020     7   746.  6.54   3.44   9.12  1.40   7.72
## 44  2020     8   791.  6.70   3.45   6.79  1.50   5.29
## 45  2020     9   687.  5.88   2.31   5.59  1.33   4.26
## 46  2020    10   615.  5.35   2.87   5.51  1.15   4.35
## 47  2020    11   673.  5.96   3.36   6.14  1.30   4.84
## 48  2020    12   521.  4.69   2.22   5.03  1.13   3.90
## 49  2021     1   915.  7.69   4.21  11.2   1.70   9.48
## 50  2021     2   720.  6.10   3.77   8.00  1.37   6.63
## 51  2021     3   651.  5.66   3.25   6.11  1.65   4.46
## 52  2021     4   452.  3.97   2.17   4.20  1.15   3.05
## 53  2021     5   480.  4.21   2.20   3.93  1.07   2.86
## 54  2021     6   739.  6.54   2.76   5.36  1.41   3.95
## 55  2021     7  1256. 11.0    4.89  10.0   2.32   7.71
## 56  2021     8  1106.  9.62   4.56  10.5   2.05   8.43
## 57  2021     9   759.  6.49   3.76   7.46  1.68   5.78
## 58  2021    10   895.  7.85   5.22  10.3   2.33   8.02
## 59  2021    11  1074.  9.18   5.19  13.5   2.55  11.0 
## 60  2021    12   734.  6.44   3.82   8.51  1.66   6.85
## 61  2022     1   744.  6.30   4.66   8.70  2.51   6.19
## 62  2022     2   783.  6.52   4.96   8.02  2.58   5.43
## 63  2022     3  1112.  9.34   6.23  11.1   3.85   7.22
## 64  2022     4  1692. 14.0    7.85  17.3   4.67  12.7 
## 65  2022     5  3058. 25.3   15.6   29.1   7.74  21.3 
## 66  2022     6  5059. 41.8   23.9   46.3  13.8   32.5 
## 67  2022     7  6516. 53.9   34.4   51.5  19.5   32.0 
## 68  2022     8  5455. 45.1   29.1   47.8  19.0   28.9 
## 69  2022     9  4272. 35.3   25.1   39.7  16.3   23.4 
## 70  2022    10  3032. 25.1   18.3   27.8  11.9   15.8 
## 71  2022    11  2828. 23.4   18.1   27.5  10.6   16.9 
## 72  2022    12  1947. 16.1   11.4   19.8   6.79  13.1 
## 73  2023     1  2013. 16.8   13.3   22.3   7.42  14.9 
## 74  2023     2  1540. 12.7   10.0   17.9   6.05  11.8 
## 75  2023     3  1351. 11.3    8.35  14.5   5.44   9.05
## 76  2023     4  1275. 10.6    7.34  13.8   4.70   9.13
## 77  2023     5  1864. 15.7    8.57  18.3   5.08  13.2 
## 78  2023     6  2605. 21.9   14.5   26.7   8.03  18.6 
## 79  2023     7  3472. 28.7   16.2   32.4  11.5   20.9 
## 80  2023     8  3687. 30.5   18.5   37.7  11.9   25.8 
## 81  2023     9  4148. 34.3   21.9   40.2  11.4   28.9 
## 82  2023    10  4711. 38.9   23.1   38.4  11.8   26.6 
## 83  2023    11  3490. 28.8   21.8   36.7  12.5   24.2 
## 84  2023    12  2624. 21.9   17.0   27.8   9.75  18.1 
## 85  2024     1  2701. 22.3   16.0   30.8   9.86  20.9 
## 86  2024     2  2062. 17.2   12.8   23.2   7.06  16.2 
## 87  2024     3  1801. 15.0   10.9   22.6   5.33  17.3 
## 88  2024     4  1562. 13.3    8.49  19.3   4.57  14.7 
## 89  2024     5  1840. 15.9   10.1   22.4   4.23  18.1 
## 90  2024     6  3101. 26.5   14.0   30.7   6.03  24.6 
## 91  2024     7   339.  3.29   1.45   3.98  0.785  3.20
#incidence per month
incidence_per_month <- incidence %>%
  group_by(Month) %>%
  summarize(Total=sum(Incidence_per_100000, na.rm=TRUE), Mean=mean(Incidence_per_100000, na.rm = TRUE), Median=median(Incidence_per_100000, na.rm=TRUE), Q3=quantile(Incidence_per_100000, probs=0.75, na.rm = TRUE), Q1=quantile(Incidence_per_100000, probs = 0.25, na.rm=TRUE), IQR=Q3-Q1, SD=sd(Incidence_per_100000, na.rm=TRUE), lower_CI=Mean-SD, Upper_CI=Mean+SD) 

print(incidence_per_month, n=12)
## # A tibble: 12 × 10
##    Month  Total  Mean Median    Q3    Q1   IQR    SD lower_CI Upper_CI
##    <int>  <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
##  1     1 17484. 18.3   12.3   23.7  5.80 17.9   20.2   -1.96      38.5
##  2     2 12025. 12.5    8.70  16.5  4.39 12.1   12.4    0.142     24.9
##  3     3  9637. 10.1    6.77  12.8  3.50  9.25  10.8   -0.712     21.0
##  4     4  8113.  8.66   5.26  10.0  2.53  7.50  12.2   -3.52      20.8
##  5     5 11566. 12.3    6.11  14.0  2.79 11.2   24.0  -11.6       36.3
##  6     6 20191. 21.5   10.3   23.4  4.06 19.3   45.8  -24.3       67.3
##  7     7 30639. 32.9   14.0   35.1  4.08 31.0  102.   -68.9      135. 
##  8     8 30595. 36.6   20.1   46.2  6.32 39.9   67.6  -31.0      104. 
##  9     9 23314. 27.9   17.5   37.5  5.03 32.4   35.3   -7.42      63.2
## 10    10 20620. 24.8   16.1   32.9  5.48 27.4   44.0  -19.2       68.8
## 11    11 17172. 20.6   14.7   27.0  5.24 21.7   26.2   -5.57      46.9
## 12    12 12729. 15.4    9.52  18.8  3.69 15.1   21.0   -5.67      36.4
#Plot incidence per month
ggplot(incidence_per_month, aes(x=Month, y=Median)) + 
  geom_point(size=4, colour = "blue") +
  geom_line() +
  geom_errorbar(aes(ymin = Q1, ymax = Q3), width = 0.2, colour = "red") +  # Add error bars
  labs(title = "Monthly Incidence of Dengue in the Philippines, 2017- July 2024",
     x = "Month",
     y = "Median incidence per 100,000 population") +
  theme_minimal() +   # Adding a clean theme
  theme(plot.title = element_text(hjust = 0.5)) #Center the plot title

#Plot polar coordinates
ggplot(incidence_per_yearmonth, aes(x=Month, y = Mean, group=Year, color=Year)) +
  geom_line(aes(linewidth = 0.5)) +
  coord_polar(start = -pi/12)

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.