Answer
- Import the dataset into R environment
library(httr)
library(readxl)
url<- "http://www.data.gov.my/data/ms_MY/dataset/bbd0dfba-b586-47ae-91ea-6f0b7880d25b/resource/cd971423-e02e-415d-85f5-77c0118d242e/download/2013.xlsx"
GET(url, write_disk(tf<-tempfile(fileext = ".xlsx")))
## Response [http://www.data.gov.my/data/ms_MY/dataset/bbd0dfba-b586-47ae-91ea-6f0b7880d25b/resource/cd971423-e02e-415d-85f5-77c0118d242e/download/2013.xlsx]
## Date: 2019-10-02 13:57
## Status: 200
## Content-Type: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
## Size: 22.3 kB
## <ON DISK> C:\Users\Acer\AppData\Local\Temp\RtmpWOxTGI\file57d460b44b2d.xlsx
data2013 <- read_excel(tf)
## New names:
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * `` -> ...6
## * ... and 11 more problems
- Make sure the variable names are Minggu and states from Johor to WP Labuan
names(data2013) <-lapply(data2013[2,], as.character)
## Warning: Must use a character vector as names.
## This warning is displayed once per session.
data2013 <- data2013[-c(1,2),]
- Remove Variable name Malaysia
data2013$Malaysia <- NULL
- Rename WP Kuala Lumpur to ‘Kuala Lumpur’
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
data2013 <- rename(data2013,
"Kuala Lumpur" = "WP Kuala Lumpur")
- Rename WP Labuan to ‘Labuan’
library(dplyr)
data2013 <- rename(data2013, "Labuan" = "WP Labuan")
- Remove Observation Total (entire row).
data2013 <- data2013[-53,]
- Add another column, as Year (make sure all observation in this year are same, eg: 2013).
data2013$Year <- 2013
- Create another object for this dataset.
#order year to be first variable in dataset
data2013 <- select(data2013, Year, everything())
data2013[1:17] <- sapply(data2013[1:17],
as.numeric)
- group the observations to quarterly (Quarter 1 - Quarter 4).(take summation)
quarter2013 <-
data2013%>%
mutate(quarter = ifelse(`Epid Minggu` <= 13, "quarter 1",
ifelse (`Epid Minggu` <= 26, "quarter 2",
ifelse(`Epid Minggu` <= 39, "quarter 3", "quarter 4"))))
- Do the same process to year 2014, 2015, 2016, 2017 and 2018.
#2014
library(httr)
library(readxl)
url<- "http://www.data.gov.my/data/ms_MY/dataset/bbd0dfba-b586-47ae-91ea-6f0b7880d25b/resource/76ed6414-c5cb-4c87-9d48-4b4bc1185bdb/download/2014.xlsx"
GET(url, write_disk(tf<-tempfile(fileext = ".xlsx")))
## Response [http://www.data.gov.my/data/ms_MY/dataset/bbd0dfba-b586-47ae-91ea-6f0b7880d25b/resource/76ed6414-c5cb-4c87-9d48-4b4bc1185bdb/download/2014.xlsx]
## Date: 2019-10-02 13:57
## Status: 200
## Content-Type: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
## Size: 22.4 kB
## <ON DISK> C:\Users\Acer\AppData\Local\Temp\RtmpWOxTGI\file57d44dfbb5f.xlsx
data2014 <- read_excel(tf)
## New names:
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * `` -> ...6
## * ... and 11 more problems
names(data2014) <-lapply(data2014[2,], as.character)
data2014 <- data2014[-c(1,2),]
data2014$Malaysia <- NULL
library(dplyr)
data2014 <- rename(data2014,
"Kuala Lumpur" = "WP Kuala Lumpur")
library(dplyr)
data2014 <- rename(data2014, "Labuan" = "WP Labuan")
data2014 <- data2014[-53,]
data2014$Year <- 2014
#order year to be first variable in dataset
data2014 <- select(data2014, Year, everything())
data2014[1:17] <- sapply(data2014[1:17],
as.numeric)
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
quarter2014 <-
data2014%>%
mutate(quarter = ifelse(`Epid Minggu` <= 14, "quarter 1",
ifelse (`Epid Minggu` <= 27, "quarter 2",
ifelse(`Epid Minggu` <= 40, "quarter 3", "quarter 4"))))
#2015
library(httr)
library(readxl)
url<- "http://www.data.gov.my/data/ms_MY/dataset/bbd0dfba-b586-47ae-91ea-6f0b7880d25b/resource/3ca80408-74cd-4403-8f97-d11e402ddb3d/download/2015.xlsx"
GET(url, write_disk(tf<-tempfile(fileext = ".xlsx")))
## Response [http://www.data.gov.my/data/ms_MY/dataset/bbd0dfba-b586-47ae-91ea-6f0b7880d25b/resource/3ca80408-74cd-4403-8f97-d11e402ddb3d/download/2015.xlsx]
## Date: 2019-10-02 13:57
## Status: 200
## Content-Type: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
## Size: 22.3 kB
## <ON DISK> C:\Users\Acer\AppData\Local\Temp\RtmpWOxTGI\file57d471446b9d.xlsx
data2015 <- read_excel(tf)
## New names:
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * `` -> ...6
## * ... and 11 more problems
names(data2015) <-lapply(data2015[2,], as.character)
data2015 <- data2015[-c(1,2),]
data2015$Malaysia <- NULL
library(dplyr)
data2015 <- rename(data2015,
"Kuala Lumpur" = "WP Kuala Lumpur")
library(dplyr)
data2015 <- rename(data2015, "Labuan" = "WP Labuan")
data2015 <- data2015[-53,]
data2015$Year <- 2015
#order year to be first variable in dataset
data2015 <- select(data2015, Year, everything())
data2015[1:17] <- sapply(data2015[1:17],
as.numeric)
quarter2015 <-
data2015%>%
mutate(quarter = ifelse(`Epid Minggu` <= 13, "quarter 1",
ifelse (`Epid Minggu` <= 26, "quarter 2",
ifelse(`Epid Minggu` <= 39, "quarter 3", "quarter 4"))))
#2016
library(httr)
library(readxl)
url<- "http://www.data.gov.my/data/ms_MY/dataset/bbd0dfba-b586-47ae-91ea-6f0b7880d25b/resource/40f38fee-e376-45dc-af96-067c588e9e4d/download/2016.xlsx"
GET(url, write_disk(tf<-tempfile(fileext = ".xlsx")))
## Response [http://www.data.gov.my/data/ms_MY/dataset/bbd0dfba-b586-47ae-91ea-6f0b7880d25b/resource/40f38fee-e376-45dc-af96-067c588e9e4d/download/2016.xlsx]
## Date: 2019-10-02 13:57
## Status: 200
## Content-Type: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
## Size: 22.2 kB
## <ON DISK> C:\Users\Acer\AppData\Local\Temp\RtmpWOxTGI\file57d443d757dd.xlsx
data2016 <- read_excel(tf)
## New names:
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * `` -> ...6
## * ... and 11 more problems
names(data2016) <-lapply(data2016[2,], as.character)
data2016 <- data2016[-c(1,2),]
data2016$Malaysia <- NULL
library(dplyr)
data2016 <- rename(data2016,
"Kuala Lumpur" = "WP Kuala Lumpur")
library(dplyr)
data2016 <- rename(data2016, "Labuan" = "WP Labuan")
data2016 <- data2016[-53,]
data2016$Year <- 2016
#order year to be first variable in dataset
data2016 <- select(data2016, Year, everything())
data2016[1:17] <- sapply(data2016[1:17],
as.numeric)
quarter2016 <-
data2016%>%
mutate(quarter = ifelse(`Epid Minggu` <= 13, "quarter 1",
ifelse (`Epid Minggu` <= 26, "quarter 2",
ifelse(`Epid Minggu` <= 39, "quarter 3", "quarter 4"))))
#2017
library(httr)
library(readxl)
url<- "http://www.data.gov.my/data/ms_MY/dataset/bbd0dfba-b586-47ae-91ea-6f0b7880d25b/resource/66a5e3d0-55cd-4096-a701-e134b91b104d/download/2017.xlsx"
GET(url, write_disk(tf<-tempfile(fileext = ".xlsx")))
## Response [http://www.data.gov.my/data/ms_MY/dataset/bbd0dfba-b586-47ae-91ea-6f0b7880d25b/resource/66a5e3d0-55cd-4096-a701-e134b91b104d/download/2017.xlsx]
## Date: 2019-10-02 13:57
## Status: 200
## Content-Type: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
## Size: 22.4 kB
## <ON DISK> C:\Users\Acer\AppData\Local\Temp\RtmpWOxTGI\file57d42a84d4e.xlsx
data2017 <- read_excel(tf)
## New names:
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * `` -> ...6
## * ... and 11 more problems
names(data2017) <-lapply(data2017[2,], as.character)
data2017 <- data2017[-c(1,2),]
data2017$Malaysia <- NULL
library(dplyr)
data2017 <- rename(data2017,
"Kuala Lumpur" = "WP Kuala Lumpur")
library(dplyr)
data2017 <- rename(data2017, "Labuan" = "WP Labuan")
data2017 <- data2017[-53,]
data2017$Year <- 2017
#order year to be first variable in dataset
data2017 <- select(data2017, Year, everything())
data2017[1:17] <- sapply(data2017[1:17],
as.numeric)
quarter2017 <-
data2017%>%
mutate(quarter = ifelse(`Epid Minggu` <= 13, "quarter 1",
ifelse (`Epid Minggu` <= 26, "quarter 2",
ifelse(`Epid Minggu` <= 39, "quarter 3", "quarter 4"))))
- combine these dataset to one dataset. (suppose you should have 2 dataset (1 by week, another 1 is by quarter).
#combine by quarter
combine <- rbind(quarter2013, quarter2014,
quarter2015, quarter2016,
quarter2017)
#combine by week
combine_week <- rbind(data2013, data2014,data2015, data2016, data2017)
combine <- select(combine, Year,`Epid Minggu`, quarter, everything())
- from quarter dataset, select only quarter 1 and quarter 3 for Perak.
group_by(combine, quarter) %>%
filter(quarter == "Quarter 1" | quarter == "Quarter 3",
Year == 2013) %>%
summarise(Perak_case = sum(Perak))
## # A tibble: 0 x 2
## # ... with 2 variables: quarter <chr>, Perak_case <dbl>
- from weekly dataset, select only Perak and Perlis.
group_by(combine, quarter) %>%
filter(Year == 2014) %>%
summarise(Perak_case = sum(Perak),
Perlis_case = sum(Perlis))
## # A tibble: 5 x 3
## quarter Perak_case Perlis_case
## <chr> <dbl> <dbl>
## 1 quarter 1 442 32
## 2 quarter 2 376 33
## 3 quarter 3 374 29
## 4 quarter 4 341 23
## 5 <NA> 1611 119
- from quarter dataset, find the total for the Peninsular Malaysia and East Malaysia based on quarter and year.
#peninsular Malaysia
group_by(combine,quarter, Year) %>%
summarise(Johor_case = sum(Johor),
Kedah_case = sum(Kedah),
Kelantan_case = sum(Kelantan),
Melaka_case = sum(Melaka),
NegeriSembilan_case = sum(`Negeri Sembilan`),
Pahang_case = sum(Pahang),
Perak_case = sum(Perak),
Perlis_case = sum(Perlis),
PulauPinang_case = sum(`Pulau Pinang`),
Selangor_case = sum(Selangor),
Terengganu_case = sum(Terengganu),
KualaLumpur_case = sum(`Kuala Lumpur`))
## # A tibble: 21 x 14
## # Groups: quarter [5]
## quarter Year Johor_case Kedah_case Kelantan_case Melaka_case
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 quarte~ 2013 508 277 341 131
## 2 quarte~ 2014 613 308 423 151
## 3 quarte~ 2015 580 294 317 113
## 4 quarte~ 2016 534 310 337 127
## 5 quarte~ 2017 483 322 268 148
## 6 quarte~ 2013 499 272 378 138
## 7 quarte~ 2014 508 312 317 147
## 8 quarte~ 2015 668 324 323 137
## 9 quarte~ 2016 540 322 319 163
## 10 quarte~ 2017 560 377 305 155
## # ... with 11 more rows, and 8 more variables: NegeriSembilan_case <dbl>,
## # Pahang_case <dbl>, Perak_case <dbl>, Perlis_case <dbl>,
## # PulauPinang_case <dbl>, Selangor_case <dbl>, Terengganu_case <dbl>,
## # KualaLumpur_case <dbl>
#East Malaysia
group_by(combine,quarter, Year) %>%
summarise(Sabah_case = sum(Sabah),
Sarawak_case = sum(Sarawak),
Labuan_case = sum(Labuan))
## # A tibble: 21 x 5
## # Groups: quarter [5]
## quarter Year Sabah_case Sarawak_case Labuan_case
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 quarter 1 2013 1144 677 18
## 2 quarter 1 2014 1173 802 25
## 3 quarter 1 2015 1048 604 25
## 4 quarter 1 2016 1215 618 28
## 5 quarter 1 2017 1135 694 27
## 6 quarter 2 2013 1120 600 30
## 7 quarter 2 2014 1199 627 24
## 8 quarter 2 2015 1066 612 26
## 9 quarter 2 2016 1196 733 35
## 10 quarter 2 2017 1231 671 36
## # ... with 11 more rows
- from weekly dataset, find the total for the Peninsular Malaysia and East Malaysia based on weekly and year.
#peninsular Malaysia
group_by(combine_week, 'Epid Minggu', Year) %>%
summarise(Johor_case = sum(Johor),
Kedah_case = sum(Kedah),
Kelantan_case = sum(Kelantan),
Melaka_case = sum(Melaka),
NegeriSembilan_case = sum(`Negeri Sembilan`),
Pahang_case = sum(Pahang),
Perak_case = sum(Perak),
Perlis_case = sum(Perlis),
PulauPinang_case = sum(`Pulau Pinang`),
Selangor_case = sum(Selangor),
Terengganu_case = sum(Terengganu),
KualaLumpur_case = sum(`Kuala Lumpur`))
## # A tibble: 5 x 14
## # Groups: "Epid Minggu" [1]
## `"Epid Minggu"` Year Johor_case Kedah_case Kelantan_case Melaka_case
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Epid Minggu 2013 2246 1169 1402 552
## 2 Epid Minggu 2014 4568 2551 2717 1302
## 3 Epid Minggu 2015 2409 1279 1233 513
## 4 Epid Minggu 2016 2325 1283 1254 571
## 5 Epid Minggu 2017 2251 1417 1300 639
## # ... with 8 more variables: NegeriSembilan_case <dbl>, Pahang_case <dbl>,
## # Perak_case <dbl>, Perlis_case <dbl>, PulauPinang_case <dbl>,
## # Selangor_case <dbl>, Terengganu_case <dbl>, KualaLumpur_case <dbl>
#East Malaysia
group_by(combine_week, 'Epid Minggu', Year) %>%
summarise(Sabah_case = sum(Sabah),
Sarawak_case = sum(Sarawak),
Labuan_case = sum(Labuan))
## # A tibble: 5 x 5
## # Groups: "Epid Minggu" [1]
## `"Epid Minggu"` Year Sabah_case Sarawak_case Labuan_case
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Epid Minggu 2013 4526 2672 99
## 2 Epid Minggu 2014 9379 5515 208
## 3 Epid Minggu 2015 4464 2575 116
## 4 Epid Minggu 2016 4953 2857 133
## 5 Epid Minggu 2017 5106 2797 137
- from quarter dataset, find the total for Peninsular Malaysia and East Malaysia based on year.
#peninsular Malaysia
group_by(combine, Year) %>%
summarise(Johor_case = sum(Johor),
Kedah_case = sum(Kedah),
Kelantan_case = sum(Kelantan),
Melaka_case = sum(Melaka),
NegeriSembilan_case = sum(`Negeri Sembilan`),
Pahang_case = sum(Pahang),
Perak_case = sum(Perak),
Perlis_case = sum(Perlis),
PulauPinang_case = sum(`Pulau Pinang`),
Selangor_case = sum(Selangor),
Terengganu_case = sum(Terengganu),
KualaLumpur_case = sum(`Kuala Lumpur`))
## # A tibble: 5 x 13
## Year Johor_case Kedah_case Kelantan_case Melaka_case NegeriSembilan_~
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2013 2246 1169 1402 552 631
## 2 2014 4568 2551 2717 1302 1309
## 3 2015 2409 1279 1233 513 667
## 4 2016 2325 1283 1254 571 641
## 5 2017 2251 1417 1300 639 631
## # ... with 7 more variables: Pahang_case <dbl>, Perak_case <dbl>,
## # Perlis_case <dbl>, PulauPinang_case <dbl>, Selangor_case <dbl>,
## # Terengganu_case <dbl>, KualaLumpur_case <dbl>
#East Malaysia
group_by(combine, Year) %>%
summarise(Sabah_case = sum(Sabah),
Sarawak_case = sum(Sarawak),
Labuan_case = sum(Labuan))
## # A tibble: 5 x 4
## Year Sabah_case Sarawak_case Labuan_case
## <dbl> <dbl> <dbl> <dbl>
## 1 2013 4526 2672 99
## 2 2014 9379 5515 208
## 3 2015 4464 2575 116
## 4 2016 4953 2857 133
## 5 2017 5106 2797 137
- from quarter dataset, find the total for Malaysia based on Quarter and year
#Malaysia
group_by(combine, quarter, Year) %>%
summarise(Johor_case = sum(Johor),
Kedah_case = sum(Kedah),
Kelantan_case = sum(Kelantan),
Melaka_case = sum(Melaka),
NegeriSembilan_case = sum(`Negeri Sembilan`),
Pahang_case = sum(Pahang),
Perak_case = sum(Perak),
Perlis_case = sum(Perlis),
PulauPinang_case = sum(`Pulau Pinang`),
Selangor_case = sum(Selangor),
Terengganu_case = sum(Terengganu),
KualaLumpur_case = sum(`Kuala Lumpur`),
Sabah_case = sum(Sabah),
Sarawak_case = sum(Sarawak),
Labuan_case = sum(Labuan))
## # A tibble: 21 x 17
## # Groups: quarter [5]
## quarter Year Johor_case Kedah_case Kelantan_case Melaka_case
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 quarte~ 2013 508 277 341 131
## 2 quarte~ 2014 613 308 423 151
## 3 quarte~ 2015 580 294 317 113
## 4 quarte~ 2016 534 310 337 127
## 5 quarte~ 2017 483 322 268 148
## 6 quarte~ 2013 499 272 378 138
## 7 quarte~ 2014 508 312 317 147
## 8 quarte~ 2015 668 324 323 137
## 9 quarte~ 2016 540 322 319 163
## 10 quarte~ 2017 560 377 305 155
## # ... with 11 more rows, and 11 more variables: NegeriSembilan_case <dbl>,
## # Pahang_case <dbl>, Perak_case <dbl>, Perlis_case <dbl>,
## # PulauPinang_case <dbl>, Selangor_case <dbl>, Terengganu_case <dbl>,
## # KualaLumpur_case <dbl>, Sabah_case <dbl>, Sarawak_case <dbl>,
## # Labuan_case <dbl>
- From quarter dataset, find the total for Malaysia based on year.
group_by(combine, Year) %>%
summarise(Johor_case = sum(Johor),
Kedah_case = sum(Kedah),
Kelantan_case = sum(Kelantan),
Melaka_case = sum(Melaka),
NegeriSembilan_case = sum(`Negeri Sembilan`),
Pahang_case = sum(Pahang),
Perak_case = sum(Perak),
Perlis_case = sum(Perlis),
PulauPinang_case = sum(`Pulau Pinang`),
Selangor_case = sum(Selangor),
Terengganu_case = sum(Terengganu),
KualaLumpur_case = sum(`Kuala Lumpur`),
Sabah_case = sum(Sabah),
Sarawak_case = sum(Sarawak),
Labuan_case = sum(Labuan))
## # A tibble: 5 x 16
## Year Johor_case Kedah_case Kelantan_case Melaka_case NegeriSembilan_~
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2013 2246 1169 1402 552 631
## 2 2014 4568 2551 2717 1302 1309
## 3 2015 2409 1279 1233 513 667
## 4 2016 2325 1283 1254 571 641
## 5 2017 2251 1417 1300 639 631
## # ... with 10 more variables: Pahang_case <dbl>, Perak_case <dbl>,
## # Perlis_case <dbl>, PulauPinang_case <dbl>, Selangor_case <dbl>,
## # Terengganu_case <dbl>, KualaLumpur_case <dbl>, Sabah_case <dbl>,
## # Sarawak_case <dbl>, Labuan_case <dbl>