# COVID-19 figures new cases and deaths in ALBANIA period: 31 December 2019- 12 October 2020
#
# Eralda Gjika ,
# Department of Applied Mathematics, Faculty of Natural Science, University of Tirana, ALBANIA
# E-Mail: eralda.gjika@fshn.edu.al
# LinkedIn: https://www.linkedin.com/in/eralda-dhamo-gjika-71879128/
#
# Install and use the "BeyondBenford" package in R
#
# DATA SOURCE
# Script for downloading the CSV file into “R” software
# Make sure that you have the “utils” package installed.
# these libraries need to be loaded
library(utils)
# Download the ECDC COVID-19 case-distribution CSV and keep it as `data`.
# Empty fields are read as NA; the file is decoded as UTF-8 with a leading
# byte-order mark.
data <- read.csv(
  "https://opendata.ecdc.europa.eu/covid19/casedistribution/csv",
  na.strings = "",
  fileEncoding = "UTF-8-BOM"
)
#
library(BeyondBenford) # the library needed for the test and graphics
# Load the Albania series analysed below into `COVID.albania` and preview it.
# The saved extract lives at a path on the author's machine; when that file is
# not present, fall back to the Albania rows of the ECDC table `data` read
# above so the script can still run end to end.
albania_csv <- "~/COVID 19-folder R project/COVID -Abania 12 Oct.csv"
if (file.exists(albania_csv)) {
  COVID.albania <- read.csv(albania_csv)
} else {
  # Keep reports dated up to 13 October 2020, the newest row in the preview
  # pasted below. NOTE(review): assumes the download carries the same `day`,
  # `month`, `year` and `countriesAndTerritories` columns as that preview;
  # results may still differ slightly from the pasted outputs.
  report_date <- as.Date(ISOdate(data$year, data$month, data$day))
  in_scope <- data$countriesAndTerritories %in% "Albania" &
    !is.na(report_date) &
    report_date <= as.Date("2020-10-13")
  COVID.albania <- data[in_scope, , drop = FALSE]
  rownames(COVID.albania) <- NULL
}
head(COVID.albania, 10)
## dateRep day month year cases deaths countriesAndTerritories
## 1 2020-10-13T00:00:00Z 13 10 2020 171 4 Albania
## 2 2020-10-12T00:00:00Z 12 10 2020 168 4 Albania
## 3 2020-10-11T00:00:00Z 11 10 2020 165 3 Albania
## 4 2020-10-10T00:00:00Z 10 10 2020 167 2 Albania
## 5 2020-10-09T00:00:00Z 9 10 2020 169 4 Albania
## 6 2020-10-08T00:00:00Z 8 10 2020 162 4 Albania
## 7 2020-10-07T00:00:00Z 7 10 2020 158 3 Albania
## 8 2020-10-06T00:00:00Z 6 10 2020 144 4 Albania
## 9 2020-10-05T00:00:00Z 5 10 2020 149 4 Albania
## 10 2020-10-04T00:00:00Z 4 10 2020 152 3 Albania
## geoId countryterritoryCode popData2019 continentExp
## 1 AL ALB 2862427 Europe
## 2 AL ALB 2862427 Europe
## 3 AL ALB 2862427 Europe
## 4 AL ALB 2862427 Europe
## 5 AL ALB 2862427 Europe
## 6 AL ALB 2862427 Europe
## 7 AL ALB 2862427 Europe
## 8 AL ALB 2862427 Europe
## 9 AL ALB 2862427 Europe
## 10 AL ALB 2862427 Europe
## Cumulative_number_for_14_days_of_COVID.19_cases_per_100000
## 1 76.12421
## 2 74.76173
## 3 72.59574
## 4 70.60442
## 5 69.10220
## 6 67.87946
## 7 66.44711
## 8 65.50385
## 9 65.71347
## 10 66.06282
# Tally the "New Cases" observations by their first digit (1, 2, ..., 9).
obs.numb.dig(COVID.albania$cases, dig = 1)
## [1] 119 24 4 12 8 17 10 12 12
# Same tally, this time on the second digit.
obs.numb.dig(COVID.albania$cases, dig = 2)
## [1] 18 19 27 23 19 32 18 13 10 7
#
# Histograms of the raw observations. The number of classes is set explicitly
# (nclass = 10) for a clearer view, the built-in legend is switched off, and a
# custom "Benford distribution" legend is drawn at fixed plot coordinates.

# Daily new cases.
dat.distr(
  COVID.albania$cases,
  dig = 1,
  nclass = 10,
  legend = FALSE,
  xlab = "Observation",
  ylab = "Frequency",
  main = "Distribution of observations (Albania new cases)"
)
legend(50, 60, "Benford distribution", fill = "red", box.col = "white")

# Daily deaths.
dat.distr(
  COVID.albania$deaths,
  dig = 1,
  nclass = 10,
  legend = FALSE,
  xlab = "Observation",
  ylab = "Frequency",
  main = "Distribution of observations (Albania deaths)"
)
legend(3, 35, "Benford distribution", fill = "red", box.col = "white")
#
# Observed digit frequencies of daily new cases set against both the Benford
# and the Blondeau models (mod = "ben&blo").
# Reference: https://cran.r-project.org/web/packages/BeyondBenford/index.html

# First digit.
digit.distr(
  COVID.albania$cases,
  dig = 1,
  mod = "ben&blo",
  No.sd = 1,
  Sd.pr = 1,
  main = "First digit distribution (Albania new cases)"
)

# Second digit.
digit.distr(
  COVID.albania$cases,
  dig = 2,
  mod = "ben&blo",
  No.sd = 1,
  Sd.pr = 1,
  main = "Second digit distribution (Albania new cases)"
)
#
# Daily deaths: histogram redrawn with 20 classes and a custom legend.
dat.distr(
  COVID.albania$deaths,
  dig = 1,
  nclass = 20,
  legend = FALSE,
  xlab = "Observations",
  ylab = "Frequency",
  main = "Distribution of observed data (Albania deaths)"
)
legend(4, 30, "Benford distribution", fill = "red", box.col = "white")

# Daily deaths: first-digit frequencies against the Benford and Blondeau models.
digit.distr(
  COVID.albania$deaths,
  dig = 1,
  mod = "ben&blo",
  No.sd = 1,
  Sd.pr = 1,
  main = "First digit distribution (Albania deaths)"
)

# Daily deaths: second-digit frequencies. The recorded run returned the message
# below, presumably because no daily death count has two digits (TODO confirm).
digit.distr(
  COVID.albania$deaths,
  dig = 2,
  mod = "ben&blo",
  No.sd = 1,
  Sd.pr = 1,
  main = "Second digit distribution (Albania deaths)"
)
## [1] "No eligible value"
#
# Now let us use Pearson's chi-square test to check how well the observed digit distribution fits the Benford or the Blondeau distribution.
# Let us start with "New cases".
# Chi-square tests on the first digit. pval = 1 is passed on every call; the
# recorded output shows the p-value next to the statistic.

# New cases against the Benford distribution (default model).
chi2(COVID.albania$cases, dig = 1, pval = 1)
## chi2 pval
## 1 Chi2 value is: The p-value is:
## 2 78.9655009667095 7.89368570508486e-14

# New cases against the Blondeau distribution (mod = "BDS").
chi2(COVID.albania$cases, dig = 1, pval = 1, mod = "BDS")
## chi2 pval
## 1 Chi2 value is: The p-value is:
## 2 78.6334671193325 9.20374887414255e-14

# Deaths against the Benford distribution (default model).
chi2(COVID.albania$deaths, dig = 1, pval = 1)
## chi2 pval
## 1 Chi2 value is: The p-value is:
## 2 47.1347209133463 1.44531085100041e-07

# Deaths against the Blondeau distribution (mod = "BDS"); the recorded run
# could not apply the test, as the message below shows.
chi2(COVID.albania$deaths, dig = 1, pval = 1, mod = "BDS")
## [1] "Chi2 can not be applied: at least one insufficient theoretical frequency"
# If the p-value is > 0.05, we fail to reject the null hypothesis H0: the data follow the Benford (Blondeau) distribution.
# If the p-value is < 0.05, there is significant evidence to reject the null hypothesis.
#
# This is an ongoing work. Please comment for advice.
#
# Eralda Gjika , Department of Applied Mathematics, Faculty of Natural Science, University of Tirana, ALBANIA