# COVID-19 figures: new cases and deaths in ALBANIA, period 31 December 2019 - 12 October 2020
#
# Eralda Gjika , 
# Department of Applied Mathematics, Faculty of Natural Science, University of Tirana, ALBANIA
# E-Mail: eralda.gjika@fshn.edu.al
# LinkedIn: https://www.linkedin.com/in/eralda-dhamo-gjika-71879128/ 
#
# Install and use the "BeyondBenford" package in R
#
# DATA SOURCE 
# Script for downloading the CSV file into “R” software
# Make sure that you have the “utils” package installed.
# these libraries need to be loaded
library(utils)
# Download the ECDC COVID-19 case distribution CSV and store it in "data".
# Only empty fields are treated as missing values; the "UTF-8-BOM" encoding
# is the one requested for this file.
data <- read.csv(
  file = "https://opendata.ecdc.europa.eu/covid19/casedistribution/csv",
  fileEncoding = "UTF-8-BOM",
  na.strings = ""
)
#
library(BeyondBenford) # the library needed for the test and graphics
# Load the Albania extract from a local CSV file and show its first 10 rows.
COVID.albania <- utils::read.csv(
  file = "~/COVID 19-folder R project/COVID -Abania 12 Oct.csv"
)
head(COVID.albania, n = 10)
##                 dateRep day month year cases deaths countriesAndTerritories
## 1  2020-10-13T00:00:00Z  13    10 2020   171      4                 Albania
## 2  2020-10-12T00:00:00Z  12    10 2020   168      4                 Albania
## 3  2020-10-11T00:00:00Z  11    10 2020   165      3                 Albania
## 4  2020-10-10T00:00:00Z  10    10 2020   167      2                 Albania
## 5  2020-10-09T00:00:00Z   9    10 2020   169      4                 Albania
## 6  2020-10-08T00:00:00Z   8    10 2020   162      4                 Albania
## 7  2020-10-07T00:00:00Z   7    10 2020   158      3                 Albania
## 8  2020-10-06T00:00:00Z   6    10 2020   144      4                 Albania
## 9  2020-10-05T00:00:00Z   5    10 2020   149      4                 Albania
## 10 2020-10-04T00:00:00Z   4    10 2020   152      3                 Albania
##    geoId countryterritoryCode popData2019 continentExp
## 1     AL                  ALB     2862427       Europe
## 2     AL                  ALB     2862427       Europe
## 3     AL                  ALB     2862427       Europe
## 4     AL                  ALB     2862427       Europe
## 5     AL                  ALB     2862427       Europe
## 6     AL                  ALB     2862427       Europe
## 7     AL                  ALB     2862427       Europe
## 8     AL                  ALB     2862427       Europe
## 9     AL                  ALB     2862427       Europe
## 10    AL                  ALB     2862427       Europe
##    Cumulative_number_for_14_days_of_COVID.19_cases_per_100000
## 1                                                    76.12421
## 2                                                    74.76173
## 3                                                    72.59574
## 4                                                    70.60442
## 5                                                    69.10220
## 6                                                    67.87946
## 7                                                    66.44711
## 8                                                    65.50385
## 9                                                    65.71347
## 10                                                   66.06282
# Count how many "New Cases" observations have 1, 2, ..., 9 respectively as
# their first digit.
obs.numb.dig(
  COVID.albania$cases,
  dig = 1
)
## [1] 119  24   4  12   8  17  10  12  12
# Same count as above, this time for the second digit.
obs.numb.dig(
  COVID.albania$cases,
  dig = 2
)
##  [1] 18 19 27 23 19 32 18 13 10  7
#
# Histograms of the observed values for new cases and for deaths.
# The number of classes is set to 10 for a more readable histogram.
# The built-in legend is switched off (legend = FALSE) and a custom legend,
# labelling the red element as the Benford distribution, is drawn at
# hand-picked coordinates for each plot.
# NOTE(review): the legend coordinates are tuned to this data snapshot; they
# may need adjusting if the input data change.
dat.distr(
  COVID.albania$cases,
  dig = 1, nclass = 10, legend = FALSE,
  xlab = "Observation",
  ylab = "Frequency", # was misspelled "Frequence"
  main = "Distribution of observations (Albania new cases)"
)
legend(50, 60, "Benford distribution", fill = "red", box.col = "white")
dat.distr(
  COVID.albania$deaths,
  dig = 1, nclass = 10, legend = FALSE,
  xlab = "Observation",
  ylab = "Frequency", # was misspelled "Frequence"
  main = "Distribution of observations (Albania deaths)"
)
legend(3, 35, "Benford distribution", fill = "red", box.col = "white")
#
# Compare the observed digit frequencies of the new cases with both the
# Benford and the Blondeau distributions (mod = "ben&blo"), first for the
# first digit and then for the second digit.
# Reference: https://cran.r-project.org/web/packages/BeyondBenford/index.html
digit.distr(
  COVID.albania$cases,
  mod = "ben&blo",
  dig = 1,
  No.sd = 1,
  Sd.pr = 1,
  main = "First digit distribution (Albania new cases)"
)
digit.distr(
  COVID.albania$cases,
  mod = "ben&blo",
  dig = 2,
  No.sd = 1,
  Sd.pr = 1,
  main = "Second digit distribution (Albania new cases)"
)
#
# Deaths: histogram of the observed values, this time with 20 classes and a
# custom legend in place of the built-in one.
dat.distr(
  COVID.albania$deaths,
  dig = 1,
  nclass = 20,
  legend = FALSE,
  xlab = "Observations",
  ylab = "Frequency",
  main = "Distribution of observed data (Albania deaths)"
)
legend(4, 30, "Benford distribution", fill = "red", box.col = "white")
# Deaths: first- and second-digit distributions against Benford and Blondeau.
digit.distr(
  COVID.albania$deaths,
  mod = "ben&blo",
  dig = 1,
  No.sd = 1,
  Sd.pr = 1,
  main = "First digit distribution (Albania deaths)"
)
# Recorded output of the second-digit call is shown below; presumably the
# daily death counts in this data set have no second digit (TODO confirm).
digit.distr(
  COVID.albania$deaths,
  mod = "ben&blo",
  dig = 2,
  No.sd = 1,
  Sd.pr = 1,
  main = "Second digit distribution (Albania deaths)"
)
## [1] "No eligible value"
#
# Pearson chi-square tests: how well do the first digits of the observations
# fit the Benford or the Blondeau distribution?
#
# New cases -- H0: the data follow the Benford distribution.
chi2(
  COVID.albania$cases,
  dig = 1,
  pval = 1
)
##               chi2                 pval
## 1   Chi2 value is:      The p-value is:
## 2 78.9655009667095 7.89368570508486e-14
# New cases -- H0: the data follow the Blondeau distribution.
chi2(
  COVID.albania$cases,
  mod = "BDS",
  dig = 1,
  pval = 1
)
##               chi2                 pval
## 1   Chi2 value is:      The p-value is:
## 2 78.6334671193325 9.20374887414255e-14
# Deaths -- H0: the data follow the Benford distribution.
chi2(
  COVID.albania$deaths,
  dig = 1,
  pval = 1
)
##               chi2                 pval
## 1   Chi2 value is:      The p-value is:
## 2 47.1347209133463 1.44531085100041e-07
# Deaths -- H0: the data follow the Blondeau distribution.
chi2(
  COVID.albania$deaths,
  mod = "BDS",
  dig = 1,
  pval = 1
)
## [1] "Chi2 can not be applied: at least one insufficient theoretical frequency"
# If p-value > 0.05, we fail to reject the null hypothesis H0: the data follow the Benford (Blondeau) distribution.
# If p-value < 0.05, there is significant evidence to reject the null hypothesis.
# 
# This is  an ongoing work. Please comment for advice. 
#
# Eralda Gjika , Department of Applied Mathematics, Faculty of Natural Science, University of Tirana, ALBANIA