Import Libraries

library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Read the Data

# Load the yearly cyber-threat counts from the GitHub-hosted CSV.
# Without an explicit encoding the first header came through as `ï..Year`,
# which is what a leading UTF-8 byte-order mark looks like after read.csv()
# sanitises column names. Declaring "UTF-8-BOM" drops the mark, so the column
# is read as plain `Year`.
url <- "https://raw.githubusercontent.com/AlphaCurse/CyberThreats/main/cyberthreats.csv"
df <- read.csv(url, fileEncoding = "UTF-8-BOM")

# Preview the raw rows: some counts still carry thousands separators.
head(df)
##   Year Adware Backdoor Ransomware Trojan
## 1 2022  2,242      356       2352    680
## 2 2016  2,679      664       3634    429
## 3 2020  1,853      523      3,122    534
## 4 2019   1634      354      2,685    254
## 5 2015  1,263      235      1,547    336
## 6 2017    856      273       1785    346

Tidy and Transform Data

# Make sure the first column is called `Year`, whatever name the CSV reader
# produced for it.
names(df)[1] <- "Year"

# Most recent year first.
df <- df[order(df$Year, decreasing = TRUE), ]

# Some counts were exported with thousands separators (e.g. "2,242"), which
# makes read.csv() treat the column as text. Strip the commas and convert every
# threat column to integer, not only the ones that happened to show commas in
# the preview, so all four columns are guaranteed numeric before summing.
threat_cols <- c("Adware", "Backdoor", "Ransomware", "Trojan")
df[threat_cols] <- lapply(
  df[threat_cols],
  function(counts) as.integer(gsub(",", "", counts, fixed = TRUE))
)
head(df)
##   Year Adware Backdoor Ransomware Trojan
## 1 2022   2242      356       2352    680
## 7 2021    945      195       2073    264
## 3 2020   1853      523       3122    534
## 4 2019   1634      354       2685    254
## 8 2018    735      152       1863    174
## 6 2017    856      273       1785    346

Analysis: comparing total cyber threats by year, 2017 to 2022

# Total threats per year = sum of the four threat-type counts in each row.
# Columns are picked by name rather than by position so the sum stays correct
# if columns are added or reordered. na.rm = TRUE treats a missing count as 0.
threat_cols <- c("Adware", "Backdoor", "Ransomware", "Trojan")
df$Total <- rowSums(df[threat_cols], na.rm = TRUE)
head(df)
##   Year Adware Backdoor Ransomware Trojan Total
## 1 2022   2242      356       2352    680  5630
## 7 2021    945      195       2073    264  3477
## 3 2020   1853      523       3122    534  6032
## 4 2019   1634      354       2685    254  4927
## 8 2018    735      152       1863    174  2924
## 6 2017    856      273       1785    346  3260

Below, I have calculated each year's share of all cyber threats recorded from 2017 to 2022, i.e. the probability that a threat picked at random from this period occurred in that year. As we can see, 2017 accounts for 12.42%, 2018 for 11.14%, 2019 for 18.77%, 2020 for 22.98%, 2021 for 13.25%, and 2022 for 21.45%. The share rises in 2019 and peaks in 2020, coinciding with the onset of the COVID-19 pandemic (late 2019 into 2020), and then drops quickly in 2021. Seeing as many employers kept their employees working remotely, the likelihood of receiving a cyber attack appears to be higher than when employees were in-house.

# Each year's share (in percent) of all threats recorded from 2017 to 2022.
# The shares are derived from df$Total instead of retyping the totals by hand,
# so they cannot drift out of sync with the data.
in_window <- df$Year >= 2017 & df$Year <= 2022
share_pct <- (df$Total[in_window] / sum(df$Total[in_window])) * 100
names(share_pct) <- df$Year[in_window]

# `[[` looks the year up by name and drops the name, so each value prints as a
# bare number.
year_2022 <- share_pct[["2022"]]
year_2021 <- share_pct[["2021"]]
year_2020 <- share_pct[["2020"]]
year_2019 <- share_pct[["2019"]]
year_2018 <- share_pct[["2018"]]
year_2017 <- share_pct[["2017"]]

year_2022
## [1] 21.44762
year_2021
## [1] 13.24571
year_2020
## [1] 22.97905
year_2019
## [1] 18.76952
year_2018
## [1] 11.13905
year_2017
## [1] 12.41905