# Load the county-level crime data set and take a first look at it.
# datasources: https://bdl.stat.gov.pl/bdl/dane/podgrup/teryt, https://bdl.stat.gov.pl/bdl/sta
#
# The workspace is deliberately NOT wiped with rm(list = ls()): that call
# silently deletes the caller's objects whenever the script is source()d.
# Restart the R session instead if a clean environment is needed.

# Path to the input file, defined once and reused by read.csv() below.
data <- "C:/Users/Ada/Desktop/dane.csv"
# The file uses ";" as the field separator and "," as the decimal mark.
DATA <- read.csv(data, dec = ",", sep = ";")
any(is.na(DATA)) # there is no missing data
## [1] FALSE
# View() opens a GUI data viewer, so it is only called in interactive sessions
# (it would fail or be pointless under Rscript / when rendering a report).
if (interactive()) {
  View(DATA)
}
str(DATA)
## 'data.frame':    380 obs. of  6 variables:
##  $ Counties               : chr  "Powiat bolesławiecki" "Powiat dzierżoniowski" "Powiat głogowski" "Powiat górowski" ...
##  $ Economic.crimes        : int  377 451 344 227 168 101 100 499 773 158 ...
##  $ Criminal.offenses      : int  2087 1161 1771 768 682 1296 545 2198 819 1273 ...
##  $ Population.density     : num  67.5 200.3 193.3 44.3 81.9 ...
##  $ Population.in.thousands: num  87.9 95.9 85.7 32.7 47.6 ...
##  $ Mean.Income            : num  6182 5724 5857 5423 6143 ...
summary(DATA)
##    Counties         Economic.crimes   Criminal.offenses Population.density
##  Length:380         Min.   :   28.0   Min.   :  111.0   Min.   :  18.20   
##  Class :character   1st Qu.:  166.8   1st Qu.:  510.5   1st Qu.:  57.85   
##  Mode  :character   Median :  320.0   Median :  769.0   Median :  89.80   
##                     Mean   :  695.5   Mean   : 1292.0   Mean   : 355.42   
##                     3rd Qu.:  604.0   3rd Qu.: 1276.0   3rd Qu.: 180.07   
##                     Max.   :24640.0   Max.   :39018.0   Max.   :3600.10   
##  Population.in.thousands  Mean.Income   
##  Min.   :  18.80         Min.   : 4669  
##  1st Qu.:  51.98         1st Qu.: 5411  
##  Median :  74.25         Median : 5671  
##  Mean   :  99.39         Mean   : 5821  
##  3rd Qu.: 108.17         3rd Qu.: 6031  
##  Max.   :1862.00         Max.   :11362
names(DATA)
## [1] "Counties"                "Economic.crimes"        
## [3] "Criminal.offenses"       "Population.density"     
## [5] "Population.in.thousands" "Mean.Income"
# Use the CRAN cloud mirror for any package installation below.
options(repos = c(CRAN = "https://cloud.r-project.org"))

# Packages are installed only when missing, so re-running the script does not
# download and reinstall them every time.
if (!requireNamespace("tinytex", quietly = TRUE)) {
  install.packages("tinytex")
}
## Recorded output of a first-time install:
## Installing package into 'C:/Users/Ada/AppData/Local/R/win-library/4.3'
## (as 'lib' is unspecified)
## package 'tinytex' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
##  C:\Users\Ada\AppData\Local\Temp\RtmpYjAWxF\downloaded_packages

# Install the TinyTeX LaTeX distribution only if it is not already the one in
# use. force = TRUE is kept so that, when installation does run, it proceeds
# even if another LaTeX distribution is detected.
if (!tinytex::is_tinytex()) {
  tinytex::install_tinytex(force = TRUE)
}

## ggplot2 - a convenient package of data visualization tools
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
## Recorded output of a first-time install:
## Installing package into 'C:/Users/Ada/AppData/Local/R/win-library/4.3'
## (as 'lib' is unspecified)
## package 'ggplot2' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
##  C:\Users\Ada\AppData\Local\Temp\RtmpYjAWxF\downloaded_packages
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.2
# Arithmetic means of the two crime counts over all rows of DATA.
mean_economic <- with(DATA, mean(Economic.crimes))
mean_criminal <- with(DATA, mean(Criminal.offenses))

# Means taken on the log10 scale and transformed back (geometric means);
# the plots below draw these as horizontal reference lines on their
# log-scaled y-axes.
lmean_economic <- 10^mean(log10(DATA[["Economic.crimes"]]))
lmean_criminal <- 10^mean(log10(DATA[["Criminal.offenses"]]))

# Scatter plot of both crime counts against county population (in thousands).
# The y-axis is log10-scaled; the x-axis is linear. Dashed horizontal lines
# mark lmean_economic and lmean_criminal (the log-scale means computed above).
ggplot(DATA) +
  geom_point(
    aes(
      x = Population.in.thousands, y = Economic.crimes,
      color = "Economic Crime"
    ),
    size = 1.5
  ) +
  geom_point(
    aes(
      x = Population.in.thousands, y = Criminal.offenses,
      color = "Criminal offenses"
    ),
    size = 1.5
  ) +
  scale_y_log10() +
  # Colours are named explicitly. An unnamed vector is matched to the legend
  # keys in sorted order, which painted "Criminal offenses" skyblue and
  # "Economic Crime" violet - the reverse of the blue (economic) / red
  # (criminal) reference lines drawn below.
  scale_color_manual(
    values = c("Economic Crime" = "skyblue", "Criminal offenses" = "violet")
  ) +
  # The x label no longer claims a log scale: no scale_x_log10() is applied.
  labs(
    title = "Scatter Plot of Crime Types by Population Size",
    x = "Population in thousands",
    y = "Number of crimes (log scale)",
    color = "Crime Type"
  ) +
  theme_minimal() +
  geom_hline(
    yintercept = lmean_economic, linetype = "dashed",
    color = "darkblue", linewidth = 0.5
  ) +
  geom_hline(
    yintercept = lmean_criminal, linetype = "dashed",
    color = "maroon", linewidth = 0.5
  ) +
  # Line labels sit at 80% of the x range, just above each reference line.
  annotate(
    "text",
    x = max(DATA$Population.in.thousands) * 0.8, y = lmean_economic,
    label = "MeanL_Economic crimes", vjust = -0.5, color = "darkblue"
  ) +
  annotate(
    "text",
    x = max(DATA$Population.in.thousands) * 0.8, y = lmean_criminal,
    label = "MeanL_Criminal offenses", vjust = -0.5, color = "maroon"
  )

# Scatter plot of both crime counts against population density.
# The y-axis is log10-scaled; the x-axis is linear. Dashed horizontal lines
# mark lmean_economic and lmean_criminal (the log-scale means computed above).
ggplot(DATA) +
  geom_point(
    aes(x = Population.density, y = Economic.crimes, color = "Economic Crime"),
    size = 1.5
  ) +
  geom_point(
    aes(
      x = Population.density, y = Criminal.offenses,
      color = "Criminal offenses"
    ),
    size = 1.5
  ) +
  scale_y_log10() +
  # Colours are named explicitly. An unnamed vector is matched to the legend
  # keys in sorted order, which painted "Criminal offenses" skyblue and
  # "Economic Crime" violet - the reverse of the blue (economic) / red
  # (criminal) reference lines drawn below.
  scale_color_manual(
    values = c("Economic Crime" = "skyblue", "Criminal offenses" = "violet")
  ) +
  labs(
    title = "Scatter Plot of Crime Types by Population Density",
    x = "Population density",
    y = "Number of crimes (log scale)",
    color = "Crime Type"
  ) +
  theme_minimal() +
  geom_hline(
    yintercept = lmean_economic, linetype = "dashed",
    color = "darkblue", linewidth = 0.5
  ) +
  geom_hline(
    yintercept = lmean_criminal, linetype = "dashed",
    color = "maroon", linewidth = 0.5
  ) +
  # Line labels sit at 80% of the x range, just above each reference line.
  annotate(
    "text",
    x = max(DATA$Population.density) * 0.8, y = lmean_economic,
    label = "MeanL_Economic crimes", vjust = -0.5, color = "darkblue"
  ) +
  annotate(
    "text",
    x = max(DATA$Population.density) * 0.8, y = lmean_criminal,
    label = "MeanL_Criminal offenses", vjust = -0.5, color = "maroon"
  )

# A logarithmic x-axis spreads out the skewed density values and makes the plot easier to read

# Scatter plot of both crime counts against population density, with both
# axes log10-scaled. Dashed horizontal lines mark lmean_economic and
# lmean_criminal (the log-scale means computed above).
ggplot(DATA) +
  geom_point(
    aes(x = Population.density, y = Economic.crimes, color = "Economic Crime"),
    size = 1.5
  ) +
  geom_point(
    aes(
      x = Population.density, y = Criminal.offenses,
      color = "Criminal offenses"
    ),
    size = 1.5
  ) +
  scale_x_log10() +
  scale_y_log10() +
  # Colours are named explicitly. An unnamed vector is matched to the legend
  # keys in sorted order, which painted "Criminal offenses" skyblue and
  # "Economic Crime" violet - the reverse of the blue (economic) / red
  # (criminal) reference lines drawn below.
  scale_color_manual(
    values = c("Economic Crime" = "skyblue", "Criminal offenses" = "violet")
  ) +
  labs(
    title = "Scatter Plot of Crime Types by Population Density",
    x = "Population density (log scale)",
    y = "Number of crimes (log scale)",
    color = "Crime Type"
  ) +
  theme_minimal() +
  geom_hline(
    yintercept = lmean_economic, linetype = "dashed",
    color = "darkblue", linewidth = 0.5
  ) +
  geom_hline(
    yintercept = lmean_criminal, linetype = "dashed",
    color = "maroon", linewidth = 0.5
  ) +
  # Line labels are placed at half the maximum density, just above each
  # reference line.
  annotate(
    "text",
    x = max(DATA$Population.density) * 0.5, y = lmean_economic,
    label = "MeanL_Economic crimes", vjust = -0.5, color = "darkblue"
  ) +
  annotate(
    "text",
    x = max(DATA$Population.density) * 0.5, y = lmean_criminal,
    label = "MeanL_Criminal offenses", vjust = -0.5, color = "maroon"
  )

# Scatter plot of both crime counts against mean income per county.
# The y-axis is log10-scaled; the x-axis is linear. Dashed horizontal lines
# mark lmean_economic and lmean_criminal (the log-scale means computed above).
ggplot(DATA) +
  geom_point(
    aes(x = Mean.Income, y = Economic.crimes, color = "Economic Crime"),
    size = 1.5
  ) +
  geom_point(
    aes(x = Mean.Income, y = Criminal.offenses, color = "Criminal offenses"),
    size = 1.5
  ) +
  scale_y_log10() +
  # Same explicit colour mapping as the other crime scatter plots, so each
  # series matches its blue (economic) / red (criminal) reference line.
  scale_color_manual(
    values = c("Economic Crime" = "skyblue", "Criminal offenses" = "violet")
  ) +
  labs(
    title = "Scatter Plot of Crime Types by Mean Income",
    x = "Mean income (in county)",
    y = "Number of crimes (log scale)",
    color = "Crime type"
  ) +
  theme_minimal() +
  geom_hline(
    yintercept = lmean_economic, linetype = "dashed",
    color = "darkblue", linewidth = 0.5
  ) +
  geom_hline(
    yintercept = lmean_criminal, linetype = "dashed",
    color = "maroon", linewidth = 0.5
  ) +
  # Label position is derived from the variable actually on the x-axis.
  # The previous max(Population.density) * 3 only landed inside the income
  # range by coincidence; 95% of the maximum income is about the same spot.
  annotate(
    "text",
    x = max(DATA$Mean.Income) * 0.95, y = lmean_economic,
    label = "MeanL_Economic crimes", vjust = -0.5, color = "darkblue"
  ) +
  annotate(
    "text",
    x = max(DATA$Mean.Income) * 0.95, y = lmean_criminal,
    label = "MeanL_Criminal offenses", vjust = -0.5, color = "maroon"
  )