# Data loading and first inspection ----
# The former `rm(list = ls())` call was dropped: it wipes the caller's workspace
# as a side effect and still does not give a clean session (attached packages
# and options survive). Restart R when a fresh session is needed.
# datasources: https://bdl.stat.gov.pl/bdl/dane/podgrup/teryt, https://bdl.stat.gov.pl/bdl/sta

# Machine-specific absolute path to the input file; adjust when running elsewhere.
data <- "C:/Users/Ada/Desktop/dane.csv"
# The file uses ';' as field separator and ',' as decimal mark.
# Read through `data` so the path is defined in exactly one place.
DATA <- read.csv(data, dec = ",", sep = ";")
any(is.na(DATA)) # there is no missing data
## [1] FALSE
# View() opens a spreadsheet-style viewer, which is only useful (and only
# reliably available) in an interactive session.
if (interactive()) {
  View(DATA)
}
str(DATA)
## 'data.frame': 380 obs. of 6 variables:
## $ Counties : chr "Powiat bolesławiecki" "Powiat dzierżoniowski" "Powiat głogowski" "Powiat górowski" ...
## $ Economic.crimes : int 377 451 344 227 168 101 100 499 773 158 ...
## $ Criminal.offenses : int 2087 1161 1771 768 682 1296 545 2198 819 1273 ...
## $ Population.density : num 67.5 200.3 193.3 44.3 81.9 ...
## $ Population.in.thousands: num 87.9 95.9 85.7 32.7 47.6 ...
## $ Mean.Income : num 6182 5724 5857 5423 6143 ...
summary(DATA)
## Counties Economic.crimes Criminal.offenses Population.density
## Length:380 Min. : 28.0 Min. : 111.0 Min. : 18.20
## Class :character 1st Qu.: 166.8 1st Qu.: 510.5 1st Qu.: 57.85
## Mode :character Median : 320.0 Median : 769.0 Median : 89.80
## Mean : 695.5 Mean : 1292.0 Mean : 355.42
## 3rd Qu.: 604.0 3rd Qu.: 1276.0 3rd Qu.: 180.07
## Max. :24640.0 Max. :39018.0 Max. :3600.10
## Population.in.thousands Mean.Income
## Min. : 18.80 Min. : 4669
## 1st Qu.: 51.98 1st Qu.: 5411
## Median : 74.25 Median : 5671
## Mean : 99.39 Mean : 5821
## 3rd Qu.: 108.17 3rd Qu.: 6031
## Max. :1862.00 Max. :11362
names(DATA)
## [1] "Counties" "Economic.crimes"
## [3] "Criminal.offenses" "Population.density"
## [5] "Population.in.thousands" "Mean.Income"
# Package setup ----
# Pin a CRAN mirror so install.packages() has a repository to download from.
options(repos = c(CRAN = "https://cloud.r-project.org"))

# Install tinytex only when it is missing, so re-running the script does not
# download the package again.
if (!requireNamespace("tinytex", quietly = TRUE)) {
  install.packages("tinytex")
}
## Console output recorded from a run that performed the installation
## (translated from the Polish locale):
## Installing package into 'C:/Users/Ada/AppData/Local/R/win-library/4.3'
## (as 'lib' is unspecified)
## package 'tinytex' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\Ada\AppData\Local\Temp\RtmpYjAWxF\downloaded_packages

# Install the TinyTeX LaTeX distribution only when it is not already in use.
# `force = TRUE` is kept so that another detected LaTeX distribution does not
# stop the installation.
if (!tinytex::is_tinytex()) {
  tinytex::install_tinytex(force = TRUE)
}

## ggplot2 - a convenient package of data visualization tools;
## installed only when it is not available yet.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
## Console output recorded from a run that performed the installation
## (translated from the Polish locale):
## Installing package into 'C:/Users/Ada/AppData/Local/R/win-library/4.3'
## (as 'lib' is unspecified)
## package 'ggplot2' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\Ada\AppData\Local\Temp\RtmpYjAWxF\downloaded_packages
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.2
# Reference levels for the two crime counts ----
# Arithmetic means of the raw county counts.
mean_economic <- mean(DATA[["Economic.crimes"]])
mean_criminal <- mean(DATA[["Criminal.offenses"]])
# Means taken on the log10 scale and transformed back (i.e. geometric means);
# these are the values drawn as horizontal reference lines on the log-scaled
# y axes of the scatter plots.
lmean_economic <- 10^mean(log10(DATA[["Economic.crimes"]]))
lmean_criminal <- 10^mean(log10(DATA[["Criminal.offenses"]]))
# Scatter plot: crime counts vs. county population ----
# Both crime series share one panel; dashed lines mark the log-scale means
# (`lmean_economic`, `lmean_criminal`).
ggplot(DATA) +
  geom_point(
    aes(
      x = Population.in.thousands,
      y = Economic.crimes,
      color = "Economic Crime"
    ),
    size = 1.5
  ) +
  geom_point(
    aes(
      x = Population.in.thousands,
      y = Criminal.offenses,
      color = "Criminal offenses"
    ),
    size = 1.5
  ) +
  # The x-axis label announces a log scale, so the scale is actually applied.
  scale_x_log10() +
  scale_y_log10() +
  # Named values pin each series to its colour. An unnamed vector is assigned
  # in alphabetical order of the labels, which gave "Criminal offenses" the
  # blue tone and clashed with the darkblue/maroon mean lines below.
  scale_color_manual(
    values = c("Economic Crime" = "skyblue", "Criminal offenses" = "violet")
  ) +
  labs(
    title = "Scatter Plot of Crime Types by Population Size",
    x = "Population in thousands (log scale)",
    y = "Number of crimes (log scale)",
    color = "Crime Type"
  ) +
  theme_minimal() +
  geom_hline(
    yintercept = lmean_economic,
    linetype = "dashed", color = "darkblue", linewidth = 0.5
  ) +
  geom_hline(
    yintercept = lmean_criminal,
    linetype = "dashed", color = "maroon", linewidth = 0.5
  ) +
  # Right-align the labels at the largest x value so they stay inside the
  # panel on the log-scaled axis.
  annotate(
    "text",
    x = max(DATA$Population.in.thousands), y = lmean_economic,
    label = "MeanL_Economic crimes",
    hjust = 1, vjust = -0.5, color = "darkblue"
  ) +
  annotate(
    "text",
    x = max(DATA$Population.in.thousands), y = lmean_criminal,
    label = "MeanL_Criminal offenses",
    hjust = 1, vjust = -0.5, color = "maroon"
  )

# Scatter plot: crime counts vs. population density (linear x axis) ----
# Both crime series share one panel; dashed lines mark the log-scale means
# (`lmean_economic`, `lmean_criminal`).
ggplot(DATA) +
  geom_point(
    aes(x = Population.density, y = Economic.crimes, color = "Economic Crime"),
    size = 1.5
  ) +
  geom_point(
    aes(
      x = Population.density,
      y = Criminal.offenses,
      color = "Criminal offenses"
    ),
    size = 1.5
  ) +
  scale_y_log10() +
  # Named values pin each series to its colour. An unnamed vector is assigned
  # in alphabetical order of the labels, which gave "Criminal offenses" the
  # blue tone and clashed with the darkblue/maroon mean lines below.
  scale_color_manual(
    values = c("Economic Crime" = "skyblue", "Criminal offenses" = "violet")
  ) +
  labs(
    title = "Scatter Plot of Crime Types by Population Density",
    x = "Population density",
    y = "Number of crimes (log scale)",
    color = "Crime Type"
  ) +
  theme_minimal() +
  geom_hline(
    yintercept = lmean_economic,
    linetype = "dashed", color = "darkblue", linewidth = 0.5
  ) +
  geom_hline(
    yintercept = lmean_criminal,
    linetype = "dashed", color = "maroon", linewidth = 0.5
  ) +
  # Labels sit at 80% of the largest density, just above their line.
  annotate(
    "text",
    x = max(DATA$Population.density) * 0.8, y = lmean_economic,
    label = "MeanL_Economic crimes", vjust = -0.5, color = "darkblue"
  ) +
  annotate(
    "text",
    x = max(DATA$Population.density) * 0.8, y = lmean_criminal,
    label = "MeanL_Criminal offenses", vjust = -0.5, color = "maroon"
  )

# Scatter plot: crime counts vs. population density (log x axis) ----
# A logarithmic x scale makes the data easier to read than the linear version.
# Dashed lines mark the log-scale means (`lmean_economic`, `lmean_criminal`).
ggplot(DATA) +
  geom_point(
    aes(x = Population.density, y = Economic.crimes, color = "Economic Crime"),
    size = 1.5
  ) +
  geom_point(
    aes(
      x = Population.density,
      y = Criminal.offenses,
      color = "Criminal offenses"
    ),
    size = 1.5
  ) +
  scale_x_log10() +
  scale_y_log10() +
  # Named values pin each series to its colour. An unnamed vector is assigned
  # in alphabetical order of the labels, which gave "Criminal offenses" the
  # blue tone and clashed with the darkblue/maroon mean lines below.
  scale_color_manual(
    values = c("Economic Crime" = "skyblue", "Criminal offenses" = "violet")
  ) +
  labs(
    title = "Scatter Plot of Crime Types by Population Density",
    x = "Population density (log scale)",
    y = "Number of crimes (log scale)",
    color = "Crime Type"
  ) +
  theme_minimal() +
  geom_hline(
    yintercept = lmean_economic,
    linetype = "dashed", color = "darkblue", linewidth = 0.5
  ) +
  geom_hline(
    yintercept = lmean_criminal,
    linetype = "dashed", color = "maroon", linewidth = 0.5
  ) +
  # Labels sit at half of the largest density, just above their line.
  annotate(
    "text",
    x = max(DATA$Population.density) * 0.5, y = lmean_economic,
    label = "MeanL_Economic crimes", vjust = -0.5, color = "darkblue"
  ) +
  annotate(
    "text",
    x = max(DATA$Population.density) * 0.5, y = lmean_criminal,
    label = "MeanL_Criminal offenses", vjust = -0.5, color = "maroon"
  )

# Scatter plot: crime counts vs. mean income ----
# Both crime series share one panel; dashed lines mark the log-scale means
# (`lmean_economic`, `lmean_criminal`).
ggplot(DATA) +
  geom_point(
    aes(x = Mean.Income, y = Economic.crimes, color = "Economic Crime"),
    size = 1.5
  ) +
  geom_point(
    aes(x = Mean.Income, y = Criminal.offenses, color = "Criminal offenses"),
    size = 1.5
  ) +
  scale_y_log10() +
  # Same explicit colour mapping as the other scatter plots of this script;
  # named values keep each series matched to its darkblue/maroon mean line.
  scale_color_manual(
    values = c("Economic Crime" = "skyblue", "Criminal offenses" = "violet")
  ) +
  labs(
    title = "Scatter Plot of Crime Types by Mean Income",
    x = "Mean income (in county)",
    y = "Number of crimes (log scale)",
    color = "Crime type"
  ) +
  theme_minimal() +
  geom_hline(
    yintercept = lmean_economic,
    linetype = "dashed", color = "darkblue", linewidth = 0.5
  ) +
  geom_hline(
    yintercept = lmean_criminal,
    linetype = "dashed", color = "maroon", linewidth = 0.5
  ) +
  # Anchor the labels to the plotted x variable. The previous position was
  # derived from Population.density * 3, which only fell on the income axis
  # by coincidence; 95% of the largest income keeps about the same spot.
  annotate(
    "text",
    x = max(DATA$Mean.Income) * 0.95, y = lmean_economic,
    label = "MeanL_Economic crimes", vjust = -0.5, color = "darkblue"
  ) +
  annotate(
    "text",
    x = max(DATA$Mean.Income) * 0.95, y = lmean_criminal,
    label = "MeanL_Criminal offenses", vjust = -0.5, color = "maroon"
  )
