# Load the project workspace, which provides `county_leukemia_128`.
# `fracking_locations_128` is used further down without being created in this
# script, so it presumably comes from the same file (TODO confirm).
# NOTE(review): this absolute Windows path only resolves on the author's machine.
load(file = "C:/Desktop/RWTH Aachen/Semester 1/Data & Information Management/Project/data_128.RData")
# Preview the first six rows of the county table.
head(county_leukemia_128, n = 6L)
## id leuk_cases AverageAge GreenNeighborhoodIndex CarsPerHH PopDens
## 1 1 28 50 and older 0.55333137 0.62988964 0.15221254
## 2 2 38 50 and older 0.09487596 0.20972911 0.16145311
## 3 3 36 15-50 0.51076218 0.02426262 0.02274151
## 4 4 35 50 and older 0.22416091 0.34061953 0.16278211
## 5 5 37 15 and younger 0.30390560 0.51989769 0.33524316
## 6 6 36 15-50 0.21691189 0.78509375 0.39659359
## PctPopGrowth Dist2FracLoc_1 Dist2FracLoc_2 Dist2FracLoc_3 Dist2FracLoc_4
## 1 -0.80870577 5.831941 3.780389 3.877897 6.292500
## 2 0.07389212 8.909646 2.369199 2.870503 9.151515
## 3 -0.55979462 5.815866 2.429792 1.734616 5.740380
## 4 0.54392735 7.802678 2.594612 2.918953 8.116144
## 5 -0.59737725 9.109335 2.054256 2.561358 9.321347
## 6 -2.62724807 6.980251 4.158250 4.568718 7.307248
## Dist2FracLoc_5 Dist2FracLoc_6 Dist2FracLoc_7 Dist2FracLoc_8 Dist2FracLoc_9
## 1 2.6148888 4.8785487 0.6207211 2.098640 0.2504166
## 2 0.7315699 5.5741146 2.8456412 4.370153 2.1724450
## 3 4.8405903 0.4680598 3.4471759 2.651246 4.0512372
## 4 1.3709540 4.9903627 1.7744627 3.236860 1.2334271
## 5 0.9848940 5.3232384 2.9753540 4.389665 2.4008013
## 6 1.6170904 6.6084237 2.3871396 4.160840 1.3183907
## Dist2FracLoc_10 lon lat
## 1 2.629392 44.93403 -29.47574
## 2 5.208164 89.50712 -18.58449
## 3 2.857167 37.06287 32.51696
## 4 4.011806 73.29633 -20.26620
## 5 5.234740 90.39685 -12.98208
## 6 4.807845 69.74439 -48.33941
## geometry
## 1 44.87360, 47.24555, 46.32829, 44.13322, 42.49227, 44.87360, -24.49018, -31.99073, -33.89176, -33.34422, -28.11933, -24.49018
## 2 93.49703, 91.80541, 90.59801, 86.13883, 89.74325, 91.16961, 93.49703, -17.13538, -22.62803, -22.48408, -18.64491, -15.75014, -15.97666, -17.13538
## 3 40.59488, 39.87425, 38.44133, 36.42051, 35.23348, 36.26135, 36.66375, 40.59488, 30.47198, 28.94620, 27.27499, 26.82128, 26.65704, 35.66801, 35.47690, 30.47198
## 4 78.90321, 78.49617, 73.96722, 69.78430, 69.98401, 73.80859, 78.90321, -19.58987, -21.00295, -23.39622, -19.31405, -18.89552, -15.54791, -19.58987
## 5 91.518933, 91.169609, 89.743250, 85.631455, 89.920254, 91.518933, -9.250811, -15.976659, -15.750137, -9.768976, -7.570804, -9.250811
## 6 78.81867, 79.98972, 61.54178, 69.45060, 78.81867, -50.34643, -59.63708, -59.63708, -40.97035, -50.34643
# Count the missing values in every column. vapply() pins the result to exactly
# one integer per column, whereas sapply() silently changes its return type
# depending on the input.
vapply(county_leukemia_128, function(column) sum(is.na(column)), integer(1))
## id leuk_cases AverageAge
## 0 0 0
## GreenNeighborhoodIndex CarsPerHH PopDens
## 0 0 0
## PctPopGrowth Dist2FracLoc_1 Dist2FracLoc_2
## 0 0 0
## Dist2FracLoc_3 Dist2FracLoc_4 Dist2FracLoc_5
## 0 0 0
## Dist2FracLoc_6 Dist2FracLoc_7 Dist2FracLoc_8
## 0 0 0
## Dist2FracLoc_9 Dist2FracLoc_10 lon
## 0 0 0
## lat geometry
## 0 0
# Build a working copy of "county_leukemia_128" restricted to the analysis
# variables (case count, covariates and the ten Dist2FracLoc_* columns) for a
# clearer overview.
data <- county_leukemia_128[, c(
  "leuk_cases", "AverageAge", "GreenNeighborhoodIndex", "CarsPerHH",
  "PopDens", "PctPopGrowth",
  paste0("Dist2FracLoc_", 1:10)
)]
# Five-number summaries (and level counts for AverageAge) of the selection.
summary(data)
## leuk_cases AverageAge GreenNeighborhoodIndex CarsPerHH
## Min. :23.00 15 and younger:186 Min. :0.0002333 Min. :0.001174
## 1st Qu.:32.00 15-50 :167 1st Qu.:0.2346412 1st Qu.:0.256101
## Median :35.00 50 and older :147 Median :0.4859838 Median :0.502197
## Mean :36.68 Mean :0.4862802 Mean :0.497339
## 3rd Qu.:41.00 3rd Qu.:0.7251946 3rd Qu.:0.749654
## Max. :52.00 Max. :0.9967076 Max. :0.999106
## PopDens PctPopGrowth Dist2FracLoc_1 Dist2FracLoc_2
## Min. :0.0000152 Min. :-6.9749 Min. : 0.000 Min. : 0.000
## 1st Qu.:0.0600216 1st Qu.:-1.4347 1st Qu.: 2.938 1st Qu.: 2.986
## Median :0.1753587 Median :-0.2529 Median : 4.128 Median : 4.690
## Mean :0.2427995 Mean :-0.4161 Mean : 4.626 Mean : 4.835
## 3rd Qu.:0.3783928 3rd Qu.: 0.6439 3rd Qu.: 6.410 3rd Qu.: 6.770
## Max. :0.9666323 Max. : 4.9987 Max. :10.000 Max. :10.000
## Dist2FracLoc_3 Dist2FracLoc_4 Dist2FracLoc_5 Dist2FracLoc_6
## Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 2.868 1st Qu.: 2.547 1st Qu.: 3.373 1st Qu.: 3.075
## Median : 4.476 Median : 3.938 Median : 5.260 Median : 4.846
## Mean : 4.587 Mean : 4.566 Mean : 5.245 Mean : 4.760
## 3rd Qu.: 6.374 3rd Qu.: 6.627 3rd Qu.: 7.139 3rd Qu.: 6.249
## Max. :10.000 Max. :10.000 Max. :10.000 Max. :10.000
## Dist2FracLoc_7 Dist2FracLoc_8 Dist2FracLoc_9 Dist2FracLoc_10
## Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 2.477 1st Qu.: 2.759 1st Qu.: 2.426 1st Qu.: 3.043
## Median : 4.010 Median : 4.278 Median : 4.217 Median : 4.639
## Mean : 4.433 Mean : 4.608 Mean : 4.443 Mean : 4.771
## 3rd Qu.: 6.161 3rd Qu.: 6.123 3rd Qu.: 6.129 3rd Qu.: 6.124
## Max. :10.000 Max. :10.000 Max. :10.000 Max. :10.000
Alternatively, use the psych package for an overview.
library(psych)
# Descriptive statistics (n, mean, sd, median, skew, ...) for every column.
psych::describe(x = data)
## vars n mean sd median trimmed mad min max
## leuk_cases 1 500 36.68 6.03 35.00 36.33 5.93 23.00 52.00
## AverageAge* 2 500 1.92 0.81 2.00 1.90 1.48 1.00 3.00
## GreenNeighborhoodIndex 3 500 0.49 0.29 0.49 0.49 0.36 0.00 1.00
## CarsPerHH 4 500 0.50 0.28 0.50 0.50 0.37 0.00 1.00
## PopDens 5 500 0.24 0.21 0.18 0.22 0.20 0.00 0.97
## PctPopGrowth 6 500 -0.42 1.79 -0.25 -0.34 1.52 -6.97 5.00
## Dist2FracLoc_1 7 500 4.63 2.46 4.13 4.52 2.29 0.00 10.00
## Dist2FracLoc_2 8 500 4.84 2.43 4.69 4.81 2.74 0.00 10.00
## Dist2FracLoc_3 9 500 4.59 2.36 4.48 4.54 2.61 0.00 10.00
## Dist2FracLoc_4 10 500 4.57 2.57 3.94 4.46 2.62 0.00 10.00
## Dist2FracLoc_5 11 500 5.24 2.52 5.26 5.26 2.80 0.00 10.00
## Dist2FracLoc_6 12 500 4.76 2.19 4.85 4.74 2.29 0.00 10.00
## Dist2FracLoc_7 13 500 4.43 2.42 4.01 4.31 2.57 0.00 10.00
## Dist2FracLoc_8 14 500 4.61 2.36 4.28 4.49 2.39 0.00 10.00
## Dist2FracLoc_9 15 500 4.44 2.44 4.22 4.35 2.75 0.00 10.00
## Dist2FracLoc_10 16 500 4.77 2.30 4.64 4.68 2.34 0.00 10.00
## range skew kurtosis se
## leuk_cases 29.00 0.49 -0.54 0.27
## AverageAge* 2.00 0.14 -1.48 0.04
## GreenNeighborhoodIndex 1.00 0.01 -1.21 0.01
## CarsPerHH 1.00 -0.01 -1.20 0.01
## PopDens 0.97 0.98 0.24 0.01
## PctPopGrowth 11.97 -0.41 0.63 0.08
## Dist2FracLoc_1 10.00 0.42 -0.72 0.11
## Dist2FracLoc_2 10.00 0.09 -0.85 0.11
## Dist2FracLoc_3 10.00 0.16 -0.68 0.11
## Dist2FracLoc_4 10.00 0.37 -0.95 0.11
## Dist2FracLoc_5 10.00 -0.02 -0.91 0.11
## Dist2FracLoc_6 10.00 0.04 -0.58 0.10
## Dist2FracLoc_7 10.00 0.42 -0.83 0.11
## Dist2FracLoc_8 10.00 0.43 -0.69 0.11
## Dist2FracLoc_9 10.00 0.30 -0.85 0.11
## Dist2FracLoc_10 10.00 0.31 -0.68 0.10
##Correlation analysis
library(dplyr)
##
## Attache Paket: 'dplyr'
## Die folgenden Objekte sind maskiert von 'package:stats':
##
## filter, lag
## Die folgenden Objekte sind maskiert von 'package:base':
##
## intersect, setdiff, setequal, union
# Replace each age band with one numeric value (10, 32.5 and 65) so that
# AverageAge can enter the correlation matrix alongside the numeric columns.
data[["AverageAge"]] <- as.numeric(
  dplyr::recode(
    data[["AverageAge"]],
    "15 and younger" = 10,
    "15-50" = 32.5,
    "50 and older" = 65
  )
)
# Pairwise correlations (cor() default method) between all columns of `data`.
cor_matrix <- cor(x = data)
print(cor_matrix)
## leuk_cases AverageAge GreenNeighborhoodIndex
## leuk_cases 1.000000000 0.001595315 0.01254171
## AverageAge 0.001595315 1.000000000 0.08096118
## GreenNeighborhoodIndex 0.012541706 0.080961179 1.00000000
## CarsPerHH -0.029098402 -0.001296003 -0.01262389
## PopDens 0.002928462 0.012791776 -0.03060715
## PctPopGrowth 0.263413021 0.023532482 -0.01839072
## Dist2FracLoc_1 -0.303019494 0.041464072 -0.07193474
## Dist2FracLoc_2 0.667470299 -0.069501465 0.09406828
## Dist2FracLoc_3 0.665163353 -0.077272975 0.09215445
## Dist2FracLoc_4 -0.451915924 0.047496920 -0.07584284
## Dist2FracLoc_5 0.609088090 -0.055065381 0.07312433
## Dist2FracLoc_6 0.648532736 -0.098655144 0.06787217
## Dist2FracLoc_7 0.872528734 -0.082516591 0.07325840
## Dist2FracLoc_8 0.928650786 -0.099445337 0.06745598
## Dist2FracLoc_9 0.789691764 -0.066896478 0.07188707
## Dist2FracLoc_10 0.912249525 -0.096761561 0.05830868
## CarsPerHH PopDens PctPopGrowth Dist2FracLoc_1
## leuk_cases -0.029098402 2.928462e-03 0.263413021 -0.30301949
## AverageAge -0.001296003 1.279178e-02 0.023532482 0.04146407
## GreenNeighborhoodIndex -0.012623888 -3.060715e-02 -0.018390720 -0.07193474
## CarsPerHH 1.000000000 6.538893e-01 -0.070490098 -0.12691925
## PopDens 0.653889292 1.000000e+00 -0.007184758 -0.05856997
## PctPopGrowth -0.070490098 -7.184758e-03 1.000000000 0.11664700
## Dist2FracLoc_1 -0.126919253 -5.856997e-02 0.116647004 1.00000000
## Dist2FracLoc_2 0.075872336 5.449307e-02 0.023886772 -0.81212328
## Dist2FracLoc_3 0.068587914 5.626745e-02 0.045493539 -0.73601885
## Dist2FracLoc_4 -0.106674289 -5.375889e-02 0.063460900 0.97444279
## Dist2FracLoc_5 0.043008216 2.548995e-02 -0.004166902 -0.75789500
## Dist2FracLoc_6 0.012488537 4.300793e-02 0.210173960 -0.26970857
## Dist2FracLoc_7 -0.033819984 4.501406e-03 0.150458073 -0.44998740
## Dist2FracLoc_8 -0.042852013 1.090878e-02 0.204104607 -0.29970650
## Dist2FracLoc_9 -0.027035882 8.064044e-05 0.121025587 -0.49101979
## Dist2FracLoc_10 -0.059678938 6.422768e-03 0.243320308 -0.15812974
## Dist2FracLoc_2 Dist2FracLoc_3 Dist2FracLoc_4
## leuk_cases 0.66747030 0.66516335 -0.45191592
## AverageAge -0.06950146 -0.07727298 0.04749692
## GreenNeighborhoodIndex 0.09406828 0.09215445 -0.07584284
## CarsPerHH 0.07587234 0.06858791 -0.10667429
## PopDens 0.05449307 0.05626745 -0.05375889
## PctPopGrowth 0.02388677 0.04549354 0.06346090
## Dist2FracLoc_1 -0.81212328 -0.73601885 0.97444279
## Dist2FracLoc_2 1.00000000 0.98623389 -0.84654073
## Dist2FracLoc_3 0.98623389 1.00000000 -0.76023933
## Dist2FracLoc_4 -0.84654073 -0.76023933 1.00000000
## Dist2FracLoc_5 0.69535159 0.58670645 -0.87586048
## Dist2FracLoc_6 0.72825021 0.81983694 -0.29917426
## Dist2FracLoc_7 0.65899878 0.60352816 -0.62073901
## Dist2FracLoc_8 0.67005991 0.66758452 -0.46222452
## Dist2FracLoc_9 0.59730577 0.51465621 -0.66075023
## Dist2FracLoc_10 0.57899784 0.59363402 -0.32511914
## Dist2FracLoc_5 Dist2FracLoc_6 Dist2FracLoc_7
## leuk_cases 0.609088090 0.64853274 0.872528734
## AverageAge -0.055065381 -0.09865514 -0.082516591
## GreenNeighborhoodIndex 0.073124334 0.06787217 0.073258402
## CarsPerHH 0.043008216 0.01248854 -0.033819984
## PopDens 0.025489952 0.04300793 0.004501406
## PctPopGrowth -0.004166902 0.21017396 0.150458073
## Dist2FracLoc_1 -0.757894997 -0.26970857 -0.449987403
## Dist2FracLoc_2 0.695351595 0.72825021 0.658998775
## Dist2FracLoc_3 0.586706453 0.81983694 0.603528158
## Dist2FracLoc_4 -0.875860480 -0.29917426 -0.620739014
## Dist2FracLoc_5 1.000000000 0.17313755 0.844247465
## Dist2FracLoc_6 0.173137547 1.00000000 0.435114211
## Dist2FracLoc_7 0.844247465 0.43511421 1.000000000
## Dist2FracLoc_8 0.646717030 0.67227640 0.935019416
## Dist2FracLoc_9 0.906933491 0.26646655 0.978213426
## Dist2FracLoc_10 0.531716702 0.68459464 0.881637341
## Dist2FracLoc_8 Dist2FracLoc_9 Dist2FracLoc_10
## leuk_cases 0.92865079 7.896918e-01 0.912249525
## AverageAge -0.09944534 -6.689648e-02 -0.096761561
## GreenNeighborhoodIndex 0.06745598 7.188707e-02 0.058308683
## CarsPerHH -0.04285201 -2.703588e-02 -0.059678938
## PopDens 0.01090878 8.064044e-05 0.006422768
## PctPopGrowth 0.20410461 1.210256e-01 0.243320308
## Dist2FracLoc_1 -0.29970650 -4.910198e-01 -0.158129736
## Dist2FracLoc_2 0.67005991 5.973058e-01 0.578997839
## Dist2FracLoc_3 0.66758452 5.146562e-01 0.593634022
## Dist2FracLoc_4 -0.46222452 -6.607502e-01 -0.325119144
## Dist2FracLoc_5 0.64671703 9.069335e-01 0.531716702
## Dist2FracLoc_6 0.67227640 2.664665e-01 0.684594636
## Dist2FracLoc_7 0.93501942 9.782134e-01 0.881637341
## Dist2FracLoc_8 1.00000000 8.446940e-01 0.987215938
## Dist2FracLoc_9 0.84469400 1.000000e+00 0.774873197
## Dist2FracLoc_10 0.98721594 7.748732e-01 1.000000000
library(corrplot)
## Warning: Paket 'corrplot' wurde unter R Version 4.4.2 erstellt
## corrplot 0.95 loaded
# Colour-coded plot of the upper triangle of the correlation matrix, with
# small black variable labels rotated by 45 degrees.
corrplot(
  corr = cor_matrix,
  method = "color",
  type = "upper",
  tl.col = "black",
  tl.srt = 45,
  tl.cex = 0.5
)
# 2. Plot your variables using maps and pairs plots. Check the coordinate system and transform if necessary.
library(sf)
## Warning: Paket 'sf' wurde unter R Version 4.4.2 erstellt
## Linking to GEOS 3.12.2, GDAL 3.9.3, PROJ 9.4.1; sf_use_s2() is TRUE
library(ggplot2)
## Warning: Paket 'ggplot2' wurde unter R Version 4.4.2 erstellt
##
## Attache Paket: 'ggplot2'
## Die folgenden Objekte sind maskiert von 'package:psych':
##
## %+%, alpha
# Show the coordinate reference system attached to the county layer.
sf::st_crs(x = county_leukemia_128)
## Coordinate Reference System:
## User input: EPSG:4326
## wkt:
## GEOGCRS["WGS 84",
## ENSEMBLE["World Geodetic System 1984 ensemble",
## MEMBER["World Geodetic System 1984 (Transit)"],
## MEMBER["World Geodetic System 1984 (G730)"],
## MEMBER["World Geodetic System 1984 (G873)"],
## MEMBER["World Geodetic System 1984 (G1150)"],
## MEMBER["World Geodetic System 1984 (G1674)"],
## MEMBER["World Geodetic System 1984 (G1762)"],
## MEMBER["World Geodetic System 1984 (G2139)"],
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]],
## ENSEMBLEACCURACY[2.0]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## CS[ellipsoidal,2],
## AXIS["geodetic latitude (Lat)",north,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433]],
## AXIS["geodetic longitude (Lon)",east,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433]],
## USAGE[
## SCOPE["Horizontal component of 3D system."],
## AREA["World."],
## BBOX[-90,-180,90,180]],
## ID["EPSG",4326]]
# Re-project the county layer to EPSG:4326.
# NOTE(review): the st_crs() output printed before this call already reports
# EPSG:4326, so this transformation should leave the coordinates unchanged.
county_leukemia_128 <- sf::st_transform(x = county_leukemia_128, crs = 4326)
Plot map
# Choropleth of the leukemia case count per polygon, filled with the viridis
# "plasma" palette and outlined in black.
ggplot() +
  geom_sf(
    data = county_leukemia_128,
    mapping = aes(fill = leuk_cases),
    color = "black"
  ) +
  labs(title = "Leuk_cases", fill = "Cases") +
  scale_fill_viridis_c(option = "plasma") +
  theme_minimal()
# Choropleth of the green neighborhood index on a light-to-dark green gradient.
ggplot() +
  geom_sf(
    data = county_leukemia_128,
    mapping = aes(fill = GreenNeighborhoodIndex),
    color = "black"
  ) +
  labs(
    title = "Green Neighborhood index by district",
    fill = "Green Index"
  ) +
  scale_fill_gradient(low = "lightgreen", high = "darkgreen") +
  theme_minimal()
Population density (PopDens):
# Choropleth of the population density using the viridis "inferno" palette.
ggplot() +
  geom_sf(
    data = county_leukemia_128,
    mapping = aes(fill = PopDens),
    color = "black"
  ) +
  labs(title = "Population density", fill = "Density") +
  scale_fill_viridis_c(option = "inferno") +
  theme_minimal()
Map by age group
# Choropleth of the age band of each polygon; every band gets a fixed colour.
ggplot() +
  geom_sf(
    data = county_leukemia_128,
    mapping = aes(fill = AverageAge),
    color = "black"
  ) +
  labs(
    title = "Distribution of the average age",
    fill = "Average Age"
  ) +
  scale_fill_manual(
    values = c(
      "15 and younger" = "lightblue",
      "15-50" = "orange",
      "50 and older" = "red"
    )
  ) +
  theme_minimal()
Leukemia cases by age group
# Box plots of the leukemia case count, one box per age band, using the same
# band colours as the age map.
ggplot(
  data = county_leukemia_128,
  mapping = aes(x = AverageAge, y = leuk_cases, fill = AverageAge)
) +
  geom_boxplot() +
  labs(
    title = "Leukemia cases by age group",
    x = "Average Age",
    y = "Leukemia cases"
  ) +
  scale_fill_manual(
    values = c(
      "15 and younger" = "lightblue",
      "15-50" = "orange",
      "50 and older" = "red"
    )
  ) +
  theme_minimal()
library(GGally)
## Warning: Paket 'GGally' wurde unter R Version 4.4.2 erstellt
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
# Pairs plot of the case count and four numeric covariates, coloured by age
# band.
ggpairs(
  data = county_leukemia_128,
  mapping = aes(color = AverageAge, alpha = 0.7),
  columns = c(
    "leuk_cases", "GreenNeighborhoodIndex", "CarsPerHH",
    "PopDens", "PctPopGrowth"
  )
) +
  labs(title = "Pairs plot for numeric variables by age group")
# Collect the names of all columns whose name contains "Dist2FracLoc".
dist_columns <- grep(
  pattern = "Dist2FracLoc",
  x = colnames(county_leukemia_128),
  value = TRUE
)
# Preview those columns; the values are expected to be numeric.
head(x = county_leukemia_128[, dist_columns])
## Simple feature collection with 6 features and 10 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 35.23348 ymin: -59.63708 xmax: 93.49703 ymax: 35.66801
## Geodetic CRS: WGS 84
## Dist2FracLoc_1 Dist2FracLoc_2 Dist2FracLoc_3 Dist2FracLoc_4 Dist2FracLoc_5
## 1 5.831941 3.780389 3.877897 6.292500 2.6148888
## 2 8.909646 2.369199 2.870503 9.151515 0.7315699
## 3 5.815866 2.429792 1.734616 5.740380 4.8405903
## 4 7.802678 2.594612 2.918953 8.116144 1.3709540
## 5 9.109335 2.054256 2.561358 9.321347 0.9848940
## 6 6.980251 4.158250 4.568718 7.307248 1.6170904
## Dist2FracLoc_6 Dist2FracLoc_7 Dist2FracLoc_8 Dist2FracLoc_9 Dist2FracLoc_10
## 1 4.8785487 0.6207211 2.098640 0.2504166 2.629392
## 2 5.5741146 2.8456412 4.370153 2.1724450 5.208164
## 3 0.4680598 3.4471759 2.651246 4.0512372 2.857167
## 4 4.9903627 1.7744627 3.236860 1.2334271 4.011806
## 5 5.3232384 2.9753540 4.389665 2.4008013 5.234740
## 6 6.6084237 2.3871396 4.160840 1.3183907 4.807845
## geometry
## 1 POLYGON ((44.8736 -24.49018...
## 2 POLYGON ((93.49703 -17.1353...
## 3 POLYGON ((40.59488 30.47198...
## 4 POLYGON ((78.90321 -19.5898...
## 5 POLYGON ((91.51893 -9.25081...
## 6 POLYGON ((78.81867 -50.3464...
# Calculate, for every row, the minimum distance to any fracking site.
# Subsetting the sf object keeps its geometry list-column (the head() output of
# these columns still lists a geometry field), and apply() would then coerce a
# table that contains polygons. Dropping the geometry and building a plain
# numeric matrix first makes the row-wise minimum operate on numbers only.
county_leukemia_128$MinDist2Frac <- apply(
  as.matrix(
    sf::st_drop_geometry(county_leukemia_128)[, dist_columns, drop = FALSE]
  ),
  1,
  min,
  na.rm = TRUE
)
# Check whether the calculation has worked
summary(county_leukemia_128$MinDist2Frac)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.7436 1.3410 1.4750 2.1443 3.8535
# Check the first values of the calculated distance
head(county_leukemia_128$MinDist2Frac)
## [1] 0.2504166 0.7315699 0.4680598 1.2334271 0.9848940 1.3183907
library(ggplot2)
library(viridis)
## Warning: Paket 'viridis' wurde unter R Version 4.4.2 erstellt
## Lade nötiges Paket: viridisLite
library(sf)
# Map of the leukemia cases with the minimum fracking distance overlaid.
ggplot(county_leukemia_128) +
  # Polygons filled by the leukemia case count
  geom_sf(mapping = aes(fill = leuk_cases), color = "black") +
  # One semi-transparent point per row at its lon/lat columns, coloured by
  # MinDist2Frac. These points are not the fracking sites themselves.
  geom_point(
    mapping = aes(x = lon, y = lat, color = MinDist2Frac),
    alpha = 0.6
  ) +
  # Titles for the plot and both legends
  labs(
    title = "Geographical pattern of leukemia cases and proximity to fracking sites",
    fill = "Leukemia cases",
    color = "Distance to fracking"
  ) +
  # "plasma" for the case counts, "magma" for the distances
  scale_fill_viridis_c(option = "plasma") +
  scale_color_viridis_c(option = "magma") +
  theme_minimal()
library(ggplot2)
library(viridis)
library(sf)
shape_map <- 1:10 # Point shapes 1-10, one per fracking site

# Map of the leukemia cases with the fracking sites drawn on top.
ggplot(data = county_leukemia_128) +
  # Polygons filled by the leukemia case count
  geom_sf(aes(fill = leuk_cases), color = "black") +
  # Fracking sites as white symbols, one shape per site ID.
  # The colour is set as a constant outside aes(): aes(color = "white") would
  # map the string "white" as data and draw the default palette colour instead.
  # The shape is mapped inside aes() so that scale_shape_manual() and the
  # "Fracking Sites" legend title below actually take effect.
  geom_sf(
    data = fracking_locations_128,
    aes(shape = factor(FracLoc)),
    color = "white",
    size = 4
  ) +
  # Reversed "plasma" palette for the leukemia cases
  scale_fill_viridis_c(option = "plasma", direction = -1) +
  # Manual shape assignment for the fracking sites
  scale_shape_manual(values = shape_map) +
  labs(
    title = "Geographical pattern of leukemia cases and fracking sites",
    fill = "Leukemia cases",
    shape = "Fracking Sites"
  ) +
  theme_minimal()
library(ggplot2)
library(viridis)
library(sf)
library(dplyr)
# 1. Correlate the leukemia case count with the distance to each fracking
#    site. The columns Dist2FracLoc_1 ... Dist2FracLoc_10 hold the distances.
leuk_cases_values <- county_leukemia_128$leuk_cases
cor_values <- vapply(
  1:10,
  function(i) {
    cor(
      leuk_cases_values,
      county_leukemia_128[[paste0("Dist2FracLoc_", i)]],
      use = "complete.obs"
    )
  },
  numeric(1)
)

# 2. Keep only the sites whose correlation lies below the threshold quoted in
#    the plot title. The filter used to be `< 0` while the title promised
#    `< -0.25`; one shared constant keeps filter and title in agreement.
cor_threshold <- -0.25
# NOTE: despite its name, this vector holds the indices of the negatively
# correlated sites; the name is kept because it is a script-level variable.
positive_corr_indices <- which(cor_values < cor_threshold)

# Fracking locations that passed the filter.
# NOTE(review): rows are picked by position, which assumes row i of
# fracking_locations_128 belongs to Dist2FracLoc_i - confirm.
filtered_fracking <- fracking_locations_128[positive_corr_indices, ]
shape_map <- seq_along(positive_corr_indices) # One shape per remaining site

ggplot(data = county_leukemia_128) +
  geom_sf(aes(fill = leuk_cases), color = "black") +
  # Show the filtered fracking sites with different shapes
  geom_sf(
    data = filtered_fracking,
    aes(geometry = geometry, shape = as.factor(FracLoc)),
    size = 5,
    color = "green",
    stroke = 1.5
  ) +
  scale_fill_viridis_c(option = "plasma", direction = -1) +
  # Manual shape allocation for the fracking sites. It is added only once;
  # a second identical scale merely triggers a "Scale for shape is already
  # present" message.
  scale_shape_manual(values = shape_map) +
  labs(
    title = paste0(
      "Leukemia cases and fracking sites with negative correlation < ",
      cor_threshold
    ),
    fill = "Leukemia cases",
    shape = "Fracking sites" # Legend title for the shapes
  ) +
  theme_minimal() +
  # Put the legends below the map
  theme(
    legend.position = "bottom",
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 8),
    legend.key = element_rect(fill = "white", colour = "black"),
    legend.box = "horizontal"
  ) +
  # Force the legend keys to use exactly the shapes in shape_map
  guides(shape = guide_legend(override.aes = list(shape = shape_map)))
## Scale for shape is already present.
## Adding another scale for shape, which will replace the existing scale.
library(ggplot2)
library(viridis)
library(sf)
# Choropleth of PctPopGrowth ("magma" palette) with the legend below the map.
ggplot() +
  geom_sf(
    data = county_leukemia_128,
    mapping = aes(fill = PctPopGrowth),
    color = "black"
  ) +
  labs(
    title = "Population growth in the districts",
    fill = "Population growth (%)"
  ) +
  # Colour scale for the population growth
  scale_fill_viridis_c(option = "magma", name = "Population growth (%)") +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 8),
    legend.key = element_rect(fill = "white", colour = "black")
  )
library(ggplot2)
library(viridis)
library(sf)
shape_map <- 1:10 # Point shapes 1-10 for the fracking sites

# Population growth per polygon with the fracking sites overlaid.
ggplot(data = county_leukemia_128) +
  geom_sf(mapping = aes(fill = PctPopGrowth), color = "black") +
  # Fracking sites in green, one shape per FracLoc ID
  geom_sf(
    data = fracking_locations_128,
    mapping = aes(shape = factor(FracLoc)),
    color = "green",
    size = 3,
    show.legend = TRUE
  ) +
  # Reversed "magma" scale for the population growth
  scale_fill_viridis_c(
    option = "magma",
    name = "Population growth (%)",
    direction = -1
  ) +
  # Manual shape assignment for the fracking sites
  scale_shape_manual(values = shape_map) +
  labs(
    title = "Population growth and fracking sites",
    fill = "Population growth (%)",
    shape = "Fracking site"
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.title = element_text(size = 8),
    legend.text = element_text(size = 6),
    legend.key = element_rect(fill = "white", colour = "black"),
    legend.box = "horizontal",
    legend.margin = margin(t = 8)
  )
library(ggplot2)
library(viridis)
library(sf)
library(gridExtra)
## Warning: Paket 'gridExtra' wurde unter R Version 4.4.2 erstellt
##
## Attache Paket: 'gridExtra'
## Das folgende Objekt ist maskiert 'package:dplyr':
##
## combine
# Left panel: leukemia cases per polygon (reversed "plasma" palette).
leukemia_map <- ggplot(data = county_leukemia_128) +
  geom_sf(mapping = aes(fill = leuk_cases), color = "black") +
  labs(
    title = "Leukemia cases per district",
    fill = "Leukemia cases"
  ) +
  scale_fill_viridis_c(
    option = "plasma",
    name = "Leukemia cases",
    direction = -1
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 8)
  )

# Right panel: population growth per polygon (reversed "magma" palette).
popgrowth_map <- ggplot(data = county_leukemia_128) +
  geom_sf(mapping = aes(fill = PctPopGrowth), color = "black") +
  labs(
    title = "Population growth per district",
    fill = "Population growth (%)"
  ) +
  scale_fill_viridis_c(
    option = "magma",
    name = "Population growth (%)",
    direction = -1
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 8)
  )

# Show both maps side by side
grid.arrange(leukemia_map, popgrowth_map, ncol = 2)
#- Number of leukemia cases per population, then an overview of whether it increases when more people live there
#- At the start, compare the average with the standard deviation (ideally with describe())
library(ggplot2)
library(viridis)
library(sf)
shape_map <- 1:10 # Point shapes 1-10 for the fracking sites

# Green neighborhood index per polygon with the fracking sites overlaid.
ggplot(data = county_leukemia_128) +
  geom_sf(mapping = aes(fill = GreenNeighborhoodIndex), color = "black") +
  # Fracking sites in blue, one shape per FracLoc ID
  geom_sf(
    data = fracking_locations_128,
    mapping = aes(shape = factor(FracLoc)),
    color = "blue",
    size = 5,
    show.legend = TRUE
  ) +
  # Light-to-dark green gradient for the green index
  scale_fill_gradient(
    low = "lightgreen",
    high = "darkgreen",
    name = "Green NeighborhoodIndex"
  ) +
  # Manual shape assignment for the fracking sites
  scale_shape_manual(values = shape_map) +
  labs(
    title = "Green Neighborhood Index and Fracking Sites",
    fill = "GreenNeighborhoodIndex",
    shape = "Fracking site"
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 8),
    legend.key = element_rect(fill = "white", colour = "black"),
    legend.box = "horizontal",
    legend.margin = margin(t = 10)
  )
library(ggplot2)
library(viridis)
library(sf)
library(gridExtra)
# Left panel: leukemia cases per polygon ("plasma" palette).
leukemia_map <- ggplot(data = county_leukemia_128) +
  geom_sf(mapping = aes(fill = leuk_cases), color = "black") +
  labs(
    title = "Leukemia cases per district",
    fill = "Leukemia cases"
  ) +
  scale_fill_viridis_c(option = "plasma", name = "Leukemia cases") +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 8)
  )

# Right panel: cars per household per polygon ("magma" palette).
# NOTE(review): this overwrites `popgrowth_map` although the plot shows
# CarsPerHH; the name is kept because later code may rely on it.
popgrowth_map <- ggplot(data = county_leukemia_128) +
  geom_sf(mapping = aes(fill = CarsPerHH), color = "black") +
  labs(
    title = "Cars per Household",
    fill = "Cars per HH"
  ) +
  scale_fill_viridis_c(option = "magma", name = "Cars per HH") +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 8)
  )

# Show both maps side by side
grid.arrange(leukemia_map, popgrowth_map, ncol = 2)
library(ggplot2)
library(viridis)
library(sf)
shape_map <- 1:10 # Point shapes 1-10 for the fracking sites

# Leukemia cases per polygon with the fracking sites overlaid.
ggplot(data = county_leukemia_128) +
  geom_sf(mapping = aes(fill = leuk_cases), color = "black") +
  # Fracking sites in green, one shape per FracLoc ID
  geom_sf(
    data = fracking_locations_128,
    mapping = aes(shape = factor(FracLoc)),
    color = "green",
    size = 3,
    show.legend = TRUE
  ) +
  # Reversed "magma" scale for the leukemia cases
  scale_fill_viridis_c(
    option = "magma",
    name = "Leukemia Cases",
    direction = -1
  ) +
  # Manual shape assignment for the fracking sites
  scale_shape_manual(values = shape_map) +
  labs(
    title = "Leukemia Cases and fracking sites",
    fill = "Leukemia Cases",
    shape = "Fracking site"
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.title = element_text(size = 8),
    legend.text = element_text(size = 6),
    legend.key = element_rect(fill = "white", colour = "black"),
    legend.box = "horizontal",
    legend.margin = margin(t = 8)
  )
shape_map <- c(1:10) # Various shapes for fracking sites

# Age band per polygon with the fracking sites overlaid.
ggplot() +
  geom_sf(data = county_leukemia_128, aes(fill = AverageAge), color = "black") +
  # Fixed colour per age band. `name` is passed to the scale itself; it used
  # to sit inside c(...), where it became a fourth "colour" value called
  # "name" instead of the legend title.
  scale_fill_manual(
    name = "Average Age",
    values = c(
      "15 and younger" = "lightblue",
      "15-50" = "orange",
      "50 and older" = "red"
    )
  ) +
  geom_sf(
    data = fracking_locations_128,
    aes(shape = factor(FracLoc)), # Shape based on the FracLoc ID
    color = "green", size = 5,
    stroke = 1.5,
    show.legend = TRUE
  ) +
  # Manual shape assignment for fracking sites (1-10 shapes)
  scale_shape_manual(values = shape_map) +
  labs(
    fill = "Average Age",
    shape = "Fracking site"
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.title = element_text(size = 6),
    legend.text = element_text(size = 4),
    legend.key = element_rect(fill = "white", colour = "black"),
    legend.box = "horizontal",
    legend.margin = margin(t = 6)
  )
library(ggplot2)
library(viridis)
library(sf)
shape_map <- 1:10 # Point shapes 1-10 for the fracking sites

# Population density per polygon with the fracking sites overlaid.
ggplot(data = county_leukemia_128) +
  geom_sf(mapping = aes(fill = PopDens), color = "black") +
  # Fracking sites in green with a thicker outline, one shape per FracLoc ID
  geom_sf(
    data = fracking_locations_128,
    mapping = aes(shape = factor(FracLoc)),
    color = "green",
    size = 5,
    stroke = 1.5,
    show.legend = TRUE
  ) +
  # Reversed "magma" scale for the population density
  scale_fill_viridis_c(option = "magma", name = "PopDens", direction = -1) +
  # Manual shape assignment for the fracking sites
  scale_shape_manual(values = shape_map) +
  labs(
    title = "Population Density and fracking sites",
    fill = "Population Density",
    shape = "Fracking site"
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.title = element_text(size = 8),
    legend.text = element_text(size = 6),
    legend.key = element_rect(fill = "white", colour = "black"),
    legend.box = "horizontal",
    legend.margin = margin(t = 8)
  )
library(ggplot2)
library(viridis)
library(sf)
shape_map <- 1:10 # Point shapes 1-10 for the fracking sites

# Cars per household per polygon with the fracking sites overlaid.
ggplot(data = county_leukemia_128) +
  geom_sf(mapping = aes(fill = CarsPerHH), color = "black") +
  # Fracking sites in green with a thicker outline, one shape per FracLoc ID
  geom_sf(
    data = fracking_locations_128,
    mapping = aes(shape = factor(FracLoc)),
    color = "green",
    size = 5,
    stroke = 1.5,
    show.legend = TRUE
  ) +
  # Reversed "magma" scale for the cars per household
  scale_fill_viridis_c(option = "magma", name = "Cars per HH", direction = -1) +
  # Manual shape assignment for the fracking sites
  scale_shape_manual(values = shape_map) +
  labs(
    title = "Cars per HH and fracking sites",
    fill = "Cars per HH",
    shape = "Fracking site"
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.title = element_text(size = 8),
    legend.text = element_text(size = 6),
    legend.key = element_rect(fill = "white", colour = "black"),
    legend.box = "horizontal",
    legend.margin = margin(t = 8)
  )