1 + 1
[1] 2
Quarto enables you to weave together content and executable code into a finished document. To learn more about Quarto see https://quarto.org.
When you click the Render button, a document will be generated that includes both content and the output of embedded code. You can embed code like this:
1 + 1
[1] 2
You can add options to executable code like this:
[1] 4
The echo: false option disables the printing of code (only output is displayed).
library(readr)
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
df <- read_csv("~/Desktop/water_pollution_disease.csv")
Rows: 3000 Columns: 24
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): Country, Region, Water Source Type, Water Treatment Method
dbl (20): Year, Contaminant Level (ppm), pH Level, Turbidity (NTU), Dissolve...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep only the rows of `df` whose water source type is "River".
river_data <- filter(df, `Water Source Type` == "River")
summary(river_data)
Country Region Year Water Source Type
Length:538 Length:538 Min. :2000 Length:538
Class :character Class :character 1st Qu.:2005 Class :character
Mode :character Mode :character Median :2012 Mode :character
Mean :2012
3rd Qu.:2018
Max. :2024
Contaminant Level (ppm) pH Level Turbidity (NTU)
Min. :0.010 Min. :6.010 Min. :0.010
1st Qu.:2.530 1st Qu.:6.640 1st Qu.:1.340
Median :4.995 Median :7.280 Median :2.585
Mean :4.953 Mean :7.265 Mean :2.547
3rd Qu.:7.428 3rd Qu.:7.918 3rd Qu.:3.690
Max. :9.950 Max. :8.490 Max. :4.990
Dissolved Oxygen (mg/L) Nitrate Level (mg/L) Lead Concentration (µg/L)
Min. : 3.000 Min. : 0.06 Min. : 0.010
1st Qu.: 4.737 1st Qu.:12.78 1st Qu.: 5.320
Median : 6.200 Median :25.97 Median : 9.670
Mean : 6.420 Mean :25.51 Mean : 9.875
3rd Qu.: 8.300 3rd Qu.:38.31 3rd Qu.:14.752
Max. :10.000 Max. :49.92 Max. :19.940
Bacteria Count (CFU/mL) Water Treatment Method
Min. : 10 Length:538
1st Qu.:1201 Class :character
Median :2380 Mode :character
Mean :2473
3rd Qu.:3843
Max. :4994
Access to Clean Water (% of Population) Diarrheal Cases per 100,000 people
Min. :30.08 Min. : 0.0
1st Qu.:48.24 1st Qu.:109.2
Median :64.12 Median :239.0
Mean :64.58 Mean :242.0
3rd Qu.:81.37 3rd Qu.:371.5
Max. :99.94 Max. :499.0
Cholera Cases per 100,000 people Typhoid Cases per 100,000 people
Min. : 0.00 Min. : 0.00
1st Qu.:14.00 1st Qu.:23.00
Median :25.50 Median :48.00
Mean :25.27 Mean :49.07
3rd Qu.:38.00 3rd Qu.:73.75
Max. :49.00 Max. :99.00
Infant Mortality Rate (per 1,000 live births) GDP per Capita (USD)
Min. : 2.06 Min. : 572
1st Qu.:27.94 1st Qu.:26855
Median :53.01 Median :51919
Mean :52.15 Mean :51433
3rd Qu.:76.35 3rd Qu.:77048
Max. :99.88 Max. :99947
Healthcare Access Index (0-100) Urbanization Rate (%)
Min. : 0.19 Min. :10.10
1st Qu.:25.14 1st Qu.:29.34
Median :50.33 Median :48.45
Mean :50.42 Mean :49.25
3rd Qu.:76.04 3rd Qu.:69.06
Max. :99.74 Max. :89.96
Sanitation Coverage (% of Population) Rainfall (mm per year) Temperature (°C)
Min. :20.01 Min. : 205.0 Min. : 0.150
1st Qu.:41.34 1st Qu.: 889.8 1st Qu.: 9.912
Median :60.17 Median :1634.0 Median :21.165
Mean :61.08 Mean :1601.0 Mean :20.562
3rd Qu.:81.28 3rd Qu.:2320.5 3rd Qu.:31.485
Max. :99.87 Max. :2993.0 Max. :39.980
Population Density (people per km²)
Min. : 10.0
1st Qu.:278.8
Median :529.5
Mean :521.6
3rd Qu.:765.8
Max. :999.0
library(dplyr)
# Give the measurement columns short, syntactic names so they can be used
# directly in a model formula. rename() stops with an error if a source
# column is missing instead of silently leaving the data unchanged.
river_data <- river_data %>%
  rename(
    Nitrate = `Nitrate Level (mg/L)`,
    DissolvedOxygen = `Dissolved Oxygen (mg/L)`,
    pH = `pH Level`,
    Turbidity = `Turbidity (NTU)`,
    Temperature = `Temperature (°C)`,
    Rainfall = `Rainfall (mm per year)`,
    Bacteria = `Bacteria Count (CFU/mL)`,
    Lead = `Lead Concentration (µg/L)`
  )

# Keep the response and the seven predictors, and drop every row that has a
# missing value in any of them.
multi_data <- river_data %>%
  select(
    DissolvedOxygen, Nitrate, pH, Turbidity,
    Temperature, Rainfall, Bacteria, Lead
  ) %>%
  filter(if_all(everything(), ~ !is.na(.)))

# Multiple linear regression of dissolved oxygen on the seven predictors.
model_multi <- lm(
  DissolvedOxygen ~ Nitrate + pH + Turbidity + Temperature + Rainfall +
    Bacteria + Lead,
  data = multi_data
)
summary(model_multi)
Call:
lm(formula = DissolvedOxygen ~ Nitrate + pH + Turbidity + Temperature +
Rainfall + Bacteria + Lead, data = multi_data)
Residuals:
Min 1Q Median 3Q Max
-3.8303 -1.7388 -0.2013 1.8426 3.9137
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 6.128e+00 9.755e-01 6.282 6.97e-10 ***
Nitrate -1.770e-02 6.212e-03 -2.849 0.00456 **
pH 4.962e-02 1.229e-01 0.404 0.68663
Turbidity -2.686e-02 6.247e-02 -0.430 0.66734
Temperature 5.150e-03 7.508e-03 0.686 0.49306
Rainfall 6.189e-05 1.092e-04 0.567 0.57110
Bacteria 5.045e-05 6.062e-05 0.832 0.40562
Lead 1.230e-02 1.583e-02 0.777 0.43750
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 2.054 on 530 degrees of freedom
Multiple R-squared: 0.01924, Adjusted R-squared: 0.006291
F-statistic: 1.486 on 7 and 530 DF, p-value: 0.1697
# Diagnostic plot 1 of plot.lm(): residuals against fitted values.
plot(model_multi, which = 1)

# Shapiro-Wilk normality test on the residuals of the fitted model.
shapiro.test(residuals(model_multi))
Shapiro-Wilk normality test
data: residuals(model_multi)
W = 0.95445, p-value = 7.817e-12
# Diagnostic plot 4 of plot.lm(): Cook's distance for each observation.
plot(model_multi, which = 4)

# `model` has the same formula and data as `model_multi`, so reuse that fit
# instead of estimating the identical regression a second time.
model <- model_multi
summary(model)
Call:
lm(formula = DissolvedOxygen ~ Nitrate + pH + Turbidity + Temperature +
Rainfall + Bacteria + Lead, data = multi_data)
Residuals:
Min 1Q Median 3Q Max
-3.8303 -1.7388 -0.2013 1.8426 3.9137
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 6.128e+00 9.755e-01 6.282 6.97e-10 ***
Nitrate -1.770e-02 6.212e-03 -2.849 0.00456 **
pH 4.962e-02 1.229e-01 0.404 0.68663
Turbidity -2.686e-02 6.247e-02 -0.430 0.66734
Temperature 5.150e-03 7.508e-03 0.686 0.49306
Rainfall 6.189e-05 1.092e-04 0.567 0.57110
Bacteria 5.045e-05 6.062e-05 0.832 0.40562
Lead 1.230e-02 1.583e-02 0.777 0.43750
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 2.054 on 530 degrees of freedom
Multiple R-squared: 0.01924, Adjusted R-squared: 0.006291
F-statistic: 1.486 on 7 and 530 DF, p-value: 0.1697
# Shapiro-Wilk normality test on the residuals of `model`.
shapiro.test(residuals(model))
Shapiro-Wilk normality test
data: residuals(model)
W = 0.95445, p-value = 7.817e-12
summary(model_multi)$coefficients["Nitrate", "Pr(>|t|)"]
[1] 0.004558633
library(car)
Loading required package: carData
Attaching package: 'car'
The following object is masked from 'package:dplyr':
recode
vif(model_multi)
    Nitrate          pH   Turbidity Temperature    Rainfall    Bacteria
   1.009911    1.018408    1.008851    1.008609    1.006151    1.013103
       Lead
   1.009869