This report demonstrates:
## 'data.frame': 506 obs. of 14 variables:
## $ crim : num 0.00632 0.02731 0.02729 0.03237 0.06905 ...
## $ zn : num 18 0 0 0 0 0 12.5 12.5 12.5 12.5 ...
## $ indus : num 2.31 7.07 7.07 2.18 2.18 2.18 7.87 7.87 7.87 7.87 ...
## $ chas : int 0 0 0 0 0 0 0 0 0 0 ...
## $ nox : num 0.538 0.469 0.469 0.458 0.458 0.458 0.524 0.524 0.524 0.524 ...
## $ rm : num 6.58 6.42 7.18 7 7.15 ...
## $ age : num 65.2 78.9 61.1 45.8 54.2 58.7 66.6 96.1 100 85.9 ...
## $ dis : num 4.09 4.97 4.97 6.06 6.06 ...
## $ rad : int 1 2 2 3 3 3 5 5 5 5 ...
## $ tax : num 296 242 242 222 222 222 311 311 311 311 ...
## $ ptratio: num 15.3 17.8 17.8 18.7 18.7 18.7 15.2 15.2 15.2 15.2 ...
## $ black : num 397 397 393 395 397 ...
## $ lstat : num 4.98 9.14 4.03 2.94 5.33 ...
## $ medv : num 24 21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9 ...
The Boston dataset contains:
##
## Call:
## lm(formula = medv ~ crim, data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.957 -5.449 -2.007 2.512 29.800
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 24.03311 0.40914 58.74 <2e-16 ***
## crim -0.41519 0.04389 -9.46 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.484 on 504 degrees of freedom
## Multiple R-squared: 0.1508, Adjusted R-squared: 0.1491
## F-statistic: 89.49 on 1 and 504 DF, p-value: < 2.2e-16
Whether crime rate is a significant predictor is judged from the p-value of the crim coefficient.
Here p < 2e-16, far below 0.05, so crime rate is significantly (and negatively) associated with house value.
##
## Call:
## lm(formula = medv ~ crim + rm + ptratio + nox, data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.966 -3.207 -0.563 1.881 39.588
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.64051 4.36496 1.521 0.129
## crim -0.13906 0.03362 -4.136 4.14e-05 ***
## rm 6.90415 0.40150 17.196 < 2e-16 ***
## ptratio -1.06741 0.12943 -8.247 1.44e-15 ***
## nox -13.15253 2.49470 -5.272 2.01e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.724 on 501 degrees of freedom
## Multiple R-squared: 0.6157, Adjusted R-squared: 0.6126
## F-statistic: 200.6 on 4 and 501 DF, p-value: < 2.2e-16
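Interpretation:
Holding the other predictors fixed, more rooms (rm) are associated with higher value, while higher pollution (nox), crime rate (crim), and pupil-teacher ratio (ptratio) are each associated with lower value.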
# Residual diagnostics for model_b (defined outside this chunk; presumably the
# multiple regression summarised above -- confirm). The residuals and fitted
# values are stored as columns of Boston so ggplot can map them.
Boston$residuals <- resid(model_b)
Boston$fitted <- fitted(model_b)

# A straight-line smoother (method = "lm") is uninformative on this plot: for a
# least-squares fit with an intercept the residuals are uncorrelated with the
# fitted values, so that line is always flat at zero. A loess smoother can show
# curvature, and the dashed line marks the zero level it should follow.
ggplot(Boston, aes(x = fitted, y = residuals)) +
  geom_point() +
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_smooth(method = "loess") +
  labs(
    title = "Residual Plot",
    x = "Fitted Values",
    y = "Residuals"
  )
## `geom_smooth()` using formula = 'y ~ x'
Residuals should scatter randomly around zero, with no visible trend or funnel shape, if the model fits well.
# Load the sea-level observations and derive the calendar year of each record.
# NOTE(review): as.Date() is called without a format, so the date column is
# assumed to be in a form as.Date() parses by default -- confirm.
sealevel <- read.csv("sealevel.csv")
sealevel$date <- as.Date(sealevel$date)
sealevel$year <- format(sealevel$date, "%Y")

# Mean, maximum and minimum level per year. `.data$sealevel` makes explicit
# that the measurement *column* is summarised, not the data frame of the same
# name (assumes the value column is called `sealevel` -- confirm).
year_data <- sealevel %>%
  group_by(year) %>%
  summarise(
    mean_level = mean(.data$sealevel, na.rm = TRUE),
    max_level = max(.data$sealevel, na.rm = TRUE),
    min_level = min(.data$sealevel, na.rm = TRUE),
    .groups = "drop"
  )

# One line per statistic. Each line gets its own colour and a legend entry;
# without that the three lines cannot be told apart.
# year is a character column (from format()), hence the as.numeric().
ggplot(year_data, aes(x = as.numeric(year))) +
  geom_line(aes(y = mean_level, colour = "Mean")) +
  geom_line(aes(y = max_level, colour = "Maximum")) +
  geom_line(aes(y = min_level, colour = "Minimum")) +
  labs(
    title = "Sea Level Statistics per Year",
    x = "Year",
    y = "Sea Level (cm)",
    colour = "Statistic"
  )
This plot shows yearly mean, maximum, and minimum sea levels.
# Numeric month and year of each observation. month() and year() are not base
# R; presumably the lubridate helpers -- confirm the package is attached.
sealevel$month <- month(sealevel$date)
sealevel$year <- year(sealevel$date)

# Keep the five full decades 1960-2009.
data_10yr <- sealevel %>%
  filter(year >= 1960 & year <= 2009)

# Assign each row to its decade: [1960,1970), ..., [2000,2010).
# dig.lab = 4 keeps the labels readable; with the default of 3 significant
# digits, four-digit years are printed as "[1.96e+03,1.97e+03)".
data_10yr$period <- cut(
  data_10yr$year,
  breaks = seq(1960, 2010, by = 10),
  right = FALSE,
  dig.lab = 4
)

# Median and maximum level for every decade/month combination.
# `.data$sealevel` refers to the measurement column, not the data frame of the
# same name (assumes the value column is called `sealevel` -- confirm).
month_data <- data_10yr %>%
  group_by(period, month) %>%
  summarise(
    median_level = median(.data$sealevel, na.rm = TRUE),
    max_level = max(.data$sealevel, na.rm = TRUE),
    .groups = "drop"
  )

# Dodged bars: one bar per decade within each month. The explicit breaks give
# one tick per month instead of fractional ticks on the continuous axis.
ggplot(month_data, aes(x = month, y = median_level, fill = period)) +
  geom_col(position = "dodge") +
  scale_x_continuous(breaks = 1:12) +
  labs(
    title = "Median Sea Level per Month",
    x = "Month",
    y = "Sea Level (cm)",
    fill = "Decade"
  )
This compares monthly sea level trends across decades.
# Read a CSV file into a data frame.
#
# arg1: path of the CSV file; passed unchanged to read.csv().
# Returns the data frame produced by read.csv() with its default settings.
readFile <- function(arg1) {
  read.csv(arg1)
}
# List files
# Full paths of every file in "data_folder" whose name matches "sl_stat".
file_list <- list.files(
  "data_folder",
  pattern = "sl_stat",
  full.names = TRUE
)

# Combine files safely
# Read at most the first 10 matches. seq_len() yields an empty sequence when
# no file matched; `1:0` would instead run the loop for i = 1 and i = 0 and
# call read.csv() on a missing path.
n_files <- min(10L, length(file_list))
data_list <- vector("list", n_files)
for (i in seq_len(n_files)) {
  data_list[[i]] <- readFile(file_list[i])
}

# Row-bind the per-file data frames; the result is NULL when nothing was read.
combined_data <- do.call(rbind, data_list)
# Standardize formats
# Re-read every matched file and force the column names to date/sealevel so
# that rbind() can stack files whose headers differ.
# NOTE(review): assumes each file has exactly two columns -- confirm.
# seq_along() keeps the loop from running at all when file_list is empty
# (1:length(file_list) would iterate over c(1, 0) and fail on a missing path).
data_list <- vector("list", length(file_list))
for (i in seq_along(file_list)) {
  temp <- readFile(file_list[i])
  names(temp) <- c("date", "sealevel")
  data_list[[i]] <- temp
}
final_data <- do.call(rbind, data_list)
This section is disabled to avoid file path errors.