# Load necessary libraries
library(ggplot2) # For plotting
library(readr) # For reading data
library(visreg) # For visualizing regression models
library(ggsci) # For color palettes

# Load the CSV file with the correct delimiter and specify decimal format:
# fields are separated by ";" and numbers use "," as the decimal mark.
file_path <- "/Users/Gebrekidan/Book5.csv"
data <- read_delim(
  file_path,
  delim = ";",
  locale = locale(decimal_mark = ",")
)
Rows: 2044 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ";"
chr (4): date, Rainfall, River Level, Wind speed
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Call:
lm(formula = River.Level ~ Rainfall, data = data)
Residuals:
Min 1Q Median 3Q Max
-2.2373 -0.2523 -0.1323 0.1323 3.4350
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.672321 0.010637 63.21 <2e-16 ***
Rainfall 0.024025 0.001518 15.83 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.4492 on 2016 degrees of freedom
Multiple R-squared: 0.1105, Adjusted R-squared: 0.1101
F-statistic: 250.5 on 1 and 2016 DF, p-value: < 2.2e-16
# Make predictions: store the model's prediction for every row of `data`.
# `model` must already exist in the session (the lm() fit of
# River.Level ~ Rainfall whose summary is printed above).
data$predicted <- predict(model, newdata = data)

# Plot results: raw observations in blue with the fitted regression line
# (and its confidence band) from geom_smooth() in red.
ggplot(data, aes(x = Rainfall, y = River.Level)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", color = "red") +
  labs(
    x = "Rainfall [mm]",
    y = "Water Level [m]",
    title = "Regression Analysis: Level vs Rainfall"
  ) +
  theme_minimal() + # Clean layout
  # Discrete viridis colour scale. This function comes from ggplot2, not
  # ggsci, and no colour aesthetic is mapped in aes() here, so it does not
  # change the appearance of the plot.
  scale_color_viridis_d()
`geom_smooth()` using formula = 'y ~ x'
# Optionally, visualize the regression model with visreg
# (conditional plot of the fitted `model` defined earlier in the session).
visreg(model, type = "conditional")
# Load necessary libraries
library(ggplot2) # For plotting
library(readr) # For reading data
library(visreg) # For visualizing regression models
library(ggsci) # For color palettes (from Scientific Journal Color Palettes)

# Load the CSV file with the correct delimiter and specify decimal format:
# fields are separated by ";" and numbers use "," as the decimal mark.
file_path <- "/Users/Gebrekidan/Book2.csv"
data <- read_delim(
  file_path,
  delim = ";",
  locale = locale(decimal_mark = ",")
)
New names:
Rows: 2044 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ";"
chr (3): date, Wind speed, River Level
lgl (1): ...4
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
• `` -> `...4`
# Rename columns to remove spaces (make them easier to work with in R),
# e.g. "Wind speed" becomes "Wind.speed".
names(data) <- make.names(names(data))

# Convert Wind.speed to numeric properly (since some values may use a comma
# as the decimal mark): swap "," for "." before parsing.
data$Wind.speed <- as.numeric(gsub(",", ".", data$Wind.speed))

# Remove empty columns (like ...4) that are irrelevant for the analysis:
# keep only columns that have at least one non-NA value. drop = FALSE
# guarantees the result stays a data frame even if one column remains.
data <- data[, colSums(is.na(data)) < nrow(data), drop = FALSE]

# Handle missing values by removing rows with any NA values
data <- na.omit(data)

# Perform linear regression: predict River.Level based on Wind.speed.
# NOTE(review): the column specification printed by read_delim() reports
# "River Level" as character; confirm it is numeric before fitting.
model <- lm(River.Level ~ Wind.speed, data = data)
summary(model) # Show model details and statistical summary
Call:
lm(formula = River.Level ~ Wind.speed, data = data)
Residuals:
Min 1Q Median 3Q Max
-0.8686 -0.2905 -0.1496 0.1142 3.9535
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.53206 0.02859 18.61 < 2e-16 ***
Wind.speed 0.11776 0.01585 7.43 1.59e-13 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.4699 on 2016 degrees of freedom
Multiple R-squared: 0.02665, Adjusted R-squared: 0.02617
F-statistic: 55.21 on 1 and 2016 DF, p-value: 1.594e-13
# Make predictions based on the linear model: one prediction per row.
data$predicted <- predict(model, newdata = data)

# Visualization: create a density heatmap to show the relationship between
# Wind.speed and River.Level. Each tile is coloured by the number of
# observations falling into it.
ggplot(data, aes(x = Wind.speed, y = River.Level)) +
  # 2D binning plot (density heatmap); bins = 30 is a good starting point
  geom_bin2d(bins = 30) +
  # Color gradient from blue (low count) to red (high count)
  scale_fill_gradient(low = "blue", high = "red") +
  labs(
    x = "Wind Speed [m/s]",
    y = "Water Level [m]",
    title = "Density Heatmap of Wind Speed vs Water Level"
  ) +
  theme_minimal() # Clean minimal theme for the plot
# Load necessary libraries
library(ggplot2) # For plotting
library(ggsci) # For color palettes
library(visreg) # For visualizing regression models
library(readr) # For reading data

# Load the CSV file with the correct delimiter and specify decimal format:
# fields are separated by ";" and numbers use "," as the decimal mark.
file_path <- "/Users/Gebrekidan/Book6.csv"
data <- read_delim(
  file_path,
  delim = ";",
  locale = locale(decimal_mark = ",")
)
Rows: 2044 Columns: 3
── Column specification ────────────────────────────────────────────────────────
Delimiter: ";"
chr (3): date, River Level, Tmin
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Rename columns to remove spaces: make.names() turns each name into a
# syntactically valid R name (e.g. "River Level" becomes "River.Level").
names(data) <- make.names(names(data))

# Check column names after renaming
print(names(data))
Call:
lm(formula = River.Level ~ Tmin, data = data)
Residuals:
Min 1Q Median 3Q Max
-0.7440 -0.2475 -0.1160 0.0796 4.0344
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.924254 0.016369 56.46 <2e-16 ***
Tmin -0.022902 0.001521 -15.06 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.4516 on 2016 degrees of freedom
Multiple R-squared: 0.1011, Adjusted R-squared: 0.1006
F-statistic: 226.7 on 1 and 2016 DF, p-value: < 2.2e-16
# Make predictions: store the model's prediction for every row of `data`.
# `model` must already exist in the session (the lm() fit of
# River.Level ~ Tmin whose summary is printed above).
data$predicted <- predict(model, newdata = data)

# Plot results: raw observations in blue with the fitted regression line
# (and its confidence band) from geom_smooth() in red.
ggplot(data, aes(x = Tmin, y = River.Level)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", color = "red") +
  labs(
    x = "Tmin[°C]",
    y = "Water Level [m]",
    title = "Regression Analysis: Level vs Tmin"
  ) +
  theme_minimal() + # Clean layout
  # Discrete viridis colour scale. This function comes from ggplot2, not
  # ggsci, and no colour aesthetic is mapped in aes() here, so it does not
  # change the appearance of the plot.
  scale_color_viridis_d()
`geom_smooth()` using formula = 'y ~ x'
# Optionally, you can visualize the regression model using visreg
# (conditional plot of the fitted `model` defined earlier in the session).
visreg(model, type = "conditional")
# Load necessary libraries
library(ggplot2) # For plotting
library(ggsci) # For color palettes
library(visreg) # For visualizing regression models
library(readr) # For reading data

# Load the CSV file with the correct delimiter and specify decimal format:
# fields are separated by ";" and numbers use "," as the decimal mark.
file_path <- "/Users/Gebrekidan/Book1.csv"
data <- read_delim(
  file_path,
  delim = ";",
  locale = locale(decimal_mark = ",")
)
Rows: 2044 Columns: 3
── Column specification ────────────────────────────────────────────────────────
Delimiter: ";"
chr (3): date, River Level, Tmax
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Rename columns to remove spaces: make.names() turns each name into a
# syntactically valid R name (e.g. "River Level" becomes "River.Level").
names(data) <- make.names(names(data))

# Check column names after renaming
print(names(data))
Call:
lm(formula = River.Level ~ Tmax, data = data)
Residuals:
Min 1Q Median 3Q Max
-0.8693 -0.2500 -0.0546 0.1076 3.9063
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.300760 0.024623 52.83 <2e-16 ***
Tmax -0.027729 0.001108 -25.03 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.416 on 2016 degrees of freedom
Multiple R-squared: 0.2371, Adjusted R-squared: 0.2367
F-statistic: 626.4 on 1 and 2016 DF, p-value: < 2.2e-16
# Make predictions: store the model's prediction for every row of `data`.
# `model` must already exist in the session (the lm() fit of
# River.Level ~ Tmax whose summary is printed above).
data$predicted <- predict(model, newdata = data)

# Plot results: raw observations in blue with the fitted regression line
# (and its confidence band) from geom_smooth() in red.
ggplot(data, aes(x = Tmax, y = River.Level)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", color = "red") +
  labs(
    x = "Tmax [°C]",
    y = "Water Level [m]",
    title = "Regression Analysis: Level vs Tmax"
  ) +
  theme_minimal() + # Clean layout
  # Discrete viridis colour scale. This function comes from ggplot2, not
  # ggsci, and no colour aesthetic is mapped in aes() here, so it does not
  # change the appearance of the plot.
  scale_color_viridis_d()
`geom_smooth()` using formula = 'y ~ x'
# Optionally, you can visualize the regression model using visreg
# (conditional plot of the fitted `model` defined earlier in the session).
visreg(model, type = "conditional")