Biniyam Aklilu

# Load necessary libraries
library(ggplot2)  # For plotting
library(readr)    # For reading data
library(visreg)   # For visualizing regression models
library(ggsci)    # For color palettes

# Read the semicolon-delimited file; the locale declares "," as decimal mark
file_path <- "/Users/Gebrekidan/Book5.csv"
data <- read_delim(
  file = file_path,
  delim = ";",
  locale = locale(decimal_mark = ",")
)
Rows: 2044 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ";"
chr (4): date, Rainfall, River Level, Wind speed

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Make every header a syntactically valid R name
# ("River Level" becomes "River.Level")
colnames(data) <- make.names(colnames(data))

# The measurement columns are read as text: replace any decimal comma with a
# point, then coerce to numeric (unparseable entries become NA)
data[["Rainfall"]] <- as.numeric(
  gsub(",", ".", data[["Rainfall"]], fixed = TRUE)
)
data[["River.Level"]] <- as.numeric(
  gsub(",", ".", data[["River.Level"]], fixed = TRUE)
)

# Keep only the columns that hold at least one non-missing value
data <- data[, colSums(!is.na(data)) > 0]

# Discard every row with a missing value in any remaining column
data <- na.omit(data)

# Simple linear regression of River.Level on Rainfall
model <- lm(formula = River.Level ~ Rainfall, data = data)
summary(model)

Call:
lm(formula = River.Level ~ Rainfall, data = data)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.2373 -0.2523 -0.1323  0.1323  3.4350 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 0.672321   0.010637   63.21   <2e-16 ***
Rainfall    0.024025   0.001518   15.83   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.4492 on 2016 degrees of freedom
Multiple R-squared:  0.1105,  Adjusted R-squared:  0.1101 
F-statistic: 250.5 on 1 and 2016 DF,  p-value: < 2.2e-16
# Store the model's fitted water level for every observation
data$predicted <- predict(model, newdata = data)

# Scatter plot of the observations with a linear-model smoother on top.
# Point and line colours are fixed constants rather than mapped aesthetics,
# so no colour scale is added: a discrete viridis scale would have nothing
# to act on (and it is a ggplot2 scale, not a ggsci palette).
ggplot(data, aes(x = Rainfall, y = River.Level)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", color = "red") +
  labs(
    x = "Rainfall [mm]",
    y = "Water Level [m]",
    title = "Regression Analysis: Level vs Rainfall"
  ) +
  theme_minimal()  # Clean layout
`geom_smooth()` using formula = 'y ~ x'

# Optional: conditional plot of the fitted model, drawn by visreg
visreg(fit = model, type = "conditional")

# Load necessary libraries
library(ggplot2)  # For plotting
library(readr)    # For reading data
library(visreg)   # For visualizing regression models
library(ggsci)    # For color palettes (from Scientific Journal Color Palettes)

# Read the semicolon-delimited file; the locale declares "," as decimal mark
file_path <- "/Users/Gebrekidan/Book2.csv"
data <- read_delim(
  file = file_path,
  delim = ";",
  locale = locale(decimal_mark = ",")
)
New names:
Rows: 2044 Columns: 4
── Column specification
──────────────────────────────────────────────────────── Delimiter: ";" chr
(3): date, Wind speed, River Level lgl (1): ...4
ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
Specify the column types or set `show_col_types = FALSE` to quiet this message.
• `` -> `...4`
# Rename columns to syntactically valid R names
# ("Wind speed" -> "Wind.speed", "River Level" -> "River.Level")
names(data) <- make.names(names(data))

# read_delim() reports BOTH measurement columns as character, so each one
# must be converted before modelling. Accept either "," or "." as the decimal
# mark; entries that still cannot be parsed become NA.
data$Wind.speed <- as.numeric(gsub(",", ".", data$Wind.speed, fixed = TRUE))
# The response has to be numeric as well: lm() cannot regress a character
# column, and the other analyses in this file convert River.Level the same way
data$River.Level <- as.numeric(gsub(",", ".", data$River.Level, fixed = TRUE))

# Remove columns that contain no values at all (such as the unnamed `...4`
# column created from the empty trailing header)
data <- data[, colSums(is.na(data)) < nrow(data)]

# Handle missing values by removing rows with any NA values
data <- na.omit(data)

# Perform linear regression: predict River.Level based on Wind.speed
model <- lm(River.Level ~ Wind.speed, data = data)
summary(model)  # Show model details and statistical summary

Call:
lm(formula = River.Level ~ Wind.speed, data = data)

Residuals:
    Min      1Q  Median      3Q     Max 
-0.8686 -0.2905 -0.1496  0.1142  3.9535 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.53206    0.02859   18.61  < 2e-16 ***
Wind.speed   0.11776    0.01585    7.43 1.59e-13 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.4699 on 2016 degrees of freedom
Multiple R-squared:  0.02665,   Adjusted R-squared:  0.02617 
F-statistic: 55.21 on 1 and 2016 DF,  p-value: 1.594e-13
# Attach the model's fitted water level to each row
data[["predicted"]] <- predict(model, newdata = data)

# 2D-binned count heatmap of Wind.speed against River.Level (30 bins),
# filled on a blue (low count) to red (high count) gradient
ggplot(data = data, mapping = aes(x = Wind.speed, y = River.Level)) +
  geom_bin2d(bins = 30) +
  scale_fill_gradient(low = "blue", high = "red") +
  theme_minimal() +
  labs(
    title = "Density Heatmap of Wind Speed vs Water Level",
    x = "Wind Speed [m/s]",
    y = "Water Level [m]"
  )

# Load necessary libraries
library(ggplot2)  # For plotting
library(ggsci)    # For color palettes
library(visreg)   # For visualizing regression models
library(readr)    # For reading data

# Read the semicolon-delimited file; the locale declares "," as decimal mark
file_path <- "/Users/Gebrekidan/Book6.csv"
data <- read_delim(
  file = file_path,
  delim = ";",
  locale = locale(decimal_mark = ",")
)
Rows: 2044 Columns: 3
── Column specification ────────────────────────────────────────────────────────
Delimiter: ";"
chr (3): date, River Level, Tmin

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Make every header a syntactically valid R name (spaces become dots)
colnames(data) <- make.names(colnames(data))

# Show the resulting headers so the names used below can be verified
print(colnames(data))
[1] "date"        "River.Level" "Tmin"       
# Coerce the text columns to numeric; entries that cannot be parsed become NA
data[["Tmin"]] <- as.numeric(data[["Tmin"]])
data[["River.Level"]] <- as.numeric(data[["River.Level"]])

# Keep only the columns that hold at least one non-missing value
data <- data[, colSums(!is.na(data)) > 0]

# Discard every row with a missing value in any remaining column
data <- na.omit(data)

# Simple linear regression of River.Level on Tmin
model <- lm(formula = River.Level ~ Tmin, data = data)
summary(model)

Call:
lm(formula = River.Level ~ Tmin, data = data)

Residuals:
    Min      1Q  Median      3Q     Max 
-0.7440 -0.2475 -0.1160  0.0796  4.0344 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.924254   0.016369   56.46   <2e-16 ***
Tmin        -0.022902   0.001521  -15.06   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.4516 on 2016 degrees of freedom
Multiple R-squared:  0.1011,    Adjusted R-squared:  0.1006 
F-statistic: 226.7 on 1 and 2016 DF,  p-value: < 2.2e-16
# Store the model's fitted water level for every observation
data$predicted <- predict(model, newdata = data)

# Scatter plot of the observations with a linear-model smoother on top.
# Point and line colours are fixed constants rather than mapped aesthetics,
# so no colour scale is added: a discrete viridis scale would have nothing
# to act on (and it is a ggplot2 scale, not a ggsci palette).
ggplot(data, aes(x = Tmin, y = River.Level)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", color = "red") +
  labs(
    x = "Tmin[°C]",
    y = "Water Level [m]",
    title = "Regression Analysis: Level vs Tmin"
  ) +
  theme_minimal()  # Clean layout
`geom_smooth()` using formula = 'y ~ x'

# Optional: conditional plot of the fitted model, drawn by visreg
visreg(fit = model, type = "conditional")

# Load necessary libraries
library(ggplot2)  # For plotting
library(ggsci)    # For color palettes
library(visreg)   # For visualizing regression models
library(readr)    # For reading data

# Read the semicolon-delimited file; the locale declares "," as decimal mark
file_path <- "/Users/Gebrekidan/Book1.csv"
data <- read_delim(
  file = file_path,
  delim = ";",
  locale = locale(decimal_mark = ",")
)
Rows: 2044 Columns: 3
── Column specification ────────────────────────────────────────────────────────
Delimiter: ";"
chr (3): date, River Level, Tmax

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Make every header a syntactically valid R name (spaces become dots)
colnames(data) <- make.names(colnames(data))

# Show the resulting headers so the names used below can be verified
print(colnames(data))
[1] "date"        "River.Level" "Tmax"       
# Coerce the text columns to numeric; entries that cannot be parsed become NA
data[["Tmax"]] <- as.numeric(data[["Tmax"]])
data[["River.Level"]] <- as.numeric(data[["River.Level"]])

# Keep only the columns that hold at least one non-missing value
data <- data[, colSums(!is.na(data)) > 0]

# Discard every row with a missing value in any remaining column
data <- na.omit(data)

# Simple linear regression of River.Level on Tmax
model <- lm(formula = River.Level ~ Tmax, data = data)
summary(model)

Call:
lm(formula = River.Level ~ Tmax, data = data)

Residuals:
    Min      1Q  Median      3Q     Max 
-0.8693 -0.2500 -0.0546  0.1076  3.9063 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  1.300760   0.024623   52.83   <2e-16 ***
Tmax        -0.027729   0.001108  -25.03   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.416 on 2016 degrees of freedom
Multiple R-squared:  0.2371,    Adjusted R-squared:  0.2367 
F-statistic: 626.4 on 1 and 2016 DF,  p-value: < 2.2e-16
# Store the model's fitted water level for every observation
data$predicted <- predict(model, newdata = data)

# Scatter plot of the observations with a linear-model smoother on top.
# Point and line colours are fixed constants rather than mapped aesthetics,
# so no colour scale is added: a discrete viridis scale would have nothing
# to act on (and it is a ggplot2 scale, not a ggsci palette).
ggplot(data, aes(x = Tmax, y = River.Level)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", color = "red") +
  labs(
    x = "Tmax [°C]",
    y = "Water Level [m]",
    title = "Regression Analysis: Level vs Tmax"
  ) +
  theme_minimal()  # Clean layout
`geom_smooth()` using formula = 'y ~ x'

# Optional: conditional plot of the fitted model, drawn by visreg
visreg(fit = model, type = "conditional")