Let's load the “Chennai water reservoir level” dataset that the model will be built on.
# Read the reservoir-level CSV (path is relative to the working directory)
# and print summary statistics for every column.
rain_lvl <- read.csv(file = "chennai_reservoir_levels.csv")
summary(object = rain_lvl)
## Date POONDI CHOLAVARAM REDHILLS
## 01-01-2004: 1 Min. : 0.9 Min. : 0.0 Min. : 0.0
## 01-01-2005: 1 1st Qu.: 198.0 1st Qu.: 17.0 1st Qu.: 804.2
## 01-01-2006: 1 Median : 749.0 Median : 90.5 Median :1605.5
## 01-01-2007: 1 Mean :1115.6 Mean :236.2 Mean :1543.5
## 01-01-2008: 1 3rd Qu.:1990.0 3rd Qu.:453.8 3rd Qu.:2223.0
## 01-01-2009: 1 Max. :3231.0 Max. :896.0 Max. :3300.0
## (Other) :5732
## CHEMBARAMBAKKAM
## Min. : 0.0
## 1st Qu.: 431.2
## Median :1207.0
## Mean :1300.4
## 3rd Qu.:2064.8
## Max. :3396.0
##
# Column types: per the output below, Date is read as a factor with one level
# per row and the four reservoir columns are numeric.
str(rain_lvl)
## 'data.frame': 5738 obs. of 5 variables:
## $ Date : Factor w/ 5738 levels "01-01-2004","01-01-2005",..: 1 190 379 568 757 946 1135 1324 1513 1702 ...
## $ POONDI : num 3.9 3.9 3.9 3.9 3.8 3.8 3.8 3.7 3.7 3.7 ...
## $ CHOLAVARAM : num 0 0 0 0 0 0 0 0 0 0 ...
## $ REDHILLS : num 268 268 267 267 267 266 266 265 264 264 ...
## $ CHEMBARAMBAKKAM: num 0 0 0 0 0 0 0 0 0 0 ...
# Number of rows and columns.
dim(rain_lvl)
## [1] 5738 5
# Column names as read from the CSV header.
names(rain_lvl)
## [1] "Date" "POONDI" "CHOLAVARAM" "REDHILLS"
## [5] "CHEMBARAMBAKKAM"
Cleaning the Outlier Data.
library(tidyverse)
## -- Attaching packages ----------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1 v purrr 0.3.2
## v tibble 2.1.3 v dplyr 0.8.3
## v tidyr 0.8.3 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts -------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Compact per-column preview of the raw data before any cleaning.
glimpse(rain_lvl)
## Observations: 5,738
## Variables: 5
## $ Date <fct> 01-01-2004, 02-01-2004, 03-01-2004, 04-01-2004...
## $ POONDI <dbl> 3.9, 3.9, 3.9, 3.9, 3.8, 3.8, 3.8, 3.7, 3.7, 3...
## $ CHOLAVARAM <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ REDHILLS <dbl> 268, 268, 267, 267, 267, 266, 266, 265, 264, 2...
## $ CHEMBARAMBAKKAM <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
# Keep only the rows where all four reservoirs report a level above zero.
# Columns are named bare so filter() evaluates them against the data frame
# coming through the pipe instead of re-indexing the global `rain_lvl`.
lvl_res <- rain_lvl %>%
  filter(POONDI > 0, CHOLAVARAM > 0, REDHILLS > 0, CHEMBARAMBAKKAM > 0)

# Split the "dd-mm-yyyy" Date on "-" into Day, Month and Year columns.
# `convert` is not set, so the three new columns are not turned into numbers.
# `rain_lvl` is deliberately overwritten: the rest of the script works on the
# cleaned, split frame.
rain_lvl <- separate(lvl_res, Date, c("Day", "Month", "Year"), sep = "-")
names(rain_lvl)
## [1] "Day" "Month" "Year" "POONDI"
## [5] "CHOLAVARAM" "REDHILLS" "CHEMBARAMBAKKAM"
Exploratory Data Analysis
POONDI column data analysis
library(ggplot2)
# Boxplot: spread of POONDI levels within each year.
# aes() uses bare column names so the mapping is resolved inside `data`
# (ggplot2 discourages `df$col` inside aes()).
ggplot(data = rain_lvl, aes(x = Year, y = POONDI)) +
  geom_boxplot() +
  labs(title = "Rain Level in POONDI", x = "Year", y = "Rain level in Poondi")

# Heat map: Month on x, Year on y, tile colour = POONDI level.
# Axis labels now match what is actually mapped (Month / Year), and the
# colour legend carries the level label.
ggplot(data = rain_lvl, aes(x = Month, y = Year, fill = POONDI)) +
  geom_tile() +
  labs(
    title = "Rain Level in POONDI",
    x = "Month",
    y = "Year",
    fill = "Rain level in Poondi"
  )

# Density plot: one translucent density per year, on a log10 x axis.
ggplot(data = rain_lvl, aes(x = POONDI, fill = Year)) +
  geom_density(alpha = 0.1) +
  scale_x_log10()

# Bar chart: geom_col() stacks every row, so each bar is the sum of all
# POONDI readings recorded in that year.
ggplot(data = rain_lvl, aes(x = Year, y = POONDI)) +
  geom_col() +
  labs(title = "Rain Level in POONDI", x = "Year", y = "Rain level in Poondi")

REDHILLS reservoir data analysis
# Boxplot: spread of REDHILLS levels within each year.
# aes() uses bare column names so the mapping is resolved inside `data`
# (ggplot2 discourages `df$col` inside aes()).
ggplot(data = rain_lvl, aes(x = Year, y = REDHILLS)) +
  geom_boxplot() +
  labs(title = "Rain Level in REDHILLS", x = "Year", y = "Rain level in Redhills")

# Heat map: Month on x, Year on y, tile colour = REDHILLS level.
# Axis labels now match what is actually mapped (Month / Year), and the
# colour legend carries the level label.
ggplot(data = rain_lvl, aes(x = Month, y = Year, fill = REDHILLS)) +
  geom_tile() +
  labs(
    title = "Rain Level in REDHILLS",
    x = "Month",
    y = "Year",
    fill = "Rain level in Redhills"
  )

# Density plot: one translucent density per year (linear x axis here).
ggplot(data = rain_lvl, aes(x = REDHILLS, fill = Year)) +
  geom_density(alpha = 0.1)

# Bar chart: geom_col() stacks every row, so each bar is the sum of all
# REDHILLS readings recorded in that year.
ggplot(data = rain_lvl, aes(x = Year, y = REDHILLS)) +
  geom_col() +
  labs(title = "Rain Level in REDHILLS", x = "Year", y = "Rain level in Redhills")

CHOLAVARAM reservoir data analysis
# Boxplot: spread of CHOLAVARAM levels within each year.
# aes() uses bare column names so the mapping is resolved inside `data`
# (ggplot2 discourages `df$col` inside aes()).
ggplot(data = rain_lvl, aes(x = Year, y = CHOLAVARAM)) +
  geom_boxplot() +
  labs(title = "Rain Level in CHOLAVARAM", x = "Year", y = "Rain level in Cholavaram")

# Heat map: Month on x, Year on y, tile colour = CHOLAVARAM level.
# Axis labels now match what is actually mapped (Month / Year), and the
# colour legend carries the level label.
ggplot(data = rain_lvl, aes(x = Month, y = Year, fill = CHOLAVARAM)) +
  geom_tile() +
  labs(
    title = "Rain Level in CHOLAVARAM",
    x = "Month",
    y = "Year",
    fill = "Rain level in Cholavaram"
  )

# Density plot: one translucent density per year, on a log10 x axis.
ggplot(data = rain_lvl, aes(x = CHOLAVARAM, fill = Year)) +
  geom_density(alpha = 0.1) +
  scale_x_log10()

# Bar chart: geom_col() stacks every row, so each bar is the sum of all
# CHOLAVARAM readings recorded in that year.
ggplot(data = rain_lvl, aes(x = Year, y = CHOLAVARAM)) +
  geom_col() +
  labs(title = "Rain Level in CHOLAVARAM", x = "Year", y = "Rain level in Cholavaram")

CHEMBARAMBAKKAM reservoir data analysis
# Boxplot: spread of CHEMBARAMBAKKAM levels within each year.
# aes() uses bare column names so the mapping is resolved inside `data`
# (ggplot2 discourages `df$col` inside aes()).
ggplot(data = rain_lvl, aes(x = Year, y = CHEMBARAMBAKKAM)) +
  geom_boxplot() +
  labs(
    title = "Rain Level in CHEMBARAMBAKKAM",
    x = "Year",
    y = "Rain level in Chembarambakkam"
  )

# Heat map: Month on x, Year on y, tile colour = CHEMBARAMBAKKAM level.
# Axis labels now match what is actually mapped (Month / Year), and the
# colour legend carries the level label.
ggplot(data = rain_lvl, aes(x = Month, y = Year, fill = CHEMBARAMBAKKAM)) +
  geom_tile() +
  labs(
    title = "Rain Level in CHEMBARAMBAKKAM",
    x = "Month",
    y = "Year",
    fill = "Rain level in Chembarambakkam"
  )

# Density plot: one translucent density per year, on a log10 x axis.
ggplot(data = rain_lvl, aes(x = CHEMBARAMBAKKAM, fill = Year)) +
  geom_density(alpha = 0.1) +
  scale_x_log10()

# Bar chart: geom_col() stacks every row, so each bar is the sum of all
# CHEMBARAMBAKKAM readings recorded in that year.
ggplot(data = rain_lvl, aes(x = Year, y = CHEMBARAMBAKKAM)) +
  geom_col() +
  labs(
    title = "Rain Level in CHEMBARAMBAKKAM",
    x = "Year",
    y = "Rain level in Chembarambakkam"
  )

Machine Learning Algorithm for Chennai Reservoir Level dataset
# Clustering input: columns 3 to 7 of the cleaned frame, i.e. Year plus the
# four reservoir-level columns (Day and Month are left out).
rain_lvl1 <- rain_lvl[3:7]
# Year was produced by splitting the Date text, so make it numeric explicitly;
# otherwise the whole frame is turned into a text matrix before clustering.
rain_lvl1$Year <- as.numeric(as.character(rain_lvl1$Year))
#Elbow plot
library(cluster)
set.seed(5)
# Within-cluster sum of squares for k = 1..10 clusters.
# The clustering now runs on `rain_lvl1`; previously it ran on the full
# `rain_lvl` frame, which left `rain_lvl1` unused and pulled the Day and Month
# columns into the distance computation.
wcss <- vapply(
  1:10,
  function(k) sum(kmeans(rain_lvl1, k)$withinss),
  numeric(1)
)
# NOTE(review): the recorded values below come from the earlier run on the
# full `rain_lvl` frame; re-run this chunk to refresh them.
wcss
## [1] 11075615105 3947107319 2522634465 1836678451 1500189826
## [6] 1381666215 1228609990 1126126192 1025936477 996526094
plot(1:10, wcss, type = "b", main = "The Elbow Method",
     xlab = "Number of Cluster", ylab = "WCSS")
