---
title: "EDA Assignment_2"
# Format options must be nested under the output format they configure;
# at top level they are separate keys and the dashboard format is not applied.
output:
  flexdashboard::flex_dashboard:
    orientation: rows
    vertical_layout: scroll
    theme: journal
    social: menu
    source_code: embed
---
```{r setup, include=FALSE}
library(dplyr)
library(tidyr)
library(ggplot2)
library(DT)
# Read the raw table from "rain.csv" in the working directory.
df <- read.csv(file = "rain.csv")
# Keep only the rows that have no missing value in any column.
rain <- na.omit(object = df)
```
## Dataset
```{r}
# Interactive table of the cleaned data with export buttons.
# DataTables option names are case-sensitive: the button list has to be passed
# as `buttons` (lower case) or the requested set is not picked up. The argument
# is also spelled out as `options` instead of relying on partial matching.
datatable(
  rain,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "csv", "print", "pdf")
  )
)
```
## Dataset Description {.tabset}
### About the dataset
```{r}
# Print the structure of the cleaned data (columns and their types).
str(rain)
```
### Summary of the Dataset
```{r}
# Print per-column summary statistics of the cleaned data.
summary(rain)
```
## Rainfall Distribution
```{r}
# Histogram of the ANNUAL column over all rows, with vertical reference
# lines at its mean (red) and its median (blue).
ggplot(data = rain, mapping = aes(x = ANNUAL)) +
  geom_histogram() +
  geom_vline(xintercept = mean(rain[["ANNUAL"]]), color = "red") +
  geom_vline(xintercept = median(rain[["ANNUAL"]]), color = "blue") +
  labs(title = "Distribution of Rainfall ", x = "Rainfall", y = "Count")
```
## Year Wise {.tabset}
### Year Rainfall Distribution
```{r}
# Per-year totals: for each YEAR, sum the annual column and each of the four
# seasonal columns over all rows recorded for that year.
group_by_year <- rain %>%
  dplyr::select(YEAR, ANNUAL, JF, MAM, JJAS, OND) %>%
  group_by(YEAR) %>%
  summarise(
    ANNUAL_sum = sum(ANNUAL),
    JF_sum = sum(JF),
    MAM_sum = sum(MAM),
    JJAS_sum = sum(JJAS),
    OND_sum = sum(OND)
  )
# Histogram of the per-year annual totals, with vertical reference lines at
# their mean (green) and median (red).
ggplot(data = group_by_year, mapping = aes(x = ANNUAL_sum)) +
  geom_histogram(fill = "blue", color = "white") +
  labs(title = "Distribution of Rainfall year", x = "Rainfall", y = "Count") +
  geom_vline(
    xintercept = mean(group_by_year[["ANNUAL_sum"]]),
    color = "green"
  ) +
  geom_vline(
    xintercept = median(group_by_year[["ANNUAL_sum"]]),
    color = "red"
  )
```
### Stacked season
```{r}
# Year-wise seasonal analysis: reshape the four seasonal totals to long
# format (one row per year and season) so they can be stacked per year.
long_data_season <- pivot_longer(
  data = group_by_year,
  cols = c(MAM_sum, JJAS_sum, OND_sum, JF_sum),
  names_to = "Season",
  values_to = "Seasonal_sum"
)

# Stacked bar per year. Horizontal lines mark the across-year mean of the
# annual total (black) and of each seasonal total (JF red, JJAS green,
# MAM blue, OND purple).
ggplot(
  data = long_data_season,
  mapping = aes(x = YEAR, y = Seasonal_sum, fill = Season)
) +
  geom_col() +
  geom_hline(yintercept = mean(group_by_year[["ANNUAL_sum"]]), color = "black") +
  geom_hline(yintercept = mean(group_by_year[["JF_sum"]]), color = "red") +
  geom_hline(yintercept = mean(group_by_year[["JJAS_sum"]]), color = "green") +
  geom_hline(yintercept = mean(group_by_year[["MAM_sum"]]), color = "blue") +
  geom_hline(yintercept = mean(group_by_year[["OND_sum"]]), color = "purple") +
  labs(title = "Annual Sum by Year (Stacked by Season)", x = "Year", y = "Sum")
```
## State wise analysis {.tabset}
```{r}
# Per-subdivision averages: for each SUBDIVISION, take the mean of the annual
# column and of each of the four seasonal columns over all of its rows.
group_by_state <- rain %>%
  group_by(SUBDIVISION) %>%
  summarise(
    ANNUAL_avg = mean(ANNUAL),
    JF_avg = mean(JF),
    MAM_avg = mean(MAM),
    JJAS_avg = mean(JJAS),
    OND_avg = mean(OND)
  )
#head(group_by_state)
```
### State wise
```{r}
# Average annual value per subdivision; the green line is the mean of those
# subdivision averages. Axis labels are rotated so the names stay readable.
ggplot(data = group_by_state, mapping = aes(x = SUBDIVISION, y = ANNUAL_avg)) +
  geom_col() +
  geom_hline(
    yintercept = mean(group_by_state[["ANNUAL_avg"]]),
    color = "green"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
### minimum rainfall
```{r}
# Subdivisions whose average annual value lies below the mean of all
# subdivision averages.
group_by_state_min <- filter(group_by_state, ANNUAL_avg < mean(ANNUAL_avg))

# Long format: one row per subdivision and season, with the seasonal average
# stored in `Values` and the season column name in `name`.
long_data_state_min <- group_by_state_min %>%
  pivot_longer(cols = JF_avg:OND_avg, names_to = "name", values_to = "Values")

# Stack the seasonal averages per subdivision. The bar height has to come from
# `Values`: mapping ANNUAL_avg would repeat the annual figure once per season,
# making every bar a multiple of the annual average with equal-sized segments.
ggplot(long_data_state_min, aes(x = SUBDIVISION, y = Values, fill = name)) +
  geom_bar(stat = "identity") +
  labs(y = "Average rainfall") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
### maximum rainfall
```{r}
# Subdivisions whose average annual value lies above the mean of all
# subdivision averages.
group_by_state_max <- filter(group_by_state, ANNUAL_avg > mean(ANNUAL_avg))

# Long format: one row per subdivision and season, with the seasonal average
# stored in `Values` and the season column name in `name`.
long_data_state_max <- group_by_state_max %>%
  pivot_longer(cols = JF_avg:OND_avg, names_to = "name", values_to = "Values")

# Stack the seasonal averages per subdivision. The bar height has to come from
# `Values`: mapping ANNUAL_avg would repeat the annual figure once per season,
# making every bar a multiple of the annual average with equal-sized segments.
ggplot(long_data_state_max, aes(x = SUBDIVISION, y = Values, fill = name)) +
  geom_bar(stat = "identity") +
  labs(y = "Average rainfall") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
## Month wise Analysis {.tabset}
### Average Monthly rainfall
```{r}
# Long format: one row per original row and month column (JAN through DEC),
# with the column name in `Month` and its value in `Value`.
df_long_month <- rain %>%
  pivot_longer(cols = JAN:DEC, names_to = "Month", values_to = "Value")

# Boxplot of the monthly values across all rows.
# `Month` is a character column, so ggplot would order the axis
# alphabetically (APR, AUG, DEC, ...). Setting the limits to the order in
# which the names first appear keeps the column order of the data instead.
ggplot(df_long_month, aes(x = Month, y = Value)) +
  geom_boxplot() +
  scale_x_discrete(limits = unique(df_long_month$Month)) +
  labs(title = "Monthly Data Distribution", x = "Month", y = "Value") +
  theme_minimal()
```
### Minimum rainfall
```{r}
# Per-subdivision averages of the annual column and of each monthly column.
# Output column names deliberately match the input names (ANNUAL, JAN..DEC).
for_month <- rain %>%
  group_by(SUBDIVISION) %>%
  summarise(
    ANNUAL = mean(ANNUAL),
    JAN = mean(JAN),
    FEB = mean(FEB),
    MAR = mean(MAR),
    APR = mean(APR),
    MAY = mean(MAY),
    JUN = mean(JUN),
    JUL = mean(JUL),
    AUG = mean(AUG),
    SEP = mean(SEP),
    OCT = mean(OCT),
    NOV = mean(NOV),
    DEC = mean(DEC)
  )
#names(for_month)
#for min
# Subdivisions whose average annual value is below the mean over all
# subdivisions.
for_month_min <- for_month %>%
  filter(ANNUAL < mean(ANNUAL))

# Long format: one row per subdivision and month.
for_month_long_min <- for_month_min %>%
  pivot_longer(cols = JAN:DEC, names_to = "Month", values_to = "Value")

# One panel per subdivision with a bar per month.
# `Month` is a character column, so the axis would otherwise be sorted
# alphabetically; the limits pin it to the column order of the data.
ggplot(for_month_long_min, aes(x = Month, y = Value)) +
  geom_bar(stat = "identity") +
  scale_x_discrete(limits = unique(for_month_long_min$Month)) +
  facet_wrap(~SUBDIVISION) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
### Maximum rainfall
```{r}
# Subdivisions whose average annual value is above the mean over all
# subdivisions.
for_month_max <- for_month %>%
  filter(ANNUAL > mean(ANNUAL))

# Long format: one row per subdivision and month.
for_month_long_max <- for_month_max %>%
  pivot_longer(cols = JAN:DEC, names_to = "Month", values_to = "Value")

# One panel per subdivision with a bar per month.
# `Month` is a character column, so the axis would otherwise be sorted
# alphabetically; the limits pin it to the column order of the data.
ggplot(for_month_long_max, aes(x = Month, y = Value)) +
  geom_bar(stat = "identity") +
  scale_x_discrete(limits = unique(for_month_long_max$Month)) +
  facet_wrap(~SUBDIVISION) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```