Dataset

Dataset Description

About the dataset

'data.frame':   4162 obs. of  19 variables:
 $ SUBDIVISION: chr  "Andaman & Nicobar Islands" "Andaman & Nicobar Islands" "Andaman & Nicobar Islands" "Andaman & Nicobar Islands" ...
 $ YEAR       : int  1901 1902 1903 1904 1905 1906 1907 1908 1910 1911 ...
 $ JAN        : num  49.2 0 12.7 9.4 1.3 ...
 $ FEB        : num  87.1 159.8 144 14.7 0 ...
 $ MAR        : num  29.2 12.2 0 0 3.3 ...
 $ APR        : num  2.3 0 1 202.4 26.9 ...
 $ MAY        : num  529 446 235 304 280 ...
 $ JUN        : num  518 537 480 495 629 ...
 $ JUL        : num  365 229 728 502 369 ...
 $ AUG        : num  481 754 327 160 330 ...
 $ SEP        : num  333 666 339 820 297 ...
 $ OCT        : num  388 197 181 222 261 ...
 $ NOV        : num  558.2 359 284.4 308.7 25.4 ...
 $ DEC        : num  33.6 160.5 225 40.1 344.7 ...
 $ ANNUAL     : num  3373 3521 2957 3080 2567 ...
 $ JF         : num  136.3 159.8 156.7 24.1 1.3 ...
 $ MAM        : num  560 458 236 507 310 ...
 $ JJAS       : num  1696 2186 1874 1978 1625 ...
 $ OND        : num  980 717 691 571 631 ...
 - attr(*, "na.action")= 'omit' Named int [1:26] 22 23 31 35 41 43 113 114 147 150 ...
  ..- attr(*, "names")= chr [1:26] "22" "23" "31" "35" ...

Summary of the Dataset

 SUBDIVISION             YEAR           JAN              FEB        
 Length:4162        Min.   :1901   Min.   :  0.00   Min.   :  0.00  
 Class :character   1st Qu.:1930   1st Qu.:  0.60   1st Qu.:  0.50  
 Mode  :character   Median :1960   Median :  5.80   Median :  6.50  
                    Mean   :1959   Mean   : 18.80   Mean   : 21.44  
                    3rd Qu.:1989   3rd Qu.: 21.77   3rd Qu.: 26.50  
                    Max.   :2017   Max.   :583.70   Max.   :403.50  
      MAR              APR              MAY              JUN         
 Min.   :  0.00   Min.   :  0.00   Min.   :   0.0   Min.   :   0.40  
 1st Qu.:  1.00   1st Qu.:  3.00   1st Qu.:   8.6   1st Qu.:  70.62  
 Median :  7.85   Median : 15.30   Median :  36.3   Median : 138.50  
 Mean   : 27.31   Mean   : 42.67   Mean   :  84.8   Mean   : 228.83  
 3rd Qu.: 31.18   3rd Qu.: 49.17   3rd Qu.:  95.2   3rd Qu.: 301.18  
 Max.   :605.60   Max.   :595.10   Max.   :1168.6   Max.   :1609.90  
      JUL              AUG              SEP              OCT        
 Min.   :   0.0   Min.   :   0.0   Min.   :   0.1   Min.   :  0.00  
 1st Qu.: 175.2   1st Qu.: 155.8   1st Qu.: 100.4   1st Qu.: 14.60  
 Median : 284.6   Median : 258.9   Median : 173.4   Median : 64.70  
 Mean   : 346.3   Mean   : 289.4   Mean   : 197.0   Mean   : 94.96  
 3rd Qu.: 417.6   3rd Qu.: 377.6   3rd Qu.: 265.7   3rd Qu.:147.97  
 Max.   :2362.8   Max.   :1664.6   Max.   :1222.0   Max.   :948.30  
      NOV              DEC            ANNUAL             JF        
 Min.   :  0.00   Min.   :  0.0   Min.   :  62.3   Min.   :  0.00  
 1st Qu.:  0.60   1st Qu.:  0.1   1st Qu.: 803.0   1st Qu.:  4.00  
 Median :  9.30   Median :  3.0   Median :1120.3   Median : 18.80  
 Mean   : 39.18   Mean   : 18.8   Mean   :1409.4   Mean   : 40.24  
 3rd Qu.: 44.40   3rd Qu.: 17.3   3rd Qu.:1643.6   3rd Qu.: 49.88  
 Max.   :648.90   Max.   :617.5   Max.   :6331.1   Max.   :699.50  
      MAM              JJAS             OND        
 Min.   :   0.0   Min.   :  57.4   Min.   :   0.0  
 1st Qu.:  23.8   1st Qu.: 573.3   1st Qu.:  33.8  
 Median :  74.2   Median : 879.5   Median :  97.2  
 Mean   : 154.8   Mean   :1061.5   Mean   : 152.9  
 3rd Qu.: 195.3   3rd Qu.:1281.5   3rd Qu.: 210.9  
 Max.   :1745.8   Max.   :4536.9   Max.   :1252.5  

Rainfall Distribution

Year Wise

Year Rainfall Distribution

Stacked season

State wise analysis

State wise

minimum rainfall

maximum rainfall

Month wise Analysis

Average Monthly rainfall

Minimum rainfall

Maximum rainfall

---
title: "EDA Assignment_2"
output: 
  flexdashboard::flex_dashboard:
    orientation: rows
    vertical_layout: scroll
    theme: journal
    social: menu
    source_code: embed
---

```{r setup, include=FALSE}
library(dplyr)
library(tidyr)
library(ggplot2)
library(DT)
# Read the raw table from disk, then keep only the rows with no missing values.
df <- read.csv(file = "rain.csv")
rain <- stats::na.omit(object = df)
```

## Dataset 

```{r}
# Interactive table of the cleaned data with an export toolbar.
datatable(
  rain,
  extensions = "Buttons",
  options = list(
    # "B" places the Buttons toolbar; the remaining letters keep the search
    # box (f), processing indicator (r), table (t), info line (i) and
    # pagination (p).
    dom = "Bfrtip",
    # The option key is case-sensitive and must be lowercase `buttons`;
    # a capitalised key is not recognised and the list would be ignored.
    buttons = c("copy", "csv", "print", "pdf")
  )
)
```

## Dataset Description {.tabset}

### About the dataset
```{r}
# Show the structure of the cleaned data: dimensions, column names and types.
str(rain)
```

### Summary of the Dataset
```{r}
# Per-column summary statistics of the cleaned data.
summary(rain)
```

## Rainfall Distribution
```{r}
# Histogram of the ANNUAL column over all rows, with vertical reference lines
# at its mean (red) and median (blue).
ggplot(rain, aes(x = ANNUAL)) +
  # bins is given explicitly (30 is the ggplot2 default, so the plot is
  # unchanged) to stop stat_bin() printing its "using `bins = 30`" message
  # into the rendered dashboard.
  geom_histogram(bins = 30) +
  geom_vline(xintercept = mean(rain$ANNUAL), color = "red") +
  geom_vline(xintercept = median(rain$ANNUAL), color = "blue") +
  labs(title = "Distribution of Rainfall ", x = "Rainfall", y = "Count")
```

## Year Wise {.tabset}

### Year Rainfall Distribution
```{r}

# Per-year totals of the annual and seasonal columns, summed over every row
# (subdivision) present for that year.
# NOTE(review): rows with missing values are dropped by na.omit() when `rain`
# is built, so some years may sum over fewer subdivisions than others --
# confirm that yearly totals are still comparable.
# The former outer group_by() call had no grouping variables and was a no-op,
# so the pipeline is written directly.
group_by_year <- rain %>%
  dplyr::select(YEAR, ANNUAL, JF, MAM, JJAS, OND) %>%
  group_by(YEAR) %>%
  summarise(
    ANNUAL_sum = sum(ANNUAL),
    JF_sum = sum(JF),
    MAM_sum = sum(MAM),
    JJAS_sum = sum(JJAS),
    OND_sum = sum(OND)
  )

# Histogram of the yearly totals with the mean (green) and median (red) marked.
ggplot(group_by_year, aes(x = ANNUAL_sum)) +
  # Explicit bins (the ggplot2 default value) silences the stat_bin() message.
  geom_histogram(bins = 30, fill = "blue", color = "white") +
  labs(title = "Distribution of Rainfall year", x = "Rainfall", y = "Count") +
  geom_vline(xintercept = mean(group_by_year$ANNUAL_sum), color = "green") +
  geom_vline(xintercept = median(group_by_year$ANNUAL_sum), color = "red")


```

### Stacked season
```{r}
# Year-wise analysis by season: reshape the four seasonal totals into long
# form so that every (year, season) pair is one row.
long_data_season <- pivot_longer(
  group_by_year,
  cols = c(MAM_sum, JJAS_sum, OND_sum, JF_sum),
  names_to = "Season",
  values_to = "Seasonal_sum"
)
# head(long_data_season)

# Stacked bars per year; horizontal lines mark the across-year mean of the
# annual total (black) and of each seasonal total.
ggplot(long_data_season, aes(x = YEAR, y = Seasonal_sum, fill = Season)) +
  geom_col() +
  geom_hline(yintercept = mean(group_by_year$ANNUAL_sum), color = "black") +
  geom_hline(yintercept = mean(group_by_year$JF_sum), color = "red") +
  geom_hline(yintercept = mean(group_by_year$JJAS_sum), color = "green") +
  geom_hline(yintercept = mean(group_by_year$MAM_sum), color = "blue") +
  geom_hline(yintercept = mean(group_by_year$OND_sum), color = "purple") +
  labs(title = "Annual Sum by Year (Stacked by Season)", x = "Year", y = "Sum")

```


## State wise analysis {.tabset}
```{r}
# Mean annual and seasonal values per subdivision (one row per subdivision).
# The former outer group_by() call had no grouping variables and was a no-op,
# so the pipeline is written directly.
group_by_state <- rain %>%
  group_by(SUBDIVISION) %>%
  summarise(
    ANNUAL_avg = mean(ANNUAL),
    JF_avg = mean(JF),
    MAM_avg = mean(MAM),
    JJAS_avg = mean(JJAS),
    OND_avg = mean(OND)
  )
# head(group_by_state)
```

### State wise
```{r}


# Mean annual value for each subdivision, with a green line at the mean of
# those per-subdivision means. Labels are rotated so they do not overlap.
overall_state_avg <- mean(group_by_state$ANNUAL_avg)

ggplot(group_by_state, aes(x = SUBDIVISION, y = ANNUAL_avg)) +
  geom_col() +
  geom_hline(yintercept = overall_state_avg, color = "green") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```

### minimum rainfall
```{r}
# Subdivisions whose mean annual value is below the mean over all subdivisions.
group_by_state_min <- group_by_state %>%
  filter(ANNUAL_avg < mean(ANNUAL_avg))
# head(group_by_state_min)

# One row per (subdivision, season); the seasonal mean goes into `Values`.
long_data_state_min <- group_by_state_min %>%
  pivot_longer(cols = JF_avg:OND_avg, names_to = "name", values_to = "Values")

# Stack the seasonal means. y must be `Values`: mapping ANNUAL_avg here would
# repeat the annual mean once per season, giving four equal segments and bars
# four times too tall.
ggplot(long_data_state_min, aes(x = SUBDIVISION, y = Values, fill = name)) +
  geom_col() +
  labs(x = "Subdivision", y = "Average rainfall", fill = "Season") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```

### maximum rainfall
```{r}
# Subdivisions whose mean annual value is above the mean over all subdivisions.
group_by_state_max <- group_by_state %>%
  filter(ANNUAL_avg > mean(ANNUAL_avg))
# head(group_by_state_max)

# One row per (subdivision, season); the seasonal mean goes into `Values`.
long_data_state_max <- group_by_state_max %>%
  pivot_longer(cols = JF_avg:OND_avg, names_to = "name", values_to = "Values")

# Stack the seasonal means. y must be `Values`: mapping ANNUAL_avg here would
# repeat the annual mean once per season, giving four equal segments and bars
# four times too tall.
ggplot(long_data_state_max, aes(x = SUBDIVISION, y = Values, fill = name)) +
  geom_col() +
  labs(x = "Subdivision", y = "Average rainfall", fill = "Season") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```

## Month wise Analysis {.tabset}

### Average Monthly rainfall
```{r}
# Long form of the twelve monthly columns: one row per original row and month.
df_long_month <- rain %>%
  pivot_longer(cols = JAN:DEC, names_to = "Month", values_to = "Value") %>%
  # Calendar-ordered factor: as plain character, the months would be sorted
  # alphabetically (APR, AUG, DEC, ...) on the x axis.
  mutate(Month = factor(Month, levels = toupper(month.abb)))

# Boxplot for monthly values across all subdivisions
ggplot(df_long_month, aes(x = Month, y = Value)) +
  geom_boxplot() +
  labs(title = "Monthly Data Distribution", x = "Month", y = "Value") +
  theme_minimal()


```

### Minimum rainfall
```{r}
# Mean of ANNUAL and of each monthly column per subdivision. Output columns
# keep their original names, in the order SUBDIVISION, ANNUAL, JAN, ..., DEC.
# The former outer group_by() call had no grouping variables and was a no-op,
# so the pipeline is written directly.
for_month <- rain %>%
  group_by(SUBDIVISION) %>%
  summarise(across(c(ANNUAL, JAN:DEC), mean))

# names(for_month)

# Subdivisions whose mean ANNUAL is below the mean over all subdivisions.
for_month_min <- for_month %>%
  filter(ANNUAL < mean(ANNUAL))
# head(for_month_min)

for_month_long_min <- for_month_min %>%
  pivot_longer(cols = JAN:DEC, names_to = "Month", values_to = "Value") %>%
  # Calendar-ordered factor: as plain character, the months would be sorted
  # alphabetically (APR, AUG, DEC, ...) on the x axis.
  mutate(Month = factor(Month, levels = toupper(month.abb)))

# head(for_month_long_min)

# One panel per subdivision showing its mean value for each month.
ggplot(for_month_long_min, aes(x = Month, y = Value)) +
  geom_col() +
  facet_wrap(~SUBDIVISION) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

```

### Maximum rainfall
```{r}
# Subdivisions whose mean ANNUAL is above the mean over all subdivisions.
for_month_max <- for_month %>%
  filter(ANNUAL > mean(ANNUAL))
# head(for_month_max)

for_month_long_max <- for_month_max %>%
  pivot_longer(cols = JAN:DEC, names_to = "Month", values_to = "Value") %>%
  # Calendar-ordered factor: as plain character, the months would be sorted
  # alphabetically (APR, AUG, DEC, ...) on the x axis.
  mutate(Month = factor(Month, levels = toupper(month.abb)))

# head(for_month_long_max)

# One panel per subdivision showing its mean value for each month.
ggplot(for_month_long_max, aes(x = Month, y = Value)) +
  geom_col() +
  facet_wrap(~SUBDIVISION) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```