Column

Histogram Chart

 Name.of.State.UT   X2020.21..Tax.on.Vehicles X2020.21..Tax.on.Goods.and.Pass.
 Length:32          Min.   :   25.0           Min.   :   0.00                 
 Class :character   1st Qu.:  299.5           1st Qu.:   0.00                 
 Mode  :character   Median : 1530.0           Median :   3.85                 
                    Mean   : 3792.6           Mean   : 145.30                 
                    3rd Qu.: 3031.5           3rd Qu.:  39.25                 
                    Max.   :60681.5           Max.   :2324.80                 
 X2020.21...Total  X2020.21..R.E...Percentage.of.States.UTs.Own.Tax.Revenue
 Min.   :   25.4   Min.   : 1.700                                          
 1st Qu.:  302.6   1st Qu.: 4.200                                          
 Median : 1559.0   Median : 5.200                                          
 Mean   : 3937.9   Mean   : 5.578                                          
 3rd Qu.: 3055.9   3rd Qu.: 6.275                                          
 Max.   :63006.3   Max.   :16.600                                          
[1] "Name.of.State.UT"                                        
[2] "X2020.21..Tax.on.Vehicles"                               
[3] "X2020.21..Tax.on.Goods.and.Pass."                        
[4] "X2020.21...Total"                                        
[5] "X2020.21..R.E...Percentage.of.States.UTs.Own.Tax.Revenue"

Column

Scatter plot

Box plot

---
title: "EDA for Assignment_2"
output: 
  flexdashboard::flex_dashboard:
    orientation: columns
    vertical_layout: scroll
    theme: cosmo
    social: menu
    source_code: embed
---

```{r setup, include=FALSE}
library(flexdashboard)
library('ggvis')
library('tidyverse')
library('ggplot2')
library(dplyr)
library(tidyr)
library(lattice)
# Load the raw table; empty CSV fields are read as NA.
# NOTE(review): absolute path -- the file must exist at this location when
# the document is knitted.
data <- read.csv("/cloud/project/RTYB21.csv", header = TRUE, na.strings = "")

# Quick structural checks. This chunk is `include=FALSE`, so none of this
# output reaches the dashboard; it is only useful when run interactively.
head(data)
colSums(is.na(data)) # missing values per column
str(data)
summary(data) # summary statistics per column
names(data)
distinct(data, Name.of.State.UT)

# Working copy used by every plot: all rows except the last one.
# NOTE(review): the last row is presumably an aggregate/total row -- confirm
# against the CSV. `head(x, -1)` always returns a data frame, even if the
# table ever has a single column.
dataset <- head(data, -1)
print(dataset)


```

Column {data-width=650}
-----------------------------------------------------------------------

### Histogram Chart

```{r}
# Summary statistics and column names shown at the top of this pane.
# Summarise `dataset` (the table with the last row of `data` removed) so the
# figures describe the same rows that the plots below are drawn from.
summary(dataset)
names(dataset)

# Histogram of total tax revenue with mean (red) and median (blue) markers.
# `bins = 30` is ggplot2's default made explicit, which avoids the
# "`stat_bin()` using `bins = 30`" message when knitting.
# `na.rm = TRUE` keeps the markers defined if a value is missing.
ggplot(dataset, aes(x = X2020.21...Total)) +
  geom_histogram(bins = 30, fill = "coral", color = "steelblue") +
  geom_vline(
    xintercept = mean(dataset$X2020.21...Total, na.rm = TRUE),
    color = "red"
  ) +
  geom_vline(
    xintercept = median(dataset$X2020.21...Total, na.rm = TRUE),
    color = "blue"
  ) +
  labs(title = "Total Tax Revenue", x = "2020.21 Total Tax", y = "Frequency")

# Histogram of tax on vehicles (same explicit bin count as above).
ggplot(dataset, aes(x = X2020.21..Tax.on.Vehicles)) +
  geom_histogram(bins = 30, fill = "blue", color = "black") +
  labs(
    title = "Histogram of Tax on Vehicles",
    x = "Tax on Vehicles",
    y = "Frequency"
  )

# Lattice histogram of total tax revenue, one panel per State/UT.
histogram(
  ~ X2020.21...Total | Name.of.State.UT,
  data = dataset,
  breaks = 50,
  col = c("pink", "green"),
  main = "Total Tax Revenue Vs State/UT"
)

# Bar plot of total tax revenue, one bar per State/UT, filled by the same
# value. Horizontal reference lines:
#   coral = mean tax on vehicles, red = mean total tax revenue.
# The former third line (mean of the percentage column) was dropped: a
# percentage cannot be read against a revenue axis.
# `geom_col()` is the direct equivalent of `geom_bar(stat = "identity")`.
ggplot(
  dataset,
  aes(x = Name.of.State.UT, y = X2020.21...Total, fill = X2020.21...Total)
) +
  geom_col() +
  theme_bw() +
  geom_hline(
    yintercept = mean(dataset$X2020.21..Tax.on.Vehicles, na.rm = TRUE),
    color = "coral"
  ) +
  geom_hline(
    yintercept = mean(dataset$X2020.21...Total, na.rm = TRUE),
    color = "red"
  ) +
  labs(title = "Total Tax Revenue by State/UT", x = "States/UT", y = "Total") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))


```

Column {data-width=350}
-----------------------------------------------------------------------

###  Scatter plot 

```{r}
# Point plot: one point per State/UT showing its 2020-21 (RE) percentage of
# own tax revenue. State names are rotated 45 degrees for legibility.
ggplot(data = dataset) +
  geom_point(
    mapping = aes(
      x = Name.of.State.UT,
      y = X2020.21..R.E...Percentage.of.States.UTs.Own.Tax.Revenue
    ),
    colour = "darkgreen"
  ) +
  ggtitle("Name of State/UT vs. 2020.21(RE)Percentage of States/UTs Own Tax Revenue") +
  xlab("Name.of.State.UT") +
  ylab("2020.21(RE)Percentage.of.States.Revenue") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Point plot: one point per State/UT showing its 2020-21 total tax revenue.
ggplot(data = dataset) +
  geom_point(
    mapping = aes(x = Name.of.State.UT, y = X2020.21...Total),
    colour = "coral"
  ) +
  ggtitle("Name of State/UT vs. X2020.21 Total Tax Revenue") +
  xlab("Name of State/UT") +
  ylab("2020.21 Total Tax Revenue") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Distribution of tax on vehicles with mean (red) and median (blue) markers.
# The column is continuous, so the values are binned with a histogram;
# `geom_bar()` would count each distinct value separately and draw one
# hairline bar per row instead of a distribution.
ggplot(dataset, aes(x = X2020.21..Tax.on.Vehicles)) +
  geom_histogram(bins = 30, fill = "steelblue", color = "orange") +
  geom_vline(
    xintercept = mean(dataset$X2020.21..Tax.on.Vehicles, na.rm = TRUE),
    color = "red"
  ) +
  geom_vline(
    xintercept = median(dataset$X2020.21..Tax.on.Vehicles, na.rm = TRUE),
    color = "blue"
  ) +
  labs(
    title = "Distribution of 2020-21 Tax on Vehicles",
    x = "2020-21 Tax on Vehicles",
    y = "Count"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

```

### Box plot

```{r}
# Box plot of the 2020-21 (RE) percentage of States/UTs own tax revenue.
# Only `y` is mapped (the stray empty `x = ` argument is removed), and the
# title now names the plotted column instead of "Road Length".
ggplot(
  dataset,
  aes(y = X2020.21..R.E...Percentage.of.States.UTs.Own.Tax.Revenue)
) +
  geom_boxplot(fill = "lightcoral", color = "black") +
  theme_minimal() +
  labs(
    title = "Box Plot of 2020-21 (RE) Percentage of States/UTs Own Tax Revenue",
    y = "2020-21(RE)Percentage.of.Tax.Revenue"
  )

# Single box plot summarising total tax revenue across all rows of `dataset`.
ggplot(data = dataset) +
  geom_boxplot(
    mapping = aes(y = X2020.21...Total),
    fill = "lightblue",
    colour = "black"
  ) +
  ggtitle("Box Plot of Total Tax Revenue") +
  ylab("Total Tax Revenue") +
  theme_minimal()

# Lattice box-and-whisker plot of total tax revenue, one panel per State/UT.
bwplot(
  ~ X2020.21...Total | Name.of.State.UT,
  data = dataset,
  main = "Total Tax Revenue Vs State/UT"
)



```