---
title: "EDA for Assignment_2"
output:
  flexdashboard::flex_dashboard:
    orientation: columns
    vertical_layout: scroll
    theme: cosmo
    social: menu
    source_code: embed
---
```{r setup, include=FALSE}
library(flexdashboard)
library('ggvis')
library('tidyverse')
library('ggplot2')
library(dplyr)
library(tidyr)
library(lattice)
# Load the raw table. Empty cells are read as NA (na.strings = "").
# NOTE(review): the absolute path ties this document to one workspace;
# consider a project-relative path.
data <- read.csv("/cloud/project/RTYB21.csv", header = TRUE, na.strings = "")

# Quick inspection of the raw table (this chunk has include=FALSE, so none of
# the output below is rendered in the dashboard).
head(data)
# View(data)

# Count of missing values per column
colSums(is.na(data))
str(data)

# Summary statistics and column names
summary(data)
names(data)

# Unique State/UT labels
distinct(data, Name.of.State.UT)

# Working copy without the last row of the file.
# NOTE(review): presumably the last row is an aggregate/total row -- confirm.
# drop = FALSE keeps the result a data frame even for a one-column table.
dataset <- data[-nrow(data), , drop = FALSE]
print(dataset)
# View(dataset)
```
Column {data-width=650}
-----------------------------------------------------------------------
### Histogram Chart
```{r}
# Text overview of the raw table, printed ahead of the charts in this pane.
summary(data)
names(data)

# Histogram for Total Tax Revenue with the mean (red) and median (blue) marked.
# na.rm = TRUE: the CSV is read with empty cells as NA, and a single NA would
# otherwise turn the mean/median into NA and silently drop the reference line.
# bins = 30 is the ggplot2 default, stated explicitly to avoid the
# "pick a better value" message.
ggplot(dataset, aes(x = X2020.21...Total)) +
  geom_histogram(bins = 30, fill = "coral", color = "steelblue") +
  geom_vline(
    xintercept = mean(dataset$X2020.21...Total, na.rm = TRUE),
    color = "red"
  ) +
  geom_vline(
    xintercept = median(dataset$X2020.21...Total, na.rm = TRUE),
    color = "blue"
  ) +
  labs(title = "Total Tax Revenue", x = "2020.21 Total Tax", y = "Frequency")

# Histogram for Total Tax on Vehicles
ggplot(dataset, aes(x = X2020.21..Tax.on.Vehicles)) +
  geom_histogram(bins = 30, fill = "blue", color = "black") +
  labs(
    title = "Histogram of Tax on Vehicles",
    x = "Tax on Vehicles",
    y = "Frequency"
  )

# Lattice histogram of Total Tax Revenue, one panel per State/UT
histogram(
  ~ X2020.21...Total | Name.of.State.UT,
  data = dataset,
  breaks = 50,
  col = c("pink", "green"),
  main = "Total Tax Revenue Vs State/UT"
)

# Bar plot of Total Tax Revenue per State/UT (geom_col() is the direct
# equivalent of geom_bar(stat = "identity")).
# Horizontal reference lines: coral = mean Tax on Vehicles, red = mean Total,
# green = mean percentage of own tax revenue.
# NOTE(review): the green line is a percentage drawn on the revenue axis, and
# the title mentions Tax on Vehicles while the bars show Total -- confirm both
# are intended.
ggplot(
  dataset,
  aes(x = Name.of.State.UT, y = X2020.21...Total, fill = X2020.21...Total)
) +
  geom_col() +
  theme_bw() +
  geom_hline(
    yintercept = mean(dataset$X2020.21..Tax.on.Vehicles, na.rm = TRUE),
    color = "coral"
  ) +
  geom_hline(
    yintercept = mean(dataset$X2020.21...Total, na.rm = TRUE),
    color = "red"
  ) +
  geom_hline(
    yintercept = mean(
      dataset$X2020.21..R.E...Percentage.of.States.UTs.Own.Tax.Revenue,
      na.rm = TRUE
    ),
    color = "green"
  ) +
  labs(title = "States.UTs & Tax on Vehicles", x = "States/UT", y = "Total") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
Column {data-width=350}
-----------------------------------------------------------------------
### Scatter plot
```{r}
# Scatter plot: State/UT vs. 2020-21 (RE) percentage of States/UTs own tax
# revenue. State labels are rotated 45 degrees so they do not overlap.
ggplot(
  dataset,
  aes(
    x = Name.of.State.UT,
    y = X2020.21..R.E...Percentage.of.States.UTs.Own.Tax.Revenue
  )
) +
  geom_point(color = "darkgreen") +
  theme_minimal() +
  labs(
    title = "Name of State/UT vs. 2020.21(RE)Percentage of States/UTs Own Tax Revenue",
    x = "Name.of.State.UT",
    y = "2020.21(RE)Percentage.of.States.Revenue"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Scatter plot: State/UT vs. 2020-21 Total Tax Revenue
ggplot(dataset, aes(x = Name.of.State.UT, y = X2020.21...Total)) +
  geom_point(color = "coral") +
  theme_minimal() +
  labs(
    title = "Name of State/UT vs. X2020.21 Total Tax Revenue",
    x = "Name of State/UT",
    y = "2020.21 Total Tax Revenue"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Bar plot of the Tax on Vehicles values with mean (red) and median (blue).
# na.rm = TRUE: the CSV is read with empty cells as NA, and a single NA would
# otherwise make the mean/median NA and drop the reference line.
# NOTE(review): geom_bar() counts rows per distinct x value; if this column is
# continuous, a histogram may show the distribution better -- confirm intent.
ggplot(dataset, aes(x = X2020.21..Tax.on.Vehicles)) +
  geom_bar(fill = "steelblue", color = "orange") +
  geom_vline(
    xintercept = mean(dataset$X2020.21..Tax.on.Vehicles, na.rm = TRUE),
    color = "red"
  ) +
  geom_vline(
    xintercept = median(dataset$X2020.21..Tax.on.Vehicles, na.rm = TRUE),
    color = "blue"
  ) +
  # theme_minimal() +
  labs(
    title = "Distribution of 2020-21 Tax on Vehicles",
    x = "2020-21 Tax on Vehicles",
    y = "Count"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
### Box plot
```{r}
# Box plot for the 2020-21 (RE) percentage of States/UTs own tax revenue.
# Only a y aesthetic is mapped (the dangling empty `x = ` argument is gone),
# and the title now names the plotted variable instead of "Road Length".
ggplot(
  dataset,
  aes(y = X2020.21..R.E...Percentage.of.States.UTs.Own.Tax.Revenue)
) +
  geom_boxplot(fill = "lightcoral", color = "black") +
  theme_minimal() +
  labs(
    title = "Box Plot of 2020-21(RE) Percentage of States/UTs Own Tax Revenue",
    y = "2020-21(RE)Percentage.of.Tax.Revenue"
  )

# Box plot for Total Tax Revenue
ggplot(dataset, aes(y = X2020.21...Total)) +
  geom_boxplot(fill = "lightblue", color = "black") +
  theme_minimal() +
  labs(title = "Box Plot of Total Tax Revenue", y = "Total Tax Revenue")

# Lattice box-and-whisker plot of Total Tax Revenue, one panel per State/UT
bwplot(
  ~ X2020.21...Total | Name.of.State.UT,
  data = dataset,
  main = "Total Tax Revenue Vs State/UT"
)
```