spc_tbl_ [74 × 16] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ state_name : chr [1:74] "Andhra Pradesh" "Andhra Pradesh" "Andhra Pradesh" "Andhra Pradesh" ...
$ district_name : chr [1:74] "Krishna" "Krishna" "Krishna" "Krishna" ...
$ block_name : chr [1:74] "Kalidindi" "Kalidindi" "Kalidindi" "Kalidindi" ...
$ panchayat_name : chr [1:74] "Konduru" "Chinatadinada" "Chinatadinada" "Venkatapuram" ...
$ village_name : chr [1:74] "Konduru" "Tadinada" "Tadinada" "Venkatapuram" ...
$ habitation_name : chr [1:74] "Konduru Scl" "Pallepalem" "Harijanawada" "Harijanawada" ...
$ habitation_id : chr [1:74] "0000015089" "0000015094" "0000015095" "0000015105" ...
$ households : num [1:74] 111 275 317 64 3 327 599 106 115 365 ...
$ house_connection_by_pws : num [1:74] 50 13 113 64 3 239 396 50 99 82 ...
$ ispws : chr [1:74] "Yes" "Yes" "Yes" "Yes" ...
$ gen_current_pop : num [1:74] 0 994 651 232 4 ...
$ sc_current_pop : num [1:74] 397 13 477 0 9 654 201 5 194 0 ...
$ st_current_pop : num [1:74] 0 0 0 0 0 2 33 0 0 0 ...
$ total_current_pop : num [1:74] 397 1007 1128 232 13 ...
$ water_quality_contamination: logi [1:74] NA NA NA NA NA NA ...
$ update_date : Date[1:74], format: "2024-06-07" "2024-06-07" ...
- attr(*, "spec")=
.. cols(
.. state_name = col_character(),
.. district_name = col_character(),
.. block_name = col_character(),
.. panchayat_name = col_character(),
.. village_name = col_character(),
.. habitation_name = col_character(),
.. habitation_id = col_character(),
.. households = col_double(),
.. house_connection_by_pws = col_double(),
.. ispws = col_character(),
.. gen_current_pop = col_double(),
.. sc_current_pop = col_double(),
.. st_current_pop = col_double(),
.. total_current_pop = col_double(),
.. water_quality_contamination = col_logical(),
.. update_date = col_date(format = "")
.. )
- attr(*, "problems")=<externalptr>
state_name district_name block_name panchayat_name
Length:74 Length:74 Length:74 Length:74
Class :character Class :character Class :character Class :character
Mode :character Mode :character Mode :character Mode :character
village_name habitation_name habitation_id households
Length:74 Length:74 Length:74 Min. : 3.0
Class :character Class :character Class :character 1st Qu.:111.5
Mode :character Mode :character Mode :character Median :252.0
Mean :269.5
3rd Qu.:373.5
Max. :847.0
house_connection_by_pws ispws gen_current_pop sc_current_pop
Min. : 1.0 Length:74 Min. : 0.0 Min. : 0.00
1st Qu.: 26.0 Class :character 1st Qu.: 314.5 1st Qu.: 0.00
Median : 88.5 Mode :character Median : 835.0 Median : 5.00
Mean :139.1 Mean : 968.3 Mean : 83.45
3rd Qu.:188.5 3rd Qu.:1353.2 3rd Qu.:116.00
Max. :675.0 Max. :6785.0 Max. :654.00
st_current_pop total_current_pop water_quality_contamination
Min. : 0.000 Min. : 13.0 Mode:logical
1st Qu.: 0.000 1st Qu.: 402.0 NA's:74
Median : 0.000 Median : 923.5
Mean : 7.149 Mean :1058.9
3rd Qu.: 0.000 3rd Qu.:1377.0
Max. :144.000 Max. :6929.0
update_date
Min. :2024-06-07
1st Qu.:2024-06-07
Median :2024-06-07
Mean :2024-06-07
3rd Qu.:2024-06-07
Max. :2024-06-07
# A tibble: 6 × 16
state_name district_name block_name panchayat_name village_name
<chr> <chr> <chr> <chr> <chr>
1 Andhra Pradesh Krishna Kalidindi Konduru Konduru
2 Andhra Pradesh Krishna Kalidindi Chinatadinada Tadinada
3 Andhra Pradesh Krishna Kalidindi Chinatadinada Tadinada
4 Andhra Pradesh Krishna Kalidindi Venkatapuram Venkatapuram
5 Andhra Pradesh Krishna Kalidindi Korukollu Korukollu
6 Andhra Pradesh Krishna Kalidindi Lodida Lanka Kalidindi
# ℹ 11 more variables: habitation_name <chr>, habitation_id <chr>,
# households <dbl>, house_connection_by_pws <dbl>, ispws <chr>,
# gen_current_pop <dbl>, sc_current_pop <dbl>, st_current_pop <dbl>,
# total_current_pop <dbl>, water_quality_contamination <lgl>,
# update_date <date>
households house_connection_by_pws gen_current_pop
households 1.0000000 0.83856282 0.6786421
house_connection_by_pws 0.8385628 1.00000000 0.6142656
gen_current_pop 0.6786421 0.61426562 1.0000000
sc_current_pop 0.1042510 0.07417009 -0.1165441
st_current_pop 0.3451441 0.33761499 0.7850502
total_current_pop 0.6937320 0.62561272 0.9899870
sc_current_pop st_current_pop total_current_pop
households 0.10425101 0.34514412 0.69373195
house_connection_by_pws 0.07417009 0.33761499 0.62561272
gen_current_pop -0.11654414 0.78505023 0.98998697
sc_current_pop 1.00000000 -0.09786525 0.02418007
st_current_pop -0.09786525 1.00000000 0.78463029
total_current_pop 0.02418007 0.78463029 1.00000000
---
title: "EDA for Households"
output:
  flexdashboard::flex_dashboard:
    orientation: rows
    vertical_layout: scroll
    theme: simplex
    social: menu
    source_code: embed
    navbar:
      - { title: "Dataset Description", href: "#dataset-description" }
      - { title: "Univariate Analysis", href: "#univariate-analysis" }
      - { title: "Bivariate Analysis", href: "#bivariate-analysis" }
      - { title: "Multivariate Analysis", href: "#multivariate-analysis" }
---
```{r setup, include=FALSE}
library(flexdashboard)
library(DT)
library(dplyr)
library(readr)
# Load the data set from the CSV file next to this document; every later
# chunk reads from this `data` object.
data <- read_csv(file = "home.csv")
```
## Dataset Description {#dataset-description .tabset .active}
### View of dataset
```{r}
# Print the column-by-column structure of the data set.
str(data)

# Interactive table with export buttons.
# DataTables option names are case-sensitive: the button list must be passed
# as lowercase `buttons`. Spelled `Buttons` it is ignored and the extension
# falls back to its default button set instead of the four requested here.
datatable(
  data,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "csv", "print", "pdf")
  )
)
```
### summary of the dataset
```{r}
# Per-column summary of the whole data set.
summary(object = data)
```
### Head of DataSet
```{r}
# Show the first six rows (the default `n` of head(), spelled out here).
head(data, n = 6L)
```
## Univariate Analysis {#univariate-analysis .tabset}
### Histogram for Households
```{r}
# Histogram of the `households` column.
hist(
  x = data$households,
  main = "Distribution of Households",
  xlab = "Households",
  col = "lightblue",
  border = "black"
)
```
### Histogram for current population generation
```{r}
# Histogram of the `gen_current_pop` column.
# The x axis previously read "Households" (copied from the chunk above), which
# mislabelled the plotted variable; it now names the population column.
# NOTE(review): "gen" presumably denotes the general population category
# (alongside sc/st) rather than "generation" - confirm and adjust the title.
hist(
  data$gen_current_pop,
  main = "Distribution of current population generation ",
  xlab = "Current population (gen_current_pop)",
  col = "red",
  border = "black"
)
```
### Boxplot
```{r}
# Box-and-whisker plot of the `households` column.
boxplot(
  x = data$households,
  main = "Boxplot of Households",
  ylab = "Households"
)
```
## Bivariate Analysis {#bivariate-analysis .tabset}
### Correlation
```{r}
# Flag the numeric columns. vapply() always returns a logical vector (one
# TRUE/FALSE per column), whereas sapply() can change its return type
# depending on the input.
numeric_cols <- vapply(data, is.numeric, logical(1))

# Pairwise correlations between the numeric columns, using only rows that
# have no missing values in any of them.
correlation_matrix <- cor(data[, numeric_cols], use = "complete.obs")
print(correlation_matrix)
```
### PCA
```{r}
# Select the numeric columns once and reuse them for both the PCA and the
# distance matrix. vapply() guarantees a logical selector, unlike sapply().
numeric_cols <- vapply(data, is.numeric, logical(1))
numeric_data <- data[, numeric_cols]

# Principal component analysis on centred, unit-variance columns.
pca <- prcomp(numeric_data, center = TRUE, scale. = TRUE)
biplot(pca, main = "PCA Biplot")

# Pairwise row distances; `dist_matrix` is consumed by the clustering and MDS
# chunks further down, so it must stay defined here.
# NOTE(review): distances use the raw (unscaled) columns, unlike the PCA
# above - confirm this is intended.
dist_matrix <- dist(numeric_data)
```
## Multivariate Analysis {#multivariate-analysis .tabset}
### hierarchical clustering
```{r}
# Hierarchical clustering on `dist_matrix`, which is built in the PCA chunk.
hc <- hclust(dist_matrix)

# Plot dendrogram
plot(hc, main = "Hierarchical Clustering Dendrogram")
# The MDS coordinates are computed in the chunk that plots them; the unused
# duplicate computation that used to sit here was removed.
```
### Plot MDS results
```{r}
# Perform MDS on `dist_matrix`, which is built in the PCA chunk.
mds <- cmdscale(dist_matrix, k = 2) # k is the number of dimensions

# Plot MDS results: draw an empty frame first, then place each observation's
# row number at its MDS coordinates.
plot(mds, type = "n")
# seq_len() stays correct for zero rows, where 1:nrow(data) would yield c(1, 0).
text(mds[, 1], mds[, 2], labels = seq_len(nrow(data)), cex = 0.7)
title("MDS Plot")
```