Column
[1] "Organization: Telangana State Civil Supplies Corporation Limited"
[1] "The dataset about the FP Shop Distribution data in Telangana."
[1] "This dataset contains 16970 obs. of 13 variables:"
[1] "distCode : District Code"
[1] "officeCode : Represents the office code"
[1] "shopNo : Representing Shop number"
[1] "noOfRcs : No.of Ration Cards"
[1] "noOfTrans: No.of Transactions"
[1] "riceAfsc : Rice to Anthtodaya Food Security Cards"
[1] "riceFsc : Rice to Food Security Card"
[1] "riceAap : Rice to Annapurna Card"
[1] "sugar : sugar quantity"
[1] "totalAmount : total amount"
[1] "otherShopTransCnt : Other shops transaction quantity"
Distance measure
sub1.Classifi sub1.distCode sub1.officeCode sub1.shopNo
1 Rural 532 532001 1901001
2 Rural 532 532001 1901002
3 Rural 532 532001 1901003
4 Rural 532 532001 1901004
5 Rural 532 532001 1901005
6 Rural 532 532001 1901006
plot of hierarchical view of data

dendrogram

accuracy of clustering
fit
1 2 3
6326 4995 5658
[,1]
1 6326
2 4995
3 5658
[1] 0.3725779
---
title: "Public distribution system detect corruption of sugar using Kmeans algorithm"
output:
  flexdashboard::flex_dashboard:
    orientation: columns
    vertical_layout: scroll
    theme: simplex
    social: menu
    source_code: embed
---
```{r setup, include=FALSE}
library(flexdashboard)
library(tidyverse)
library(cluster)
library(dplyr)
library(lattice)
library(readr)
library(dendextend)
```
Column {data-width=650}
-----------------------------------------------------------------------
```{r}
# Load the FP-shop transaction extract, tag every district as Rural or Urban,
# and build `sub`, the table that the later chunks cluster.
# The clustering further down uses hclust(), which is agglomerative
# (bottom-up) hierarchical clustering, not divisive.
# NOTE(review): the absolute Windows path ties this document to one machine.
data <- read.csv("C:\\Users\\India\\Documents\\Dma\\shop-wise-trans-detailsTelangana State Civil Supplies Distribution - FP Shop Wise Transactions Data_01-01-2021 to 31-01-2021.csv")

# District -> Rural/Urban lookup. The two vectors are positional: the n-th
# entry of `Classifi` labels the n-th entry of `distName` (three per line).
distric_class <- data.frame(
  distName = c(
    "Adilabad", "Bhadrdri Kothagudem", "Hanumakonda",
    "Hyderabad", "Jagityal", "Janagaon",
    "Jayashankar Bhupalpalli", "Jogulamba Gadwal", "Kamareddy",
    "Karimnagar", "Khammam", "Kumarambheem Asifabad",
    "Mahabubabad", "Mahbubnagar", "Manchiryala",
    "Medak", "Medchal", "Mulugu",
    "Nagarkarnool", "Nalgonda", "Narayanpet",
    "Nirmal", "Nizamabad", "Peddapalli",
    "Rajanna Siricilla", "Ranga Reddy", "Sangareddy",
    "Siddipet", "Suryapet", "Vikarabad",
    "Wanaparthy", "Warangal", "Yadadri Bhuvanagiri"
  ),
  Classifi = c(
    "Rural", "Rural", "Urban",
    "Urban", "Rural", "Urban",
    "Rural", "Rural", "Rural",
    "Rural", "Rural", "Rural",
    "Rural", "Rural", "Rural",
    "Rural", "Rural", "Rural",
    "Rural", "Rural", "Rural",
    "Urban", "Rural", "Rural",
    "Rural", "Urban", "Rural",
    "Rural", "Urban", "Rural",
    "Urban", "Rural", "Rural"
  )
)

# Left join on the district name: every transaction row is kept, and rows
# whose distName is not in the lookup get NA in `Classifi`.
sub1 <- merge(data, distric_class, by = "distName", all.x = TRUE)

# Columns handed to the distance computation. Passing `sub1$...` expressions
# keeps the resulting column names as sub1.Classifi, sub1.distCode, ...
sub <- data.frame(sub1$Classifi, sub1$distCode, sub1$officeCode, sub1$shopNo)

# Dataset description: bare strings at top level are auto-printed into the
# rendered dashboard, one per line.
"Organization: Telangana State Civil Supplies Corporation Limited"
"The dataset about the FP Shop Distribution data in Telangana."
"This dataset contains 16970 obs. of 13 variables:"
"distCode : District Code"
"officeCode : Represents the office code"
"shopNo : Representing Shop number"
"noOfRcs : No.of Ration Cards"
"noOfTrans: No.of Transactions"
"riceAfsc : Rice to Anthtodaya Food Security Cards"
"riceFsc : Rice to Food Security Card"
"riceAap : Rice to Annapurna Card"
"sugar : sugar quantity"
"totalAmount : total amount"
"otherShopTransCnt : Other shops transaction quantity"
```
### Distance measure
```{r}
# Preview the first rows of the table that is about to be clustered.
head(sub)

# Pairwise Euclidean distances between all rows of `sub`.
# NOTE(review): `sub` carries the Rural/Urban label as text next to three
# identifier codes, while dist() works on a numeric matrix. Confirm how the
# text column is treated and whether these are the intended features.
distancematrix <- dist(sub, method = "euclidean")
```
### plot of hierarchical view of data
```{r}
# Seed the RNG so any random draws from here on repeat across knits.
set.seed(40)

# Hierarchical clustering with average linkage on the distance matrix.
hier_clus <- hclust(distancematrix, method = "average")

# Draw the resulting tree.
plot(
  hier_clus,
  hang = 0.1,
  main = "hierarchical clustering",
  xlab = "",
  ylab = "Height"
)
```
### dendrogram
```{r}
library(dendextend)

# Turn the hclust result into a dendrogram object so its labels can be styled.
dend <- as.dendrogram(hier_clus)

# Palette for the leaf labels.
# NOTE(review): 12 colours for far more leaves; presumably recycled by
# color_labels(). "white" labels will not show on a white background.
COLS <- c(
  "orange", "turquoise", "pink", "lightblue", "darkgreen", "blue",
  "green", "white", "brown", "black", "yellow", "purple"
)
# Disabled in the original chunk:
#names(COLS)=unique(sub1)
dend <- color_labels(dend, col = COLS)

# Plot the coloured dendrogram, then box the three-cluster cut in red.
plot(
  dend,
  main = "hierarchical clustering",
  xlab = "PDS levels ",
  ylab = "Height"
)
rect.hclust(hier_clus, k = 3, border = "red")
```
### accuracy of clustering
```{r}
# Cut the tree into three clusters and measure how well they agree with the
# Rural/Urban label stored in `sub`.
fit <- cutree(hier_clus, k = 3)

# Cluster sizes.
table(fit)

# Clusters (rows) against the known class (columns). A one-way table(fit) has
# a single column, so diag() on it picks out only the first cluster's count
# and the ratio is just that cluster's share of the data. A two-way table is
# needed to compare clusters with labels.
# cutree() returns memberships in the row order of `sub`, so the two vectors
# line up. Rows whose class is NA are dropped by table().
cm <- table(cluster = fit, class = sub[["sub1.Classifi"]])
cm

# Majority-class agreement (cluster purity): label each cluster with its most
# frequent class and count the share of rows that match. There are three
# clusters but two classes, so diag() has no meaningful pairing here.
accuracy <- if (sum(cm) > 0) {
  sum(apply(cm, 1, max)) / sum(cm)
} else {
  NA_real_ # no labelled rows to compare against
}
accuracy
```