#importing the libraries
library(ggplot2)
library(stringr)
library(viridis)
## Loading required package: viridisLite
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(readr)
# Load the GDP table for Tamil Nadu into `data`.
# The location defaults to the original author's local path. Set the
# GDP_TAMILNADU_CSV environment variable to read the file from another
# location without editing this script.
gdp_csv_path <- Sys.getenv(
  "GDP_TAMILNADU_CSV",
  unset = "C:/Users/jayas/Desktop/Data Visualization Project/Data-Visualisation-of-GDP-TamilNadu/Data-Visualisation-of-GDP-TamilNadu/gdp_Tamilnadu.csv"
)
# Fail early with the offending path instead of a less specific reader error.
if (!file.exists(gdp_csv_path)) {
  stop("GDP data file not found: ", gdp_csv_path, call. = FALSE)
}
data <- read_csv(gdp_csv_path) #read the data
## Parsed with column specification:
## cols(
## .default = col_double(),
## Year = col_character(),
## Description = col_character()
## )
## See spec(...) for full column specifications.
class(data) # class vector of the object returned by read_csv (a tibble that also inherits from data.frame)
## [1] "spec_tbl_df" "tbl_df" "tbl" "data.frame"
dim(data) # number of rows and number of columns
## [1] 15 32
names(data) # column names: Year, Description, then one column per location
## [1] "Year" "Description" "Chennai" "Coimbatore"
## [5] "Cuddalore" "Dharmapuri" "Dindigul" "Erode"
## [9] "Kancheepuram" "Kanniyakumari" "Karur" "Krishnagiri"
## [13] "Madurai" "Nagapattinam" "Namakkal" "Perambalur"
## [17] "Pudukkotai" "Ramanathapuram" "Salem" "Sivagangai"
## [21] "Thanjavur" "The Nilgris" "Theni" "Thiruchirappalli"
## [25] "Thirunelveli" "Thiruvallur" "Thiruvannamalai" "Thiruvarur"
## [29] "Thoothukodi" "Vellore" "Villupuram" "Virudhunagar"
head(data) # print the first six rows as a quick sanity check of the parsed values
## # A tibble: 6 x 32
## Year Description Chennai Coimbatore Cuddalore Dharmapuri Dindigul Erode
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1999~ GDP (in Rs~ 13215. 11544. 4080. 2316. 4048. 6230.
## 2 2000~ GDP (in Rs~ 12726. 12716. 4431. 2432. 4166. 6669.
## 3 2001~ GDP (in Rs~ 12460. 12453. 4467. 2407. 4047. 6455.
## 4 2002~ GDP (in Rs~ 12812. 13171. 4749. 2497. 4085. 6453.
## 5 2003~ GDP (in Rs~ 13832. 14014. 5084. 2523. 4362. 6589.
## 6 2004~ GDP (in Rs~ 15009. 15546. 5389. 2973. 4916. 7763.
## # ... with 24 more variables: Kancheepuram <dbl>, Kanniyakumari <dbl>,
## # Karur <dbl>, Krishnagiri <dbl>, Madurai <dbl>, Nagapattinam <dbl>,
## # Namakkal <dbl>, Perambalur <dbl>, Pudukkotai <dbl>, Ramanathapuram <dbl>,
## # Salem <dbl>, Sivagangai <dbl>, Thanjavur <dbl>, `The Nilgris` <dbl>,
## # Theni <dbl>, Thiruchirappalli <dbl>, Thirunelveli <dbl>, Thiruvallur <dbl>,
## # Thiruvannamalai <dbl>, Thiruvarur <dbl>, Thoothukodi <dbl>, Vellore <dbl>,
## # Villupuram <dbl>, Virudhunagar <dbl>
str(data) # per-column type and leading values; also prints the column spec attribute attached by readr
## tibble [15 x 32] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Year : chr [1:15] "1999-00" "2000-01" "2001-02" "2002-03" ...
## $ Description : chr [1:15] "GDP (in Rs. Cr.)" "GDP (in Rs. Cr.)" "GDP (in Rs. Cr.)" "GDP (in Rs. Cr.)" ...
## $ Chennai : num [1:15] 13215 12726 12460 12812 13832 ...
## $ Coimbatore : num [1:15] 11544 12716 12453 13171 14014 ...
## $ Cuddalore : num [1:15] 4080 4431 4467 4749 5084 ...
## $ Dharmapuri : num [1:15] 2316 2432 2407 2497 2523 ...
## $ Dindigul : num [1:15] 4048 4166 4047 4085 4362 ...
## $ Erode : num [1:15] 6230 6669 6455 6453 6589 ...
## $ Kancheepuram : num [1:15] 6845 7306 7529 7477 8163 ...
## $ Kanniyakumari : num [1:15] 3959 4291 4306 4411 4679 ...
## $ Karur : num [1:15] 1991 2205 2063 2103 2309 ...
## $ Krishnagiri : num [1:15] 2415 2594 2602 2810 2919 ...
## $ Madurai : num [1:15] 5628 5932 5882 5973 6382 ...
## $ Nagapattinam : num [1:15] 2788 2946 2753 2671 2916 ...
## $ Namakkal : num [1:15] 4114 4453 4275 3850 4091 ...
## $ Perambalur : num [1:15] 1593 1495 1435 1325 1319 ...
## $ Pudukkotai : num [1:15] 2403 2569 2593 2555 2653 ...
## $ Ramanathapuram : num [1:15] 2194 2397 2361 2453 2502 ...
## $ Salem : num [1:15] 6804 6807 6586 6896 7244 ...
## $ Sivagangai : num [1:15] 1872 1979 2004 2059 2104 ...
## $ Thanjavur : num [1:15] 3908 4209 4051 4207 4388 ...
## $ The Nilgris : num [1:15] 1517 1616 1569 1605 1871 ...
## $ Theni : num [1:15] 2048 2081 2108 2053 1967 ...
## $ Thiruchirappalli: num [1:15] 5287 5682 5674 5849 6269 ...
## $ Thirunelveli : num [1:15] 5692 5959 5994 6393 6420 ...
## $ Thiruvallur : num [1:15] 6578 7411 7275 7730 8327 ...
## $ Thiruvannamalai : num [1:15] 2978 3135 3236 3072 3259 ...
## $ Thiruvarur : num [1:15] 1881 2088 1845 1775 1893 ...
## $ Thoothukodi : num [1:15] 4008 4316 4265 4349 4612 ...
## $ Vellore : num [1:15] 7175 7452 7485 7571 8122 ...
## $ Villupuram : num [1:15] 3921 3953 4015 3794 4018 ...
## $ Virudhunagar : num [1:15] 5155 5957 5594 5616 6087 ...
## - attr(*, "spec")=
## .. cols(
## .. Year = col_character(),
## .. Description = col_character(),
## .. Chennai = col_double(),
## .. Coimbatore = col_double(),
## .. Cuddalore = col_double(),
## .. Dharmapuri = col_double(),
## .. Dindigul = col_double(),
## .. Erode = col_double(),
## .. Kancheepuram = col_double(),
## .. Kanniyakumari = col_double(),
## .. Karur = col_double(),
## .. Krishnagiri = col_double(),
## .. Madurai = col_double(),
## .. Nagapattinam = col_double(),
## .. Namakkal = col_double(),
## .. Perambalur = col_double(),
## .. Pudukkotai = col_double(),
## .. Ramanathapuram = col_double(),
## .. Salem = col_double(),
## .. Sivagangai = col_double(),
## .. Thanjavur = col_double(),
## .. `The Nilgris` = col_double(),
## .. Theni = col_double(),
## .. Thiruchirappalli = col_double(),
## .. Thirunelveli = col_double(),
## .. Thiruvallur = col_double(),
## .. Thiruvannamalai = col_double(),
## .. Thiruvarur = col_double(),
## .. Thoothukodi = col_double(),
## .. Vellore = col_double(),
## .. Villupuram = col_double(),
## .. Virudhunagar = col_double()
## .. )
# Per-column summary statistics over ALL rows.
# NOTE(review): the printed minima are near zero or negative while the medians
# are in the thousands, so the numeric columns appear to mix more than one kind
# of row (see the Description column) - confirm before reading these figures
# as GDP statistics.
summary(data) #view summary of data
## Year Description Chennai Coimbatore
## Length:15 Length:15 Min. : -3.705 Min. : -2.073
## Class :character Class :character 1st Qu.: 5.391 1st Qu.: 8.218
## Mode :character Mode :character Median :12460.290 Median :11543.990
## Mean : 7501.308 Mean : 7636.176
## 3rd Qu.:13523.555 3rd Qu.:13592.655
## Max. :16670.560 Max. :18205.540
## Cuddalore Dharmapuri Dindigul Erode
## Min. : 0.829 Min. : -1.061 Min. : -2.857 Min. : -3.197
## 1st Qu.: 6.676 1st Qu.: 6.427 1st Qu.: 6.191 1st Qu.: 7.234
## Median :4080.010 Median :2316.500 Median :4046.570 Median :6230.110
## Mean :2647.792 Mean :1461.399 Mean :2426.587 Mean :3835.403
## 3rd Qu.:4916.150 3rd Qu.:2509.850 3rd Qu.:4263.605 3rd Qu.:6628.515
## Max. :5950.460 Max. :3491.910 Max. :5543.960 Max. :8977.030
## Kancheepuram Kanniyakumari Karur Krishnagiri
## Min. : -0.685 Min. : 0.361 Min. : -6.449 Min. : 0.317
## 1st Qu.: 7.713 1st Qu.: 8.359 1st Qu.: 9.470 1st Qu.: 8.511
## Median : 6845.130 Median :3959.060 Median :1990.600 Median :2414.740
## Mean : 4436.519 Mean :2599.614 Mean :1274.735 Mean :1636.714
## 3rd Qu.: 7845.755 3rd Qu.:4544.930 3rd Qu.:2257.080 3rd Qu.:2864.645
## Max. :10481.570 Max. :6267.260 Max. :3033.690 Max. :4056.190
## Madurai Nagapattinam Namakkal Perambalur
## Min. : -0.843 Min. : -6.542 Min. : -9.929 Min. : -7.678
## 1st Qu.: 6.977 1st Qu.: 4.615 1st Qu.: 7.973 1st Qu.: 1.417
## Median :5627.750 Median :2671.120 Median :3850.410 Median :1318.950
## Mean :3527.834 Mean :1563.301 Mean :2392.382 Mean : 772.813
## 3rd Qu.:6177.510 3rd Qu.:2915.915 3rd Qu.:4363.840 3rd Qu.:1457.385
## Max. :8248.530 Max. :3277.020 Max. :5424.440 Max. :1593.450
## Pudukkotai Ramanathapuram Salem Sivagangai
## Min. : -1.464 Min. : -1.498 Min. : -3.237 Min. : 1.298
## 1st Qu.: 6.020 1st Qu.: 4.725 1st Qu.: 6.241 1st Qu.: 6.219
## Median :2402.600 Median :2193.920 Median :6586.380 Median :1872.150
## Mean :1482.451 Mean :1352.031 Mean :4047.625 Mean :1178.045
## 3rd Qu.:2623.250 3rd Qu.:2477.440 3rd Qu.:7069.940 3rd Qu.:2081.610
## Max. :3331.440 Max. :2904.960 Max. :9468.380 Max. :2708.330
## Thanjavur The Nilgris Theni Thiruchirappalli
## Min. : -3.746 Min. : -2.941 Min. : -4.183 Min. : -0.157
## 1st Qu.: 7.099 1st Qu.: 7.190 1st Qu.: 2.971 1st Qu.: 8.133
## Median :3907.850 Median :1516.620 Median :1967.480 Median :5286.930
## Mean :2403.341 Mean : 983.934 Mean :1150.737 Mean :3456.095
## 3rd Qu.:4298.460 3rd Qu.:1743.395 3rd Qu.:2094.445 3rd Qu.:6059.255
## Max. :5438.130 Max. :2325.920 Max. :2437.570 Max. :8360.730
## Thirunelveli Thiruvallur Thiruvannamalai Thiruvarur
## Min. : 0.422 Min. : -1.842 Min. : -5.075 Min. : -11.655
## 1st Qu.: 6.520 1st Qu.: 8.863 1st Qu.: 6.520 1st Qu.: 4.718
## Median :5691.770 Median : 6577.750 Median :2978.060 Median :1774.790
## Mean :3584.937 Mean : 4522.094 Mean :1853.143 Mean :1040.036
## 3rd Qu.:6406.090 3rd Qu.: 8028.290 3rd Qu.:3247.515 3rd Qu.:1917.950
## Max. :8302.850 Max. :11040.030 Max. :4314.280 Max. :2131.000
## Thoothukodi Vellore Villupuram Virudhunagar
## Min. : -1.192 Min. : 0.450 Min. : -5.497 Min. : -6.088
## 1st Qu.: 6.579 1st Qu.: 7.604 1st Qu.: 5.405 1st Qu.: 8.677
## Median :4008.460 Median : 7175.180 Median :3793.900 Median :5155.110
## Mean :2593.326 Mean : 4442.825 Mean :2284.989 Mean :3352.928
## 3rd Qu.:4480.850 3rd Qu.: 7846.860 3rd Qu.:4016.195 3rd Qu.:6021.940
## Max. :6128.820 Max. :10343.450 Max. :5120.190 Max. :7891.420
colSums(is.na(data)) # number of missing values in each column
## Year Description Chennai Coimbatore
## 0 0 0 0
## Cuddalore Dharmapuri Dindigul Erode
## 0 0 0 0
## Kancheepuram Kanniyakumari Karur Krishnagiri
## 0 0 0 0
## Madurai Nagapattinam Namakkal Perambalur
## 0 0 0 0
## Pudukkotai Ramanathapuram Salem Sivagangai
## 0 0 0 0
## Thanjavur The Nilgris Theni Thiruchirappalli
## 0 0 0 0
## Thirunelveli Thiruvallur Thiruvannamalai Thiruvarur
## 0 0 0 0
## Thoothukodi Vellore Villupuram Virudhunagar
## 0 0 0 0
# Keep only the rows that have a value in every column, then re-count the
# missing values per column to confirm that none remain.
rows_without_na <- complete.cases(data)
data <- data[rows_without_na, ]
colSums(is.na(data))
## Year Description Chennai Coimbatore
## 0 0 0 0
## Cuddalore Dharmapuri Dindigul Erode
## 0 0 0 0
## Kancheepuram Kanniyakumari Karur Krishnagiri
## 0 0 0 0
## Madurai Nagapattinam Namakkal Perambalur
## 0 0 0 0
## Pudukkotai Ramanathapuram Salem Sivagangai
## 0 0 0 0
## Thanjavur The Nilgris Theni Thiruchirappalli
## 0 0 0 0
## Thirunelveli Thiruvallur Thiruvannamalai Thiruvarur
## 0 0 0 0
## Thoothukodi Vellore Villupuram Virudhunagar
## 0 0 0 0
str(data) # re-inspect the structure after the complete-case row filter
## tibble [15 x 32] (S3: tbl_df/tbl/data.frame)
## $ Year : chr [1:15] "1999-00" "2000-01" "2001-02" "2002-03" ...
## $ Description : chr [1:15] "GDP (in Rs. Cr.)" "GDP (in Rs. Cr.)" "GDP (in Rs. Cr.)" "GDP (in Rs. Cr.)" ...
## $ Chennai : num [1:15] 13215 12726 12460 12812 13832 ...
## $ Coimbatore : num [1:15] 11544 12716 12453 13171 14014 ...
## $ Cuddalore : num [1:15] 4080 4431 4467 4749 5084 ...
## $ Dharmapuri : num [1:15] 2316 2432 2407 2497 2523 ...
## $ Dindigul : num [1:15] 4048 4166 4047 4085 4362 ...
## $ Erode : num [1:15] 6230 6669 6455 6453 6589 ...
## $ Kancheepuram : num [1:15] 6845 7306 7529 7477 8163 ...
## $ Kanniyakumari : num [1:15] 3959 4291 4306 4411 4679 ...
## $ Karur : num [1:15] 1991 2205 2063 2103 2309 ...
## $ Krishnagiri : num [1:15] 2415 2594 2602 2810 2919 ...
## $ Madurai : num [1:15] 5628 5932 5882 5973 6382 ...
## $ Nagapattinam : num [1:15] 2788 2946 2753 2671 2916 ...
## $ Namakkal : num [1:15] 4114 4453 4275 3850 4091 ...
## $ Perambalur : num [1:15] 1593 1495 1435 1325 1319 ...
## $ Pudukkotai : num [1:15] 2403 2569 2593 2555 2653 ...
## $ Ramanathapuram : num [1:15] 2194 2397 2361 2453 2502 ...
## $ Salem : num [1:15] 6804 6807 6586 6896 7244 ...
## $ Sivagangai : num [1:15] 1872 1979 2004 2059 2104 ...
## $ Thanjavur : num [1:15] 3908 4209 4051 4207 4388 ...
## $ The Nilgris : num [1:15] 1517 1616 1569 1605 1871 ...
## $ Theni : num [1:15] 2048 2081 2108 2053 1967 ...
## $ Thiruchirappalli: num [1:15] 5287 5682 5674 5849 6269 ...
## $ Thirunelveli : num [1:15] 5692 5959 5994 6393 6420 ...
## $ Thiruvallur : num [1:15] 6578 7411 7275 7730 8327 ...
## $ Thiruvannamalai : num [1:15] 2978 3135 3236 3072 3259 ...
## $ Thiruvarur : num [1:15] 1881 2088 1845 1775 1893 ...
## $ Thoothukodi : num [1:15] 4008 4316 4265 4349 4612 ...
## $ Vellore : num [1:15] 7175 7452 7485 7571 8122 ...
## $ Villupuram : num [1:15] 3921 3953 4015 3794 4018 ...
## $ Virudhunagar : num [1:15] 5155 5957 5594 5616 6087 ...
## Convert the two character columns (Year, Description) to factors
for (chr_col in c("Year", "Description")) {
  data[[chr_col]] <- as.factor(data[[chr_col]])
}
# Scatter plot of Chennai's GDP per year.
# Only the "GDP (in Rs. Cr.)" rows are plotted.
subdata <- data[which(data$Description == "GDP (in Rs. Cr.)"),
                names(data) %in%
                  c("Year", "Chennai")]
# x positions are the factor codes of Year, kept in row order so that every
# GDP value stays paired with its own year. (Shuffling them with sample()
# would draw each value above a random year.)
year <- as.numeric(subdata$Year)
# Rounded values, used as the tick labels of the y axis.
chennai <- round(subdata$Chennai)
plot.default(year, subdata$Chennai,
             type = "p",
             col = "blue",
             axes = FALSE, # axes are drawn by hand below
             xlab = "Year",
             ylab = "GDP (in Rs. Cr.)",
             main = " The GDP of Chennai(State Capital)")
# One tick per plotted year, labelled with the year string.
axis(side = 1, at = year, labels = subdata$Year)
# One tick per observed GDP value, labelled with the rounded value.
axis(side = 2, at = subdata$Chennai, labels = chennai)
# Grouped bar chart comparing the yearly GDP of Coimbatore and Erode.
# Only the "GDP (in Rs. Cr.)" rows are used.
subdata <- data[which(data$Description == "GDP (in Rs. Cr.)"),
                names(data) %in%
                  c("Year", "Coimbatore", "Erode")]
# One row per year, one column per city; barplot() needs a matrix.
cities <- as.matrix(cbind(subdata$Coimbatore, subdata$Erode))
colnames(cities) <- c("Coimbatore", "Erode")
rownames(cities) <- as.character(subdata$Year)
# Upper limit of the y axis, taken over both cities so neither can be
# clipped. Stored under its own name so the base function max() is not
# shadowed by a variable.
gdp_max <- max(cities)
# A single palette shared by the bars and the legend keeps them in sync.
bar_colours <- viridis(2, begin = 1, end = 0, direction = 1)
# t() puts the cities in rows so that each year becomes one group of bars.
barplot(t(cities),
        beside = TRUE,
        col = bar_colours,
        ylim = c(0, gdp_max),
        xlab = "Years",
        ylab = "GDP (in Rs. Cr.)",
        main = "Comparison of GDP between Coimbatore and Erode")
legend("topleft",
       colnames(cities),
       fill = bar_colours,
       cex = 0.7,
       xjust = 1)
# Line chart comparing the yearly GDP of four cities.
# Only the "GDP (in Rs. Cr.)" rows are used.
subdata1 <- data[which(data$Description == "GDP (in Rs. Cr.)"),
                 names(data) %in%
                   c("Year", "Chennai", "Coimbatore", "Erode", "Madurai")]
# One row per year, one column per city.
cities <- as.matrix(cbind(subdata1$Coimbatore, subdata1$Erode,
                          subdata1$Chennai, subdata1$Madurai))
# Take the year labels from subdata1 itself, so this section does not depend
# on a subdata object left behind by an earlier section.
rownames(cities) <- as.character(subdata1$Year)
colnames(cities) <- c("Coimbatore", "Erode", "Chennai", "Madurai")
# A single palette shared by the lines and the legend keeps them in sync.
line_colours <- magma(4, begin = 1, end = 0, direction = 1)
matplot(cities,
        type = "o",
        pch = 1,
        col = line_colours,
        lwd = 2,
        lty = 1,
        xaxt = "n", # the default x axis would show row indices, not years
        xlab = "Years",
        main = "GDP comparison for 4 cities",
        ylab = "GDP (in Rs. Cr.) "
)
# Label each row position with its year.
axis(side = 1, at = seq_len(nrow(cities)), labels = rownames(cities))
legend("topleft",
       colnames(cities),
       fill = line_colours,
       cex = 0.7,
       xjust = 1)
# Pie chart of every location's share of the GDP in 2000-01.
# Select the single "GDP (in Rs. Cr.)" row of that year, drop the two
# non-numeric columns and convert what is left to a numeric matrix.
subdata2 <- as.matrix(
  subset(data,
         Year == "2000-01" & Description == "GDP (in Rs. Cr.)",
         select = -c(Year, Description))
)
# Share of the total, as a whole-number percentage.
pct <- round(subdata2 / sum(subdata2) * 100)
# Slice labels of the form "<name> <share> %".
lbls <- paste(colnames(subdata2), pct, "%", sep = " ")
pie(subdata2,
    labels = lbls,
    col = rainbow(length(lbls)),
    main = "The GDP of Cities in 2000-01")
# Kernel density estimate of Salem's GDP.
# Restrict the input to the "GDP (in Rs. Cr.)" rows, as the other plots in
# this script do. The unfiltered column also contains values from rows with
# a different Description, which would distort a density labelled as GDP.
salem_gdp <- data$Salem[which(data$Description == "GDP (in Rs. Cr.)")]
salem <- density(salem_gdp)
plot(salem,
     main = "Density of GDP of Salem",
     col = "blue")
# Box plot of the GDP distribution, one box per location column.
# Keep the "GDP (in Rs. Cr.)" rows and drop the two non-numeric columns.
subdata <- subset(data,
                  Description == "GDP (in Rs. Cr.)",
                  select = -c(Year, Description))
# Year column of the same rows. It is not used by the plots in this section;
# kept in case code outside this section relies on it.
year <- data[which(data$Description == "GDP (in Rs. Cr.)"),
             names(data) %in%
               c("Year")]
# 7 + 10 + 10 + 3 = 30 colours. A single palette shared by the boxes and the
# legend keeps them in sync.
city_colours <- c(rainbow(7), magma(10), viridis(10),
                  "chocolate", "pink", "purple")
boxplot(subdata,
        col = city_colours,
        ylim = c(500, 19500),
        main = "Distribution of GDP",
        xlab = "cities",
        # The plot covers every column of subdata, not four cities, so the
        # axis is labelled with the measured quantity.
        ylab = "GDP (in Rs. Cr.)"
)
legend("topright",
       colnames(subdata),
       fill = city_colours,
       cex = 0.6,
       ncol = 3)
# Histogram of every value in subdata, pooled across all of its columns.
# NOTE(review): xlim stops at 10000; confirm that cutting off larger values
# is intended.
gdp_values <- as.matrix(subdata)
hist(gdp_values,
     breaks = 30,
     xlim = c(0, 10000),
     col = "chocolate",
     main = "frequency of GDP",
     xlab = "GDP")