#importing the libraries

library(ggplot2)
library(stringr)
library(viridis)
## Loading required package: viridisLite
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(readr)

Getting the Data

# Load the district-level table from CSV into `data` (a tibble, via readr).
# NOTE(review): the path is absolute and specific to one Windows machine, so
# the script will not run elsewhere as-is — consider a project-relative path.
data <- read_csv("C:/Users/jayas/Desktop/Data Visualization Project/Data-Visualisation-of-GDP-TamilNadu/Data-Visualisation-of-GDP-TamilNadu/gdp_Tamilnadu.csv") #read the data
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   Year = col_character(),
##   Description = col_character()
## )
## See spec(...) for full column specifications.

Exploring the data

class(data) #data frame
## [1] "spec_tbl_df" "tbl_df"      "tbl"         "data.frame"
dim(data) #view number of rows and columns
## [1] 15 32
names(data) #view column names
##  [1] "Year"             "Description"      "Chennai"          "Coimbatore"      
##  [5] "Cuddalore"        "Dharmapuri"       "Dindigul"         "Erode"           
##  [9] "Kancheepuram"     "Kanniyakumari"    "Karur"            "Krishnagiri"     
## [13] "Madurai"          "Nagapattinam"     "Namakkal"         "Perambalur"      
## [17] "Pudukkotai"       "Ramanathapuram"   "Salem"            "Sivagangai"      
## [21] "Thanjavur"        "The Nilgris"      "Theni"            "Thiruchirappalli"
## [25] "Thirunelveli"     "Thiruvallur"      "Thiruvannamalai"  "Thiruvarur"      
## [29] "Thoothukodi"      "Vellore"          "Villupuram"       "Virudhunagar"
head(data) #view first few observations
## # A tibble: 6 x 32
##   Year  Description Chennai Coimbatore Cuddalore Dharmapuri Dindigul Erode
##   <chr> <chr>         <dbl>      <dbl>     <dbl>      <dbl>    <dbl> <dbl>
## 1 1999~ GDP (in Rs~  13215.     11544.     4080.      2316.    4048. 6230.
## 2 2000~ GDP (in Rs~  12726.     12716.     4431.      2432.    4166. 6669.
## 3 2001~ GDP (in Rs~  12460.     12453.     4467.      2407.    4047. 6455.
## 4 2002~ GDP (in Rs~  12812.     13171.     4749.      2497.    4085. 6453.
## 5 2003~ GDP (in Rs~  13832.     14014.     5084.      2523.    4362. 6589.
## 6 2004~ GDP (in Rs~  15009.     15546.     5389.      2973.    4916. 7763.
## # ... with 24 more variables: Kancheepuram <dbl>, Kanniyakumari <dbl>,
## #   Karur <dbl>, Krishnagiri <dbl>, Madurai <dbl>, Nagapattinam <dbl>,
## #   Namakkal <dbl>, Perambalur <dbl>, Pudukkotai <dbl>, Ramanathapuram <dbl>,
## #   Salem <dbl>, Sivagangai <dbl>, Thanjavur <dbl>, `The Nilgris` <dbl>,
## #   Theni <dbl>, Thiruchirappalli <dbl>, Thirunelveli <dbl>, Thiruvallur <dbl>,
## #   Thiruvannamalai <dbl>, Thiruvarur <dbl>, Thoothukodi <dbl>, Vellore <dbl>,
## #   Villupuram <dbl>, Virudhunagar <dbl>
str(data) #view the structure of data
## tibble [15 x 32] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Year            : chr [1:15] "1999-00" "2000-01" "2001-02" "2002-03" ...
##  $ Description     : chr [1:15] "GDP (in Rs. Cr.)" "GDP (in Rs. Cr.)" "GDP (in Rs. Cr.)" "GDP (in Rs. Cr.)" ...
##  $ Chennai         : num [1:15] 13215 12726 12460 12812 13832 ...
##  $ Coimbatore      : num [1:15] 11544 12716 12453 13171 14014 ...
##  $ Cuddalore       : num [1:15] 4080 4431 4467 4749 5084 ...
##  $ Dharmapuri      : num [1:15] 2316 2432 2407 2497 2523 ...
##  $ Dindigul        : num [1:15] 4048 4166 4047 4085 4362 ...
##  $ Erode           : num [1:15] 6230 6669 6455 6453 6589 ...
##  $ Kancheepuram    : num [1:15] 6845 7306 7529 7477 8163 ...
##  $ Kanniyakumari   : num [1:15] 3959 4291 4306 4411 4679 ...
##  $ Karur           : num [1:15] 1991 2205 2063 2103 2309 ...
##  $ Krishnagiri     : num [1:15] 2415 2594 2602 2810 2919 ...
##  $ Madurai         : num [1:15] 5628 5932 5882 5973 6382 ...
##  $ Nagapattinam    : num [1:15] 2788 2946 2753 2671 2916 ...
##  $ Namakkal        : num [1:15] 4114 4453 4275 3850 4091 ...
##  $ Perambalur      : num [1:15] 1593 1495 1435 1325 1319 ...
##  $ Pudukkotai      : num [1:15] 2403 2569 2593 2555 2653 ...
##  $ Ramanathapuram  : num [1:15] 2194 2397 2361 2453 2502 ...
##  $ Salem           : num [1:15] 6804 6807 6586 6896 7244 ...
##  $ Sivagangai      : num [1:15] 1872 1979 2004 2059 2104 ...
##  $ Thanjavur       : num [1:15] 3908 4209 4051 4207 4388 ...
##  $ The Nilgris     : num [1:15] 1517 1616 1569 1605 1871 ...
##  $ Theni           : num [1:15] 2048 2081 2108 2053 1967 ...
##  $ Thiruchirappalli: num [1:15] 5287 5682 5674 5849 6269 ...
##  $ Thirunelveli    : num [1:15] 5692 5959 5994 6393 6420 ...
##  $ Thiruvallur     : num [1:15] 6578 7411 7275 7730 8327 ...
##  $ Thiruvannamalai : num [1:15] 2978 3135 3236 3072 3259 ...
##  $ Thiruvarur      : num [1:15] 1881 2088 1845 1775 1893 ...
##  $ Thoothukodi     : num [1:15] 4008 4316 4265 4349 4612 ...
##  $ Vellore         : num [1:15] 7175 7452 7485 7571 8122 ...
##  $ Villupuram      : num [1:15] 3921 3953 4015 3794 4018 ...
##  $ Virudhunagar    : num [1:15] 5155 5957 5594 5616 6087 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Year = col_character(),
##   ..   Description = col_character(),
##   ..   Chennai = col_double(),
##   ..   Coimbatore = col_double(),
##   ..   Cuddalore = col_double(),
##   ..   Dharmapuri = col_double(),
##   ..   Dindigul = col_double(),
##   ..   Erode = col_double(),
##   ..   Kancheepuram = col_double(),
##   ..   Kanniyakumari = col_double(),
##   ..   Karur = col_double(),
##   ..   Krishnagiri = col_double(),
##   ..   Madurai = col_double(),
##   ..   Nagapattinam = col_double(),
##   ..   Namakkal = col_double(),
##   ..   Perambalur = col_double(),
##   ..   Pudukkotai = col_double(),
##   ..   Ramanathapuram = col_double(),
##   ..   Salem = col_double(),
##   ..   Sivagangai = col_double(),
##   ..   Thanjavur = col_double(),
##   ..   `The Nilgris` = col_double(),
##   ..   Theni = col_double(),
##   ..   Thiruchirappalli = col_double(),
##   ..   Thirunelveli = col_double(),
##   ..   Thiruvallur = col_double(),
##   ..   Thiruvannamalai = col_double(),
##   ..   Thiruvarur = col_double(),
##   ..   Thoothukodi = col_double(),
##   ..   Vellore = col_double(),
##   ..   Villupuram = col_double(),
##   ..   Virudhunagar = col_double()
##   .. )
summary(data) #view summary of data
##      Year           Description           Chennai            Coimbatore       
##  Length:15          Length:15          Min.   :   -3.705   Min.   :   -2.073  
##  Class :character   Class :character   1st Qu.:    5.391   1st Qu.:    8.218  
##  Mode  :character   Mode  :character   Median :12460.290   Median :11543.990  
##                                        Mean   : 7501.308   Mean   : 7636.176  
##                                        3rd Qu.:13523.555   3rd Qu.:13592.655  
##                                        Max.   :16670.560   Max.   :18205.540  
##    Cuddalore          Dharmapuri          Dindigul            Erode         
##  Min.   :   0.829   Min.   :  -1.061   Min.   :  -2.857   Min.   :  -3.197  
##  1st Qu.:   6.676   1st Qu.:   6.427   1st Qu.:   6.191   1st Qu.:   7.234  
##  Median :4080.010   Median :2316.500   Median :4046.570   Median :6230.110  
##  Mean   :2647.792   Mean   :1461.399   Mean   :2426.587   Mean   :3835.403  
##  3rd Qu.:4916.150   3rd Qu.:2509.850   3rd Qu.:4263.605   3rd Qu.:6628.515  
##  Max.   :5950.460   Max.   :3491.910   Max.   :5543.960   Max.   :8977.030  
##   Kancheepuram       Kanniyakumari          Karur           Krishnagiri      
##  Min.   :   -0.685   Min.   :   0.361   Min.   :  -6.449   Min.   :   0.317  
##  1st Qu.:    7.713   1st Qu.:   8.359   1st Qu.:   9.470   1st Qu.:   8.511  
##  Median : 6845.130   Median :3959.060   Median :1990.600   Median :2414.740  
##  Mean   : 4436.519   Mean   :2599.614   Mean   :1274.735   Mean   :1636.714  
##  3rd Qu.: 7845.755   3rd Qu.:4544.930   3rd Qu.:2257.080   3rd Qu.:2864.645  
##  Max.   :10481.570   Max.   :6267.260   Max.   :3033.690   Max.   :4056.190  
##     Madurai          Nagapattinam         Namakkal          Perambalur      
##  Min.   :  -0.843   Min.   :  -6.542   Min.   :  -9.929   Min.   :  -7.678  
##  1st Qu.:   6.977   1st Qu.:   4.615   1st Qu.:   7.973   1st Qu.:   1.417  
##  Median :5627.750   Median :2671.120   Median :3850.410   Median :1318.950  
##  Mean   :3527.834   Mean   :1563.301   Mean   :2392.382   Mean   : 772.813  
##  3rd Qu.:6177.510   3rd Qu.:2915.915   3rd Qu.:4363.840   3rd Qu.:1457.385  
##  Max.   :8248.530   Max.   :3277.020   Max.   :5424.440   Max.   :1593.450  
##    Pudukkotai       Ramanathapuram         Salem            Sivagangai      
##  Min.   :  -1.464   Min.   :  -1.498   Min.   :  -3.237   Min.   :   1.298  
##  1st Qu.:   6.020   1st Qu.:   4.725   1st Qu.:   6.241   1st Qu.:   6.219  
##  Median :2402.600   Median :2193.920   Median :6586.380   Median :1872.150  
##  Mean   :1482.451   Mean   :1352.031   Mean   :4047.625   Mean   :1178.045  
##  3rd Qu.:2623.250   3rd Qu.:2477.440   3rd Qu.:7069.940   3rd Qu.:2081.610  
##  Max.   :3331.440   Max.   :2904.960   Max.   :9468.380   Max.   :2708.330  
##    Thanjavur         The Nilgris           Theni          Thiruchirappalli  
##  Min.   :  -3.746   Min.   :  -2.941   Min.   :  -4.183   Min.   :  -0.157  
##  1st Qu.:   7.099   1st Qu.:   7.190   1st Qu.:   2.971   1st Qu.:   8.133  
##  Median :3907.850   Median :1516.620   Median :1967.480   Median :5286.930  
##  Mean   :2403.341   Mean   : 983.934   Mean   :1150.737   Mean   :3456.095  
##  3rd Qu.:4298.460   3rd Qu.:1743.395   3rd Qu.:2094.445   3rd Qu.:6059.255  
##  Max.   :5438.130   Max.   :2325.920   Max.   :2437.570   Max.   :8360.730  
##   Thirunelveli       Thiruvallur        Thiruvannamalai      Thiruvarur      
##  Min.   :   0.422   Min.   :   -1.842   Min.   :  -5.075   Min.   : -11.655  
##  1st Qu.:   6.520   1st Qu.:    8.863   1st Qu.:   6.520   1st Qu.:   4.718  
##  Median :5691.770   Median : 6577.750   Median :2978.060   Median :1774.790  
##  Mean   :3584.937   Mean   : 4522.094   Mean   :1853.143   Mean   :1040.036  
##  3rd Qu.:6406.090   3rd Qu.: 8028.290   3rd Qu.:3247.515   3rd Qu.:1917.950  
##  Max.   :8302.850   Max.   :11040.030   Max.   :4314.280   Max.   :2131.000  
##   Thoothukodi          Vellore            Villupuram        Virudhunagar     
##  Min.   :  -1.192   Min.   :    0.450   Min.   :  -5.497   Min.   :  -6.088  
##  1st Qu.:   6.579   1st Qu.:    7.604   1st Qu.:   5.405   1st Qu.:   8.677  
##  Median :4008.460   Median : 7175.180   Median :3793.900   Median :5155.110  
##  Mean   :2593.326   Mean   : 4442.825   Mean   :2284.989   Mean   :3352.928  
##  3rd Qu.:4480.850   3rd Qu.: 7846.860   3rd Qu.:4016.195   3rd Qu.:6021.940  
##  Max.   :6128.820   Max.   :10343.450   Max.   :5120.190   Max.   :7891.420

Data Cleaning

colSums(is.na(data))
##             Year      Description          Chennai       Coimbatore 
##                0                0                0                0 
##        Cuddalore       Dharmapuri         Dindigul            Erode 
##                0                0                0                0 
##     Kancheepuram    Kanniyakumari            Karur      Krishnagiri 
##                0                0                0                0 
##          Madurai     Nagapattinam         Namakkal       Perambalur 
##                0                0                0                0 
##       Pudukkotai   Ramanathapuram            Salem       Sivagangai 
##                0                0                0                0 
##        Thanjavur      The Nilgris            Theni Thiruchirappalli 
##                0                0                0                0 
##     Thirunelveli      Thiruvallur  Thiruvannamalai       Thiruvarur 
##                0                0                0                0 
##      Thoothukodi          Vellore       Villupuram     Virudhunagar 
##                0                0                0                0
# Keep only rows with no missing value in any column. The NA counts printed
# just before this line are all zero, so no rows are dropped here; the
# subsetting does strip the readr "spec_tbl_df" class (see the str() output
# that follows).
data <- data[complete.cases(data),]

colSums(is.na(data))
##             Year      Description          Chennai       Coimbatore 
##                0                0                0                0 
##        Cuddalore       Dharmapuri         Dindigul            Erode 
##                0                0                0                0 
##     Kancheepuram    Kanniyakumari            Karur      Krishnagiri 
##                0                0                0                0 
##          Madurai     Nagapattinam         Namakkal       Perambalur 
##                0                0                0                0 
##       Pudukkotai   Ramanathapuram            Salem       Sivagangai 
##                0                0                0                0 
##        Thanjavur      The Nilgris            Theni Thiruchirappalli 
##                0                0                0                0 
##     Thirunelveli      Thiruvallur  Thiruvannamalai       Thiruvarur 
##                0                0                0                0 
##      Thoothukodi          Vellore       Villupuram     Virudhunagar 
##                0                0                0                0
str(data)
## tibble [15 x 32] (S3: tbl_df/tbl/data.frame)
##  $ Year            : chr [1:15] "1999-00" "2000-01" "2001-02" "2002-03" ...
##  $ Description     : chr [1:15] "GDP (in Rs. Cr.)" "GDP (in Rs. Cr.)" "GDP (in Rs. Cr.)" "GDP (in Rs. Cr.)" ...
##  $ Chennai         : num [1:15] 13215 12726 12460 12812 13832 ...
##  $ Coimbatore      : num [1:15] 11544 12716 12453 13171 14014 ...
##  $ Cuddalore       : num [1:15] 4080 4431 4467 4749 5084 ...
##  $ Dharmapuri      : num [1:15] 2316 2432 2407 2497 2523 ...
##  $ Dindigul        : num [1:15] 4048 4166 4047 4085 4362 ...
##  $ Erode           : num [1:15] 6230 6669 6455 6453 6589 ...
##  $ Kancheepuram    : num [1:15] 6845 7306 7529 7477 8163 ...
##  $ Kanniyakumari   : num [1:15] 3959 4291 4306 4411 4679 ...
##  $ Karur           : num [1:15] 1991 2205 2063 2103 2309 ...
##  $ Krishnagiri     : num [1:15] 2415 2594 2602 2810 2919 ...
##  $ Madurai         : num [1:15] 5628 5932 5882 5973 6382 ...
##  $ Nagapattinam    : num [1:15] 2788 2946 2753 2671 2916 ...
##  $ Namakkal        : num [1:15] 4114 4453 4275 3850 4091 ...
##  $ Perambalur      : num [1:15] 1593 1495 1435 1325 1319 ...
##  $ Pudukkotai      : num [1:15] 2403 2569 2593 2555 2653 ...
##  $ Ramanathapuram  : num [1:15] 2194 2397 2361 2453 2502 ...
##  $ Salem           : num [1:15] 6804 6807 6586 6896 7244 ...
##  $ Sivagangai      : num [1:15] 1872 1979 2004 2059 2104 ...
##  $ Thanjavur       : num [1:15] 3908 4209 4051 4207 4388 ...
##  $ The Nilgris     : num [1:15] 1517 1616 1569 1605 1871 ...
##  $ Theni           : num [1:15] 2048 2081 2108 2053 1967 ...
##  $ Thiruchirappalli: num [1:15] 5287 5682 5674 5849 6269 ...
##  $ Thirunelveli    : num [1:15] 5692 5959 5994 6393 6420 ...
##  $ Thiruvallur     : num [1:15] 6578 7411 7275 7730 8327 ...
##  $ Thiruvannamalai : num [1:15] 2978 3135 3236 3072 3259 ...
##  $ Thiruvarur      : num [1:15] 1881 2088 1845 1775 1893 ...
##  $ Thoothukodi     : num [1:15] 4008 4316 4265 4349 4612 ...
##  $ Vellore         : num [1:15] 7175 7452 7485 7571 8122 ...
##  $ Villupuram      : num [1:15] 3921 3953 4015 3794 4018 ...
##  $ Virudhunagar    : num [1:15] 5155 5957 5594 5616 6087 ...
# Turn the two character columns into factors; the plotting code further down
# uses the integer codes of Year to position axis ticks.
data[["Year"]] <- as.factor(data[["Year"]])
data[["Description"]] <- as.factor(data[["Description"]])

Base Package

The GDP of Chennai over 1999 to 2007 : scatterplot

# Scatterplot of Chennai's GDP per year (base graphics).
# Keep only the GDP rows and the two columns needed for the plot.
subdata <- data[which(data$Description == "GDP (in Rs. Cr.)"),
                names(data) %in%
                    c("Year", "Chennai")]

# Year is a factor, so its integer codes serve as x positions. They must stay
# in row order: shuffling them (e.g. with sample()) would pair every GDP value
# with a random year and contradict the tick labels drawn by axis() below.
year <- as.numeric(subdata$Year)

# Axes are suppressed here and drawn manually so that the ticks can carry the
# year labels (x) and the rounded GDP values (y).
plot.default(year, subdata$Chennai,
             type = "p",
             col = "blue",
             axes = FALSE,
             main = " The GDP of Chennai(State Capital)")
axis(side = 1, at = year, labels = subdata$Year)
axis(side = 2, at = subdata$Chennai, labels = round(subdata$Chennai))

The Comparison of GDP between two cities - Coimbatore and Erode : Bar Plot

# Grouped bar plot comparing the GDP of Coimbatore and Erode year by year.
# Keep only the GDP rows and the columns for the two cities.
subdata <- data[which(data$Description == "GDP (in Rs. Cr.)"),
                names(data) %in%
                    c("Year", "Coimbatore", "Erode")]

# One row per year, one column per city; barplot() wants groups in columns,
# hence the transpose in the call below.
cities <- as.matrix(cbind(subdata$Coimbatore, subdata$Erode))
colnames(cities) <- c("Coimbatore", "Erode")
rownames(cities) <- subdata$Year

# Compute the palette once so bars and legend are guaranteed to agree.
bar_colours <- viridis(2, begin = 1, end = 0, direction = 1)

# Upper y limit taken over everything that is plotted, stored under a name
# that does not shadow base::max().
gdp_ceiling <- max(cities)

barplot(t(cities),
        beside = TRUE,
        col = bar_colours,
        ylim = c(0, gdp_ceiling),
        xlab = "Years",
        ylab = "GDP (in Rs. Cr.)",
        main = "Comparison of GDP between Coimbatore and Erode")
legend("topleft",
       colnames(cities),
       fill = bar_colours,
       cex = 0.7,
       xjust = 1)

GDP comparison for 4 cities : Line Chart

# Line chart comparing the GDP of four cities across the years.
# Keep only the GDP rows and the columns for the four cities.
subdata1 <- data[which(data$Description == "GDP (in Rs. Cr.)"),
                 names(data) %in%
                     c("Year", "Chennai", "Coimbatore", "Erode", "Madurai")]

# One row per year, one column per city (column order fixes the colour order).
cities <- as.matrix(cbind(subdata1$Coimbatore, subdata1$Erode,
                          subdata1$Chennai, subdata1$Madurai))
# Row names come from this chunk's own subset, not from a data frame left
# over from an earlier chunk.
rownames(cities) <- subdata1$Year
colnames(cities) <- c("Coimbatore", "Erode", "Chennai", "Madurai")

# Compute the palette once so lines and legend are guaranteed to agree.
line_colours <- magma(4, begin = 1, end = 0, direction = 1)

# matplot() plots against the row index; suppress the default numeric x axis
# and label the ticks with the years instead.
matplot(cities,
        type = "o",
        pch = 1,
        col = line_colours,
        lwd = 2,
        lty = 1,
        xaxt = "n",
        xlab = "Years",
        main = "GDP comparison for 4 cities",
        ylab = "GDP (in Rs. Cr.) "
)
axis(side = 1, at = seq_len(nrow(cities)), labels = rownames(cities))

legend("topleft",
       colnames(cities),
       fill = line_colours,
       cex = 0.7,
       xjust = 1)

GDP comparison of all the cities : Pie Chart

# Pie chart of each city's share of the 2000-01 GDP.
# Single GDP row for 2000-01, city columns only, as a 1-row numeric matrix.
subdata2 <- as.matrix(
    subset(data,
           Year == "2000-01" & Description == "GDP (in Rs. Cr.)",
           select = -c(Year, Description))
)

# Share of each city in the total, as a whole-number percentage.
pct <- round(subdata2 / sum(subdata2) * 100)

# Slice labels of the form "<city> <pct> %".
lbls <- paste(colnames(subdata2), pct, "%")

pie(subdata2,
    labels = lbls,
    col = rainbow(length(lbls)),
    main = "The GDP of Cities in 2000-01")

Density of GDP of Salem over the years : Density Plot

# Kernel density estimate of Salem's yearly GDP.
# `data` holds rows for more than one Description; restrict to the GDP rows
# (as the other plots do) so the estimate is not distorted by the
# small-valued non-GDP rows stored in the same column.
salem_gdp <- data$Salem[which(data$Description == "GDP (in Rs. Cr.)")]
salem <- density(salem_gdp)

plot(salem,
     main = "Density of GDP of Salem",
     col = "blue")

Distribution of GDP of cities in terms of five number summary (“minimum”, first quartile (Q1), median, third quartile (Q3), and “maximum”) : Box Plot

# Box plot of the yearly GDP distribution, one box per city column.
# GDP rows only, with the two non-numeric columns dropped. `subdata` is also
# read by the histogram that follows.
subdata <- subset(data,
                  Description == "GDP (in Rs. Cr.)",
                  select = -c(Year, Description))

# One colour per box: 7 + 10 + 10 + 3 = 30 entries, assembled once so the
# boxes and the legend are guaranteed to agree.
box_colours <- c(rainbow(7), magma(10), viridis(10),
                 "chocolate", "pink", "purple")

boxplot(subdata,
        col = box_colours,
        ylim = c(500, 19500),
        main = "Distribution of GDP",
        xlab = "cities",
        # Every city column is plotted, so label the axis with the unit
        # rather than a city count.
        ylab = "GDP (in Rs. Cr.)"
)

legend("topright",
       colnames(subdata),
       fill = box_colours,
       cex = 0.6,
       ncol = 3)

Frequency of GDP : Histogram

# Histogram of all GDP values pooled across cities and years.
# `subdata` is the GDP-only, city-columns-only table built for the box plot;
# as.matrix() flattens it into a single numeric sample for hist().
hist(
    as.matrix(subdata),
    breaks = 30,
    xlim = c(0, 10000),
    col = "chocolate",
    main = "frequency of GDP",
    xlab = "GDP"
)