Objective:

Data Exploration:

In this phase we will explore the dataset to find out the datastructure, NA’s, Null or missing values and also outliers. This will help to understand the data and fix the dateset if needed.

Data Structure of Midewest Dataset

## 'data.frame':    437 obs. of  28 variables:
##  $ PID                 : int  561 562 563 564 565 566 567 568 569 570 ...
##  $ county              : Factor w/ 320 levels "ADAMS","ALCONA",..: 1 3 25 26 28 30 33 35 36 37 ...
##  $ state               : Factor w/ 5 levels "IL","IN","MI",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ area                : num  0.052 0.014 0.022 0.017 0.018 0.05 0.017 0.027 0.024 0.058 ...
##  $ poptotal            : int  66090 10626 14991 30806 5836 35688 5322 16805 13437 173025 ...
##  $ popdensity          : num  1271 759 681 1812 324 ...
##  $ popwhite            : int  63917 7054 14477 29344 5264 35157 5298 16519 13384 146506 ...
##  $ popblack            : int  1702 3496 429 127 547 50 1 111 16 16559 ...
##  $ popamerindian       : int  98 19 35 46 14 65 8 30 8 331 ...
##  $ popasian            : int  249 48 16 150 5 195 15 61 23 8033 ...
##  $ popother            : int  124 9 34 1139 6 221 0 84 6 1596 ...
##  $ percwhite           : num  96.7 66.4 96.6 95.3 90.2 ...
##  $ percblack           : num  2.575 32.9 2.862 0.412 9.373 ...
##  $ percamerindan       : num  0.148 0.179 0.233 0.149 0.24 ...
##  $ percasian           : num  0.3768 0.4517 0.1067 0.4869 0.0857 ...
##  $ percother           : num  0.1876 0.0847 0.2268 3.6973 0.1028 ...
##  $ popadults           : int  43298 6724 9669 19272 3979 23444 3583 11323 8825 95971 ...
##  $ perchsd             : num  75.1 59.7 69.3 75.5 68.9 ...
##  $ percollege          : num  19.6 11.2 17 17.3 14.5 ...
##  $ percprof            : num  4.36 2.87 4.49 4.2 3.37 ...
##  $ poppovertyknown     : int  63628 10529 14235 30337 4815 35107 5241 16455 13081 154934 ...
##  $ percpovertyknown    : num  96.3 99.1 95 98.5 82.5 ...
##  $ percbelowpoverty    : num  13.15 32.24 12.07 7.21 13.52 ...
##  $ percchildbelowpovert: num  18 45.8 14 11.2 13 ...
##  $ percadultpoverty    : num  11.01 27.39 10.85 5.54 11.14 ...
##  $ percelderlypoverty  : num  12.44 25.23 12.7 6.22 19.2 ...
##  $ inmetro             : int  0 0 0 1 0 0 0 0 0 1 ...
##  $ category            : Factor w/ 16 levels "AAR","AAU","AHR",..: 1 15 1 6 1 1 13 1 1 8 ...
##     poptotal          popwhite          popblack       popamerindian    
##  Min.   :   1701   Min.   :    416   Min.   :      0   Min.   :    4.0  
##  1st Qu.:  18840   1st Qu.:  18630   1st Qu.:     29   1st Qu.:   44.0  
##  Median :  35324   Median :  34471   Median :    201   Median :   94.0  
##  Mean   :  96130   Mean   :  81840   Mean   :  11024   Mean   :  343.1  
##  3rd Qu.:  75651   3rd Qu.:  72968   3rd Qu.:   1291   3rd Qu.:  288.0  
##  Max.   :5105067   Max.   :3204947   Max.   :1317147   Max.   :10289.0  
##     popasian         popother         perchsd        percollege    
##  Min.   :     0   Min.   :     0   Min.   :46.91   Min.   : 7.336  
##  1st Qu.:    35   1st Qu.:    20   1st Qu.:71.33   1st Qu.:14.114  
##  Median :   102   Median :    66   Median :74.25   Median :16.798  
##  Mean   :  1310   Mean   :  1613   Mean   :73.97   Mean   :18.273  
##  3rd Qu.:   401   3rd Qu.:   345   3rd Qu.:77.20   3rd Qu.:20.550  
##  Max.   :188565   Max.   :384119   Max.   :88.90   Max.   :48.079  
##     percprof       poppovertyknown   percpovertyknown percbelowpoverty
##  Min.   : 0.5203   Min.   :   1696   Min.   :80.90    Min.   : 2.180  
##  1st Qu.: 2.9980   1st Qu.:  18364   1st Qu.:96.89    1st Qu.: 9.199  
##  Median : 3.8142   Median :  33788   Median :98.17    Median :11.822  
##  Mean   : 4.4473   Mean   :  93642   Mean   :97.11    Mean   :12.511  
##  3rd Qu.: 4.9493   3rd Qu.:  72840   3rd Qu.:98.60    3rd Qu.:15.133  
##  Max.   :20.7913   Max.   :5023523   Max.   :99.86    Max.   :48.691  
##  percchildbelowpovert percadultpoverty percelderlypoverty
##  Min.   : 1.919       Min.   : 1.938   Min.   : 3.547    
##  1st Qu.:11.624       1st Qu.: 7.668   1st Qu.: 8.912    
##  Median :15.270       Median :10.008   Median :10.869    
##  Mean   :16.447       Mean   :10.919   Mean   :11.389    
##  3rd Qu.:20.352       3rd Qu.:13.182   3rd Qu.:13.412    
##  Max.   :64.308       Max.   :43.312   Max.   :31.162

Mean,Median, IQR and SKEW

kable(summaryDF)
mean median iqr skews
poptotal 96130.302059 35324.000000 56811.000000 12.1069181
popwhite 81839.915332 34471.000000 54338.000000 10.0411023
popblack 11023.881007 201.000000 1262.000000 13.3729696
popamerindian 343.109840 94.000000 244.000000 6.9385544
popasian 1310.464531 102.000000 366.000000 17.6782672
popother 1612.931350 66.000000 325.000000 20.1547635
perchsd 73.965546 74.246891 5.870015 -0.4437881
percollege 18.272736 16.797562 6.436168 1.5553955
percprof 4.447259 3.814239 1.951367 2.7120072
poppovertyknown 93642.283753 33788.000000 54476.000000 12.1393343
percpovertyknown 97.110267 98.169562 1.704064 -2.8455656
percbelowpoverty 12.510505 11.822313 5.934511 1.5095757
percchildbelowpovert 16.447464 15.270164 8.727791 1.3459864
percadultpoverty 10.918798 10.007610 5.514173 1.5864326
percelderlypoverty 11.389043 10.869119 4.500400 1.1334878

Correlation Plot

M <- cor(midwestCorrData)
par(mfrow=c(1,1))
corrplot(M, method="number")

Conclusion: With this Correlation plot we can very well see the direct correlation of poverty per race, however concluding poverty with education is still not having clear correlation. Need further investigation on this.

#### Generate histogram and count of missing data post data transformation
suppressMessages(library(VIM))

aggr_plot <- aggr(midwest, col=c('navyblue','red'), numbers=TRUE, sortVars=TRUE, 
                  labels=names(data), cex.axis=.7, gap=3, 
                  ylab=c("Histogram of missing data before Data Transformation","Pattern"))

## 
##  Variables sorted by number of missings: 
##              Variable Count
##                   PID     0
##                county     0
##                 state     0
##                  area     0
##              poptotal     0
##            popdensity     0
##              popwhite     0
##              popblack     0
##         popamerindian     0
##              popasian     0
##              popother     0
##             percwhite     0
##             percblack     0
##         percamerindan     0
##             percasian     0
##             percother     0
##             popadults     0
##               perchsd     0
##            percollege     0
##              percprof     0
##       poppovertyknown     0
##      percpovertyknown     0
##      percbelowpoverty     0
##  percchildbelowpovert     0
##      percadultpoverty     0
##    percelderlypoverty     0
##               inmetro     0
##              category     0

Conclusion: There are no missing data in the dataset.

#### Generate Boxplots to analyze outliers
meltData <- melt(midwest)
## Using county, state, category as id variables
par(mfrow=c(1,1))
boxplot(data=meltData, value~variable)

There are few outliers which can be observed in the boxplot,let’s have a closer look to the outlier data in all key variables.

Detailed Boxplots (Pre Data Transformation)

midwest1<-midwest[c(5,6,7,8,9,10)]
 midwest2<- midwest[c(11,12,13,14,15,16)]
 midwest3<- midwest[c(17,18,19,20,21)]
midwest4<-midwest[c(22,23,24,25,26)]

boxplot(midwest1, las = 2, par(mar = c(12, 5, 4, 2) + 0.1));

boxplot(midwest2, las = 2, par(mar = c(12, 5, 4, 2) + 0.1));

boxplot(midwest3, las = 2, par(mar = c(12, 5, 4, 2) + 0.1));

boxplot(midwest4, las = 2, par(mar = c(12, 5, 4, 2) + 0.1));

There are 437 observations with 28 variables with many outliers identified in poptotal, popwhite, popblack, popother, popadults, poppovertknown, percpovertyknown, percbelowpoverty, percchildbelowpoverty, percadultpoverty and percelderlypoverty columns. So we will focus on removing these outliers and bringing the data closer to Mean, for this we used Outlier package and removed outliers from poptotal,popwhite,popblack and popother column which has impact on other columns too and took away 4 outlier data from the dataset which we have observed above.

outlier_tf1= outlier(midwest$poptotal,logical=TRUE)
find_outlier = which(outlier_tf1==TRUE,arr.ind=TRUE)
midwest = midwest[-find_outlier,]

outlier_tf2= outlier(midwest$popwhite,logical=TRUE)
find_outlier = which(outlier_tf2==TRUE,arr.ind=TRUE)
midwest = midwest[-find_outlier,]

outlier_tf3= outlier(midwest$popblack,logical=TRUE)
find_outlier = which(outlier_tf3==TRUE,arr.ind=TRUE)
midwest = midwest[-find_outlier,]

outlier_tf4= outlier(midwest$popother,logical=TRUE)
find_outlier = which(outlier_tf4==TRUE,arr.ind=TRUE)
midwest = midwest[-find_outlier,]

Detailed Boxplots (Post outliers reduction)

midwest1<-midwest[c(5,6,7,8,9,10)]
 midwest2<- midwest[c(11,12,13,14,15,16)]
 midwest3<- midwest[c(17,18,19,20,21)]
midwest4<-midwest[c(22,23,24,25,26)]

boxplot(midwest1, las = 2, par(mar = c(12, 5, 4, 2) + 0.1));

boxplot(midwest2, las = 2, par(mar = c(12, 5, 4, 2) + 0.1));

boxplot(midwest3, las = 2, par(mar = c(12, 5, 4, 2) + 0.1));

boxplot(midwest4, las = 2, par(mar = c(12, 5, 4, 2) + 0.1));

str(midwest)
## 'data.frame':    433 obs. of  28 variables:
##  $ PID                 : int  561 562 563 564 565 566 567 568 569 570 ...
##  $ county              : Factor w/ 320 levels "ADAMS","ALCONA",..: 1 3 25 26 28 30 33 35 36 37 ...
##  $ state               : Factor w/ 5 levels "IL","IN","MI",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ area                : num  0.052 0.014 0.022 0.017 0.018 0.05 0.017 0.027 0.024 0.058 ...
##  $ poptotal            : int  66090 10626 14991 30806 5836 35688 5322 16805 13437 173025 ...
##  $ popdensity          : num  1271 759 681 1812 324 ...
##  $ popwhite            : int  63917 7054 14477 29344 5264 35157 5298 16519 13384 146506 ...
##  $ popblack            : int  1702 3496 429 127 547 50 1 111 16 16559 ...
##  $ popamerindian       : int  98 19 35 46 14 65 8 30 8 331 ...
##  $ popasian            : int  249 48 16 150 5 195 15 61 23 8033 ...
##  $ popother            : int  124 9 34 1139 6 221 0 84 6 1596 ...
##  $ percwhite           : num  96.7 66.4 96.6 95.3 90.2 ...
##  $ percblack           : num  2.575 32.9 2.862 0.412 9.373 ...
##  $ percamerindan       : num  0.148 0.179 0.233 0.149 0.24 ...
##  $ percasian           : num  0.3768 0.4517 0.1067 0.4869 0.0857 ...
##  $ percother           : num  0.1876 0.0847 0.2268 3.6973 0.1028 ...
##  $ popadults           : int  43298 6724 9669 19272 3979 23444 3583 11323 8825 95971 ...
##  $ perchsd             : num  75.1 59.7 69.3 75.5 68.9 ...
##  $ percollege          : num  19.6 11.2 17 17.3 14.5 ...
##  $ percprof            : num  4.36 2.87 4.49 4.2 3.37 ...
##  $ poppovertyknown     : int  63628 10529 14235 30337 4815 35107 5241 16455 13081 154934 ...
##  $ percpovertyknown    : num  96.3 99.1 95 98.5 82.5 ...
##  $ percbelowpoverty    : num  13.15 32.24 12.07 7.21 13.52 ...
##  $ percchildbelowpovert: num  18 45.8 14 11.2 13 ...
##  $ percadultpoverty    : num  11.01 27.39 10.85 5.54 11.14 ...
##  $ percelderlypoverty  : num  12.44 25.23 12.7 6.22 19.2 ...
##  $ inmetro             : int  0 0 0 1 0 0 0 0 0 1 ...
##  $ category            : Factor w/ 16 levels "AAR","AAU","AHR",..: 1 15 1 6 1 1 13 1 1 8 ...

Conclusion: Post outlier reduction the dataset has improved by removing 4 outliers which has very high skewed values. So now the new dataset has 433 observations and 28 variables.

Data Preparation:

Data Tranformation with the use of sqldf, ggplot2, reshape2 and plyr packages.

To use the data effectively, we did some data manipulation as creation of another table stateView which provides exclusive data of State and also extended teh orginal Midwest Data by adding an extra column of state_longname which can provide complete name of the states in midwest. To perform all these data manipulation, we have leveraged sqldf package which comes very handy to get the relevant subset of data and use it further for plotting.

stateView<-sqldf(c("Drop table if exists state_View","CREATE table state_View
  (state_id varchar(4), state_shortname varchar(3),state_longname varchar(15), countyName varchar(10));",
  "Insert into state_View(state_shortname,countyName) Select state,county from midwest", 
  "Update state_View set state_longname='ILLINOIS' where state_View.state_shortname='IL'",
  "Update state_View set state_id='IL1' where state_View.state_shortname='IL'",

  "Update state_View set state_longname='INDIANA' where state_View.state_shortname='IN'",
  "Update state_View set state_id='IN2' where state_View.state_shortname='IN'",

  "Update state_View set state_longname='MICHIGAN' where state_View.state_shortname='MI'",
  "Update state_View set state_id='MI3' where state_View.state_shortname='MI'",

  "Update state_View set state_longname='OHIO' where state_View.state_shortname='OH'",
  "Update state_View set state_id='OH4' where state_View.state_shortname='OH'",

  "Update state_View set state_longname='WISCONSIN' where state_View.state_shortname='WI'",
  "Update state_View set state_id='WI5' where state_View.state_shortname='WI'",

  "Select * from state_View"))
## Loading required package: tcltk
midwest <- sqldf(c("Alter table midwest ADD state_idMW varchar(15)",

  "Update midwest set state_idMW='ILLINOIS' where midwest.state='IL'",
 "Update midwest set state_idMW='INDIANA' where midwest.state='IN'",
 "Update midwest set state_idMW='MICHIGAN' where midwest.state='MI'",
"Update midwest set state_idMW='OHIO' where midwest.state='OH'",
"Update midwest set state_idMW='WISCONSIN' where midwest.state='WI'",

  "Select * from midwest"))

race_pop<- sqldf(c("select sum(popwhite),sum(popblack),sum(popamerindian), sum(popasian), sum(popother),state_idMW
                   from midwest group by state_idMW"))
pop1<-sqldf(c("select sum(poptotal) As poptotal, state_idMW As state
                  from midwest group by state_idMW"))

Data Visualizations:

Population by State.

Up

ggplot(data = pop1, aes(x = state, y = poptotal, fill = state)) + geom_histogram(stat="identity") + ggtitle("Midwest States Vs. Total Population") +
xlab("Midwest States") + ylab("Total Population")

Conclusion:

The graph represents Illinois as highest populated state and Wisconsin as the least populated state. This can further be analyzed per race in each state which will provide more insight on economic status of individuals.

Demographic by Race.

Up

names(race_pop) <- c( "White_Population", "African_American_Population", "Native_American_Population", "Asian_American_Population", "Population_Other_Races", "State")
midwest2 <- melt(race_pop,id.vars="State", variable.name = "Race", value.name = "Population_by_Race" )

midwest2s <- aggregate(Population_by_Race ~ (Race), data = midwest2, FUN = 'sum')
midwest2s
##                          Race Population_by_Race
## 1            White_Population           30051658
## 2 African_American_Population            2281989
## 3  Native_American_Population             128449
## 4   Asian_American_Population             339845
## 5      Population_Other_Races             260636
midwest2s <- ddply(midwest2s, .(Race), transform, pos=cumsum(Population_by_Race)-0.5*(Population_by_Race))
ggplot(data = midwest2s, aes(x = "", y = Population_by_Race, fill = Race)) + geom_bar(stat = "identity", color = 'black') + coord_polar(theta="y") +  guides(fill=guide_legend(override.aes=list(colour=NA))) + ggtitle("The Total Population The Midwest By Race") +
ylab("Total Population") 

The graph represents the demograph of each race in midwest. If we look at the chart we see that the majoritry of population in midwest is White American, and the minoity are the Native Americans

   #create a histogram of midwest by demographic of races.
s <- ggplot(data = midwest2, aes(x = State, y = Population_by_Race, fill = Race))
s + geom_histogram(stat = "identity", position = "dodge") + ggtitle("Midwest States And Total Population By Each Race") +
xlab("Midwest States") + ylab("Total Population By Each Race")

Conclusion:

The graph shows the demographic of different races in each state.

Percent of overall poplation per state.

Up

midwest.sub <- midwest[, c(3, 12:16)]
head(midwest.sub)
##   state percwhite  percblack percamerindan  percasian  percother
## 1    IL  96.71206  2.5752761     0.1482826 0.37675897 0.18762294
## 2    IL  66.38434 32.9004329     0.1788067 0.45172219 0.08469791
## 3    IL  96.57128  2.8617170     0.2334734 0.10673071 0.22680275
## 4    IL  95.25417  0.4122574     0.1493216 0.48691813 3.69733169
## 5    IL  90.19877  9.3728581     0.2398903 0.08567512 0.10281014
## 6    IL  98.51210  0.1401031     0.1821340 0.54640215 0.61925577
names(midwest.sub) <- c("State", "White_Population", "African_American_Population", "Native_American_Population", "Asian_American_Population", "Population_Other_Races")
m3 <- melt(midwest.sub,id.vars="State", variable.name = "Race", value.name = "Population_by_Race")
midwest3 <- aggregate(Population_by_Race ~ ((Race + State)), data = m3, FUN = 'sum')
midwest3 <- aggregate(Population_by_Race ~ ((Race + State)), data = m3, FUN = 'mean')
midwest3
##                           Race State Population_by_Race
## 1             White_Population    IL         95.3771660
## 2  African_American_Population    IL          3.4093008
## 3   Native_American_Population    IL          0.1733525
## 4    Asian_American_Population    IL          0.5241432
## 5       Population_Other_Races    IL          0.5160375
## 6             White_Population    IN         97.2071380
## 7  African_American_Population    IN          1.8903350
## 8   Native_American_Population    IN          0.2218422
## 9    Asian_American_Population    IN          0.3826576
## 10      Population_Other_Races    IN          0.2980273
## 11            White_Population    MI         94.8963895
## 12 African_American_Population    MI          2.6130989
## 13  Native_American_Population    MI          1.3772376
## 14   Asian_American_Population    MI          0.5005243
## 15      Population_Other_Races    MI          0.6127498
## 16            White_Population    OH         95.6594696
## 17 African_American_Population    OH          3.2699274
## 18  Native_American_Population    OH          0.1842939
## 19   Asian_American_Population    OH          0.4230578
## 20      Population_Other_Races    OH          0.4632514
## 21            White_Population    WI         95.7865216
## 22 African_American_Population    WI          0.8216085
## 23  Native_American_Population    WI          2.5203660
## 24   Asian_American_Population    WI          0.5562127
## 25      Population_Other_Races    WI          0.3152911
# Create pie charts by each state.
midwest3 <- ddply(midwest3, .(State), transform, pos=cumsum(Population_by_Race)-0.5*(Population_by_Race));

ggplot(data = midwest3, aes(x = "", y = Population_by_Race, fill = Race)) + geom_bar(stat = "identity", color = 'black') + coord_polar(theta="y")  + guides(fill=guide_legend(override.aes=list(colour=NA)))+
facet_wrap(~State) + ggtitle("The Population Of Midwest By Race") +
ylab("Total Population By State") 

Conclusion:

The pie charts shows us the percent of different races in each state.

Does education has impact on Poverty?

Up

p <- ggplot(data = midwest, aes(y = percbelowpoverty, x = percollege)) 
p + geom_point((aes(color = state))) + ggtitle("College Education Vs Total Poverty") + xlab("Percent College Educated") + ylab("Percentage of Total poverty")

The following summary table shows the percentage of population who are college educated and also the percentage of population living below poverty level in each of the Midwest states.

  # create a dot plot with correlation line.
p + geom_point(aes(color = state)) + geom_smooth(method = "lm", se=FALSE, color="black", formula = y ~ x) + facet_wrap(~state) + ggtitle("College Education Vs Total Proverty by Each Midwest State") + xlab("Percent College Educated") + ylab("Percentage of Total proverty") 

Conclusion:

Within each state, the correlation of college education and poverty level are inversely related. The analysis was done at county level within each state. With the highest correlation being in Ohio state: -0.5. Although correlation does not suggest causation, the trend is uniform across all 5 states.

BoxPlot that shows the outliers for the above analysis on Eductaion vs Poverty in each state

Up

The BoxPlot provides all outliers for education vs poverty in each state and this can further be analyzed to identify the gaps

  #Create box plot with outlliers.
p <- ggplot(data = midwest, aes(y = percbelowpoverty, x = percollege))
p + geom_boxplot(aes(color = state)) + facet_wrap(~state) + ggtitle("College Education Vs Total Poverty by Each Midwest State") +
xlab("Percent College Educated") + ylab("Percentage of Total poverty")

The table below examines the probability that a person to be college educated and living below poverty within each state. The calculation was simply the multiplication of percentage of college educated residents and percentage of residents below poverty. As we can see the highest percentage of outlier are in Michigan as shown by our boxplot in R.

Conclusion:

The college level education and poverty level are negatively correlated across 5 states in Midwest US. Although there are a few outlier counties which have higher college educated population but also higher poverty, the probability of these cases are lower than 0.02%.

Shiny Application for MidWest Demography Details

Up

To summarize all the analysis in simple query based application, I have developed a Shiny Application which can be integrated with any portal to provide insight on MidWest Demography. This can easily be extended to all the other possible FAQs for MidWest Region. Currently I have implemented the state wise demograpghic details around population and education level of the Midwest region.

Code for Shiny

library(shiny)
library(sqldf)
library(reshape2)
library(ggplot2)
library(gridExtra)
library(grid)
wd.datapath = paste0(getwd(),"/data")
wd.init = getwd()
setwd(wd.datapath)

midwest2 = read.csv("midwest.csv", header = TRUE)

midwestNew <- sqldf(c("Alter table midwest2 ADD state_idMW varchar(15)",
  
  "Update midwest2 set state_idMW='ILLINOIS' where midwest2.state='IL'",
  "Update midwest2 set state_idMW='INDIANA' where midwest2.state='IN'",
  "Update midwest2 set state_idMW='MICHIGAN' where midwest2.state='MI'",
  "Update midwest2 set state_idMW='OHIO' where midwest2.state='OH'",
  "Update midwest2 set state_idMW='WISCONSIN' where midwest2.state='WI'",
  
  "Select * from midwest2"))

setwd(wd.init)

# ui.R files for front end of shinyApp

library(shiny)
wd.datapath = paste0(getwd(),"/data")
wd.init = getwd()
setwd(wd.datapath)

midwest2 = read.csv("midwest.csv", header = TRUE)

setwd(wd.init)


#df.shiny = read.csv("C:/Users/sanjivek/Desktop/shine/data/midwest.csv")
#print(midwest2)

# Define UI for application that draws a histogram
shinyUI(fluidPage(
  
  # Application title
  titlePanel("Midwest State Demography Distribution"),
  
  # Sidebar with a slider input for the number of bins
  sidebarLayout(
    sidebarPanel(
      
      selectInput(inputId = "Stat", 
                  label = h4("Select Midwest State for Demographic details"),
                  choices = list("ILLINOIS", "MICHIGAN",
                                 "INDIANA", "WISCONSIN","OHIO"),
                  selected = "ILLINOIS")
    ),
    
    # Show a plot of the generated distribution
    mainPanel(
      #tabsetPanel(tabPanel("Main",plotOutput("distPlot", height = 1000, width = 1000))
      
      plotOutput("distPlot")
    ))
  )
)



# Define server logic required to draw a histogram
shinyServer(function(input, output) {
  
     output$distPlot <- renderPlot({
     
      
       popQuery <- sqldf(paste0( "select sum(popwhite),sum(popblack),sum(popamerindian), sum(popasian), sum(popother),state_idMW   from midwestNew 
         where state_idMW ='", input$Stat, "';" ) )
       print(popQuery)
      
       popQuery2<-sqldf(paste0( "select percpovertyknown,percollege,state_idMW   from midwestNew 
         where state_idMW ='", input$Stat, "';" ) )
       
       popQuery3 <- sqldf(paste0( "select avg(percwhite),avg(percblack),avg(percamerindan), avg(percasian),
                    avg(percother),state_idMW   from midwestNew 
                     where state_idMW ='", input$Stat, "';" ) )
       
       
      m <- melt(popQuery,id.vars="state_idMW", variable.name = "Race", value.name = "Population_by_Race")
       print(m)
       
       n <- melt(popQuery3,id.vars="state_idMW", variable.name = "Race", value.name = "Percentage_by_Population")
       print(n)
       
       
      w<-ggplot(data = m, aes(x=state_idMW ,y=Population_by_Race,fill=Race)) +  geom_histogram(stat="identity",position="dodge")
      
      p <- ggplot(data = popQuery2, aes(y = percbelowpoverty, x = percollege)) + geom_point((aes(color = state_idMW))) + ggtitle("College Education Vs Total Poverty") +
      xlab("Percent College Educated") + ylab("Percentage of Total poverty") 
      
 
      
      z <- ggplot(data = n, aes(x = "", y = Percentage_by_Population, fill = Race)) + 
        geom_bar(stat = "identity", color = 'black') + coord_polar(theta="y") + 
        guides(fill=guide_legend(override.aes=list(colour=NA)))
     
      
      pushViewport(viewport(layout = grid.layout(3, 1),width=0.75,height=1))
      
      print(w, vp = viewport(layout.pos.row = 1, layout.pos.col = 1 ))
      
      print(p, vp = viewport(layout.pos.row = 2, layout.pos.col = 1))
      
      print(z, vp = viewport(layout.pos.row = 3, layout.pos.col = 1))
      
     })
  })