Loading the needed libraries.

rm(list=ls())
gc()
##          used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 494409 26.5     940480 50.3   750400 40.1
## Vcells 934606  7.2    1650153 12.6  1176241  9.0
library(tidyverse) #visualizeing, transforming, inputing, tidying and joining data
## ── Attaching packages ──────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 2.2.1     ✔ purrr   0.2.4
## ✔ tibble  1.4.1     ✔ dplyr   0.7.4
## ✔ tidyr   0.7.2     ✔ stringr 1.2.0
## ✔ readr   1.1.1     ✔ forcats 0.2.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(readxl)
library(plotly)    #showing plots and data at the same time
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(ggmap)
## 
## Attaching package: 'ggmap'
## The following object is masked from 'package:plotly':
## 
##     wind
library(maps)
## 
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
## 
##     map
library(mapdata)
library(DT)        #previewing the data sets
library(ggrepel)   #avoid label overlapping
library(car)       #testing multicollinearity and heteroscedasticity 
## Warning: package 'car' was built under R version 3.4.4
## Loading required package: carData
## Warning: package 'carData' was built under R version 3.4.4
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:purrr':
## 
##     some
library(modelr)    #testing if the model can be applied generally
library(scales)    #formatting scales to non-scientific type
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
library(gridExtra) #putting multiple plots into 1 page
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(neuralnet) #training of Neural Networks
## 
## Attaching package: 'neuralnet'
## The following object is masked from 'package:dplyr':
## 
##     compute

The initial idea behind this project was to look at socioeconomic and demographic data to be able ro see what are the best places to live in the U.S.The first dataset was obtained from https://www.bls.gov/home.htm and converted into a csv file then iploaded to RStudio

wages<- read.csv(file="mwe_2016complete.csv", header=TRUE, sep=",")
head(wages, 20)
##                        Series.ID
## 1  WMU00000001020000001100000006
## 2  WMU00000001020000001100000007
## 3  WMU00000001020000001100000008
## 4  WMU00000001020000001100000009
## 5  WMU00000001020000001100000010
## 6  WMU00000001020000001100000011
## 7  WMU00000001020000001100000012
## 8  WMU00000001020000001100000013
## 9  WMU00000001020000001100000016
## 10 WMU00000001020000001110210008
## 11 WMU00000001020000001110210010
## 12 WMU00000001020000001110210011
## 13 WMU00000001020000001110210012
## 14 WMU00000001020000001110210013
## 15 WMU00000001020000001110210016
## 16 WMU00000001020000001120210009
## 17 WMU00000001020000001120210010
## 18 WMU00000001020000001120210011
## 19 WMU00000001020000001120210012
## 20 WMU00000001020000001120210016
##                                                                           Series.Title
## 1                    Hourly mean wage for management occupations, for all US, Level 06
## 2                    Hourly mean wage for management occupations, for all US, Level 07
## 3                    Hourly mean wage for management occupations, for all US, Level 08
## 4                    Hourly mean wage for management occupations, for all US, Level 09
## 5                    Hourly mean wage for management occupations, for all US, Level 10
## 6                    Hourly mean wage for management occupations, for all US, Level 11
## 7                    Hourly mean wage for management occupations, for all US, Level 12
## 8                    Hourly mean wage for management occupations, for all US, Level 13
## 9           Hourly mean wage for management occupations, for all US, not able to level
## 10          Hourly mean wage for general and operations managers, for all US, Level 08
## 11          Hourly mean wage for general and operations managers, for all US, Level 10
## 12          Hourly mean wage for general and operations managers, for all US, Level 11
## 13          Hourly mean wage for general and operations managers, for all US, Level 12
## 14          Hourly mean wage for general and operations managers, for all US, Level 13
## 15 Hourly mean wage for general and operations managers, for all US, not able to level
## 16                       Hourly mean wage for marketing managers, for all US, Level 09
## 17                       Hourly mean wage for marketing managers, for all US, Level 10
## 18                       Hourly mean wage for marketing managers, for all US, Level 11
## 19                       Hourly mean wage for marketing managers, for all US, Level 12
## 20              Hourly mean wage for marketing managers, for all US, not able to level
##    Average.Hourly.Wage Area.Code Area.Level Area.Text Occupation.Code
## 1                16.02         0   National  National          110000
## 2                22.65         0   National  National          110000
## 3                25.86         0   National  National          110000
## 4                34.61         0   National  National          110000
## 5                40.20         0   National  National          110000
## 6                52.91         0   National  National          110000
## 7                68.79         0   National  National          110000
## 8                85.22         0   National  National          110000
## 9                65.23         0   National  National          110000
## 10               24.10         0   National  National          111021
## 11               36.27         0   National  National          111021
## 12               55.80         0   National  National          111021
## 13               61.58         0   National  National          111021
## 14               77.03         0   National  National          111021
## 15               70.49         0   National  National          111021
## 16               40.27         0   National  National          112021
## 17               43.09         0   National  National          112021
## 18               56.17         0   National  National          112021
## 19               73.05         0   National  National          112021
## 20               76.92         0   National  National          112021
##                    Occupation.Text Job.Characteristic.Code
## 1           Management Occupations                       0
## 2           Management Occupations                       0
## 3           Management Occupations                       0
## 4           Management Occupations                       0
## 5           Management Occupations                       0
## 6           Management Occupations                       0
## 7           Management Occupations                       0
## 8           Management Occupations                       0
## 9           Management Occupations                       0
## 10 General and Operations Managers                       0
## 11 General and Operations Managers                       0
## 12 General and Operations Managers                       0
## 13 General and Operations Managers                       0
## 14 General and Operations Managers                       0
## 15 General and Operations Managers                       0
## 16              Marketing Managers                       0
## 17              Marketing Managers                       0
## 18              Marketing Managers                       0
## 19              Marketing Managers                       0
## 20              Marketing Managers                       0
##    Job.Characteristic.Text Work.Level.Code        Work.Level.Text
## 1              All workers               6               Level 06
## 2              All workers               7               Level 07
## 3              All workers               8               Level 08
## 4              All workers               9               Level 09
## 5              All workers              10               Level 10
## 6              All workers              11               Level 11
## 7              All workers              12               Level 12
## 8              All workers              13               Level 13
## 9              All workers              16 Not able to be leveled
## 10             All workers               8               Level 08
## 11             All workers              10               Level 10
## 12             All workers              11               Level 11
## 13             All workers              12               Level 12
## 14             All workers              13               Level 13
## 15             All workers              16 Not able to be leveled
## 16             All workers               9               Level 09
## 17             All workers              10               Level 10
## 18             All workers              11               Level 11
## 19             All workers              12               Level 12
## 20             All workers              16 Not able to be leveled
summary(wages) 
##                          Series.ID     
##  WMU00000001020000001100000006:     1  
##  WMU00000001020000001100000007:     1  
##  WMU00000001020000001100000008:     1  
##  WMU00000001020000001100000009:     1  
##  WMU00000001020000001100000010:     1  
##  WMU00000001020000001100000011:     1  
##  (Other)                      :255473  
##                                                               Series.Title   
##  Hourly mean wage for accountants and auditors, for all US, Level 06:     1  
##  Hourly mean wage for accountants and auditors, for all US, Level 07:     1  
##  Hourly mean wage for accountants and auditors, for all US, Level 08:     1  
##  Hourly mean wage for accountants and auditors, for all US, Level 09:     1  
##  Hourly mean wage for accountants and auditors, for all US, Level 10:     1  
##  Hourly mean wage for accountants and auditors, for all US, Level 11:     1  
##  (Other)                                                            :255473  
##  Average.Hourly.Wage   Area.Code               Area.Level    
##  Min.   : 8.26       Min.   :      0   Metro area   :165981  
##  1st Qu.:12.31       1st Qu.:  24860   National     :  3386  
##  Median :16.84       Median :  41100   Nonmetro area: 64329  
##  Mean   :20.04       Mean   :1054035   State area   : 21783  
##  3rd Qu.:24.31       3rd Qu.:1800003                         
##  Max.   :99.88       Max.   :5600004                         
##                                                              
##                             Area.Text      Occupation.Code 
##  National                        :  3386   Min.   :110000  
##  Ohio                            :   804   1st Qu.:310000  
##  Columbus, OH                    :   775   Median :410000  
##  Cleveland-Elyria, OH            :   769   Mean   :377382  
##  Indianapolis-Carmel-Anderson, IN:   764   3rd Qu.:436014  
##  North Carolina                  :   762   Max.   :537199  
##  (Other)                         :248219                   
##                                            Occupation.Text  
##  Office and Administrative Support Occupations     : 16856  
##  Sales and Related Occupations                     : 13860  
##  Healthcare Practitioners and Technical Occupations: 13274  
##  Transportation and Material Moving Occupations    : 12613  
##  Production Occupations                            : 12513  
##  Education, Training, and Library Occupations      :  9912  
##  (Other)                                           :176451  
##  Job.Characteristic.Code        Job.Characteristic.Text Work.Level.Code
##  Min.   : 0.00           All workers        :107059     Min.   : 0.00  
##  1st Qu.: 0.00           Full-time          : 74120     1st Qu.: 1.00  
##  Median :24.00           Incentive-based pay:  4329     Median : 3.00  
##  Mean   :14.62           Nonunion           : 12684     Mean   : 4.83  
##  3rd Qu.:25.00           Part-time          : 40274     3rd Qu.: 7.00  
##  Max.   :28.00           Time-based pay     :  4329     Max.   :16.00  
##                          Union              : 12684                    
##                Work.Level.Text 
##  All levels            :63658  
##  Not able to be leveled:27933  
##  Level 03              :27287  
##  Level 02              :24440  
##  Level 04              :23514  
##  Level 05              :17002  
##  (Other)               :71645
#The histogram of the data shows that the distrubution on the data points of the Average Hourly Wage is skewed to the right and distributed unevenly. 
hist(wages$Average.Hourly.Wage, col="green")

#looking at the min and max values of the data
apply(wages,2 , range)
##      Series.ID                      
## [1,] "WMU00000001020000001100000006"
## [2,] "WMU56000041020000005370622601"
##      Series.Title                                                         
## [1,] "Hourly mean wage for accountants and auditors, for all US, Level 06"
## [2,] "Hourly mean wage for word processors and typists, union, for all US"
##      Average.Hourly.Wage Area.Code Area.Level   Area.Text    
## [1,] " 8.26"             "      0" "Metro area" "Abilene, TX"
## [2,] "99.88"             "5600004" "State area" "Yuma, AZ"   
##      Occupation.Code Occupation.Text               Job.Characteristic.Code
## [1,] "110000"        "Accountants and Auditors"    " 0"                   
## [2,] "537199"        "Word Processors and Typists" "28"                   
##      Job.Characteristic.Text Work.Level.Code Work.Level.Text         
## [1,] "All workers"           " 0"            "All levels"            
## [2,] "Union"                 "16"            "Not able to be leveled"

The next dataset is an API with a public key https://api.data.gov/ and it containes crime estimates from 1995 until 2016

library(jsonlite)
## 
## Attaching package: 'jsonlite'
## The following object is masked from 'package:purrr':
## 
##     flatten
library(tidyr)
library(dplyr)
library(knitr)
api_key<-"iiHnOKfno2Mgkt5AynpvPpUQTEyxE77jo1RU8PIv"
url <-"https://api.usa.gov/crime/fbi/ucr/estimates/national?page=1&per_page=100&output=json&api_key="
api_url= paste(url,api_key,sep="")
api_url
## [1] "https://api.usa.gov/crime/fbi/ucr/estimates/national?page=1&per_page=100&output=json&api_key=iiHnOKfno2Mgkt5AynpvPpUQTEyxE77jo1RU8PIv"

FBI Crime Data API

#Read JSON using API key
crimes<- fromJSON(api_url, flatten = TRUE)

#Convert JSON into data.frame
crimes<- data.frame(crimes)
#review the column names 
colnames(crimes)
##  [1] "results.rape_revised"        "results.motor_vehicle_theft"
##  [3] "results.violent_crime"       "results.larceny"            
##  [5] "results.rape_legacy"         "results.homicide"           
##  [7] "results.population"          "results.property_crime"     
##  [9] "results.caveats"             "results.burglary"           
## [11] "results.aggravated_assault"  "results.robbery"            
## [13] "results.year"                "pagination.pages"           
## [15] "pagination.count"            "pagination.page"            
## [17] "pagination.per_page"
apply(crimes, 2, range)
##      results.rape_revised results.motor_vehicle_theft
## [1,] NA                   " 686803"                  
## [2,] NA                   "1472441"                  
##      results.violent_crime results.larceny results.rape_legacy
## [1,] "1153022"             "5638455"       " 82109"           
## [2,] "1798792"             "7997710"       "130603"           
##      results.homicide results.population results.property_crime
## [1,] "14164"          "262803276"        " 7919035"            
## [2,] "21606"          "323127513"        "12063935"            
##      results.caveats results.burglary results.aggravated_assault
## [1,] NA              "1515096"        " 726777"                 
## [2,] NA              "2593784"        "1099207"                 
##      results.robbery results.year pagination.pages pagination.count
## [1,] "322905"        "1995"       "1"              "22"            
## [2,] "580509"        "2016"       "1"              "22"            
##      pagination.page pagination.per_page
## [1,] "1"             "100"              
## [2,] "1"             "100"
head(crimes)
##   results.rape_revised results.motor_vehicle_theft results.violent_crime
## 1                   NA                     1472441               1798792
## 2                   NA                     1394238               1688540
## 3                   NA                     1354189               1636099
## 4                   NA                     1242781               1533887
## 5                   NA                     1152075               1426044
## 6                   NA                     1160002               1425486
##   results.larceny results.rape_legacy results.homicide results.population
## 1         7997710               97470            21606          262803276
## 2         7904685               96252            19645          265228572
## 3         7743760               96153            18211          267783607
## 4         7376311               93144            16974          270248003
## 5         6955520               89411            15522          272690813
## 6         6971590               90178            15586          281421906
##   results.property_crime results.caveats results.burglary
## 1               12063935            <NA>          2593784
## 2               11805323            <NA>          2506400
## 3               11558975            <NA>          2461026
## 4               10951827            <NA>          2332735
## 5               10208334            <NA>          2100739
## 6               10182584            <NA>          2050992
##   results.aggravated_assault results.robbery results.year pagination.pages
## 1                    1099207          580509         1995                1
## 2                    1037049          535594         1996                1
## 3                    1023201          498534         1997                1
## 4                     976583          447186         1998                1
## 5                     911740          409371         1999                1
## 6                     911706          408016         2000                1
##   pagination.count pagination.page pagination.per_page
## 1               22               1                 100
## 2               22               1                 100
## 3               22               1                 100
## 4               22               1                 100
## 5               22               1                 100
## 6               22               1                 100

The NAn variables were removed so that the dataset can be scaled and neural network analysis could be performed.

crimes_clean <- crimes[, -c(1,9,14,15,16,17)] 
head(crimes_clean)
##   results.motor_vehicle_theft results.violent_crime results.larceny
## 1                     1472441               1798792         7997710
## 2                     1394238               1688540         7904685
## 3                     1354189               1636099         7743760
## 4                     1242781               1533887         7376311
## 5                     1152075               1426044         6955520
## 6                     1160002               1425486         6971590
##   results.rape_legacy results.homicide results.population
## 1               97470            21606          262803276
## 2               96252            19645          265228572
## 3               96153            18211          267783607
## 4               93144            16974          270248003
## 5               89411            15522          272690813
## 6               90178            15586          281421906
##   results.property_crime results.burglary results.aggravated_assault
## 1               12063935          2593784                    1099207
## 2               11805323          2506400                    1037049
## 3               11558975          2461026                    1023201
## 4               10951827          2332735                     976583
## 5               10208334          2100739                     911740
## 6               10182584          2050992                     911706
##   results.robbery results.year
## 1          580509         1995
## 2          535594         1996
## 3          498534         1997
## 4          447186         1998
## 5          409371         1999
## 6          408016         2000

The data was then scaled

head(crimes_clean)
##   results.motor_vehicle_theft results.violent_crime results.larceny
## 1                     1472441               1798792         7997710
## 2                     1394238               1688540         7904685
## 3                     1354189               1636099         7743760
## 4                     1242781               1533887         7376311
## 5                     1152075               1426044         6955520
## 6                     1160002               1425486         6971590
##   results.rape_legacy results.homicide results.population
## 1               97470            21606          262803276
## 2               96252            19645          265228572
## 3               96153            18211          267783607
## 4               93144            16974          270248003
## 5               89411            15522          272690813
## 6               90178            15586          281421906
##   results.property_crime results.burglary results.aggravated_assault
## 1               12063935          2593784                    1099207
## 2               11805323          2506400                    1037049
## 3               11558975          2461026                    1023201
## 4               10951827          2332735                     976583
## 5               10208334          2100739                     911740
## 6               10182584          2050992                     911706
##   results.robbery results.year
## 1          580509         1995
## 2          535594         1996
## 3          498534         1997
## 4          447186         1998
## 5          409371         1999
## 6          408016         2000
MaxValue<-apply(crimes_clean, 2, max)
MinValue<-apply(crimes_clean, 2, min)
length(MinValue)==ncol(crimes)
## [1] FALSE
crimesdf<-as.data.frame(scale(crimes_clean, center = MinValue, scale = MaxValue- MinValue))
ind<-sample(1:nrow(crimesdf),8)
traindf<-crimesdf[ind,]
testdf<-crimesdf[-ind,]

The neural network plot resulted in 4 layer graph. The first layer are all the variables that predict the last layer which in this case is results.homicide.

plot(neuralModel)

Predict for test data

predictions<- compute(neuralModel,testdf [,1:10])
str(predictions)
## List of 2
##  $ neurons   :List of 3
##   ..$ : num [1:14, 1:11] 1 1 1 1 1 1 1 1 1 1 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : chr [1:14] "2" "3" "5" "7" ...
##   .. .. ..$ : chr [1:11] "1" "results.motor_vehicle_theft" "results.violent_crime" "results.larceny" ...
##   ..$ : num [1:14, 1:5] 1 1 1 1 1 1 1 1 1 1 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : chr [1:14] "2" "3" "5" "7" ...
##   .. .. ..$ : NULL
##   ..$ : num [1:14, 1:3] 1 1 1 1 1 1 1 1 1 1 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : chr [1:14] "2" "3" "5" "7" ...
##   .. .. ..$ : NULL
##  $ net.result: num [1:14, 1] 0.451 0.507 0.441 0.37 0.324 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:14] "2" "3" "5" "7" ...
##   .. ..$ : NULL
predictions<- predictions$net.result*(max(testdf$results.homicide)-min(testdf$results.homicide))+min(testdf$results.homicide)
actualValues<- (testdf$results.homicide*(max(testdf$results.homicide)-min(testdf$results.homicide))+min(testdf$results.homicide))


MSE<- sum((predictions - actualValues)^2)/ nrow(testdf)
MSE
## [1] 0.01376197041

The Mean Square Error is 0.0494 which is pretty close to 0 which means that the neural network is fitting well in this dataset but it’s not small enough for the analysis to be accurate.