About the data

knitr::opts_chunk$set(echo = TRUE)
library(plyr)
library(rgdal)

## Loading required package: sp

## rgdal: version: 1.1-10, (SVN revision 622)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 1.11.4, released 2016/01/25
##  Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.3/Resources/library/rgdal/gdal
##  Loaded PROJ.4 runtime: Rel. 4.9.1, 04 March 2015, [PJ_VERSION: 491]
##  Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.3/Resources/library/rgdal/proj
##  Linking to sp version: 1.2-3

library(sp)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(data.table)

## 
## Attaching package: 'data.table'

## The following objects are masked from 'package:dplyr':
## 
##     between, last

counties <- readOGR("https://datahub.io/dataset/e82a4e76-ccf0-4d3a-b2c8-eda2b52ab32a/resource/f2693c94-fd69-4baf-887b-f302a5188ead/download/counties.geojson",
                    "OGRGeoJSON")

## OGR data source with driver: GeoJSON 
## Source: "https://datahub.io/dataset/e82a4e76-ccf0-4d3a-b2c8-eda2b52ab32a/resource/f2693c94-fd69-4baf-887b-f302a5188ead/download/counties.geojson", layer: "OGRGeoJSON"
## with 3220 features
## It has 26 fields

counties@data$FIPS <- as.character(counties@data$FIPS)
for (i in 1:nrow(counties@data) ) {
  if ( nchar( counties@data$FIPS[i] ) == 4) {
    counties@data$FIPS[i] <- paste("0",as.character(counties@data$FIPS[i]), sep = "")
  } else {
     counties@data$FIPS[i] <- counties@data$FIPS[i]
  }
}
counties@data$FIPS <- as.factor(counties@data$FIPS)
str(counties@data$FIPS)

##  Factor w/ 3220 levels "01001","01003",..: 976 1472 1879 2813 144 657 795 1395 2098 2398 ...

counties@data <- counties@data[,1:10]

sectors <- data.frame(cbind(
  c("11", "21","22", "23","31_33", "42","48_49", "51", "52","53", "54", "55", "56",
    "61", "62","71","72","81","92"),
  c("AgriForest",  "Mining", "Utilities","Construction", "Manufacturing",  "WholeTrade", 
    "RetailTrade", "Transportation",  "Information", "Finance", "RealEstate", "ProfScienceTech", 
    "Management",  "AdminWaste","Education","HealthCare","AccomodationFood","Other","PubAdmin"))
  )
names(sectors) <- c("sector", "description")

qcewGetIndustryData <- function (year, qtr, industry) {
  url <- "http://www.bls.gov/cew/data/api/YEAR/QTR/industry/INDUSTRY.csv"
  url <- sub("YEAR", year, url, ignore.case=FALSE)
  url <- sub("QTR", qtr, url, ignore.case=FALSE)
  url <- sub("INDUSTRY", industry, url, ignore.case=FALSE)
  read.csv(url, header = TRUE, sep = ",", quote="\"", dec=".", na.strings=" ", skip=0)
}
### Use this loop to go through the sectors loaded in the data frame 
for (i in 1:length(sectors[,1])) {
  if (i == 1) {
    print(paste("getting data for ", sectors[i,2]))
    dt1 <- qcewGetIndustryData("2015", "3", sectors[1,1])
    dt1 <- aggregate(dt1$month2_emplvl, by=list(Category=dt1$area_fips), FUN=sum)
    names(dt1) <- c("area_fips", as.character(sectors[1,2]))
    
  } else if (i == 2) {
    print(paste("getting data for ", sectors[i,2]))
    dt2 <- qcewGetIndustryData("2015", "3", sectors[i,1])    
    dt2 <- aggregate(dt2$month2_emplvl, by=list(Category=dt2$area_fips), FUN=sum)
    names(dt2) <- c("area_fips", as.character(sectors[i,2]))
    data <- merge(dt1, dt2, all=TRUE)
    
  } else {
    print(paste("getting data for ", sectors[i,2]))
    dt3 <- qcewGetIndustryData("2015", "3", sectors[i,1]) 
    dt3 <- aggregate(dt3$month2_emplvl, by=list(Category=dt3$area_fips), FUN=sum)
    names(dt3) <- c("area_fips", as.character(sectors[i,2]))
    data <- merge(data, dt3, all=TRUE)
    head(data)
      }
  
}

## [1] "getting data for  AgriForest"
## [1] "getting data for  Mining"
## [1] "getting data for  Utilities"
## [1] "getting data for  Construction"
## [1] "getting data for  Manufacturing"
## [1] "getting data for  WholeTrade"
## [1] "getting data for  RetailTrade"
## [1] "getting data for  Transportation"
## [1] "getting data for  Information"
## [1] "getting data for  Finance"
## [1] "getting data for  RealEstate"
## [1] "getting data for  ProfScienceTech"
## [1] "getting data for  Management"
## [1] "getting data for  AdminWaste"
## [1] "getting data for  Education"
## [1] "getting data for  HealthCare"
## [1] "getting data for  AccomodationFood"
## [1] "getting data for  Other"
## [1] "getting data for  PubAdmin"

Assign row names and column names for reference through the transformations

rownames(data) <- data$area_fips
colnames(data) <- c("FIPS", "AgriForest","Mining", "Utilities","Construction",  "Manufacturing", 
                    "WholeTrade", "RetailTrade","Transportation",  "Information", "Finance", 
                    "RealEstate", "ProfScienceTech", "Management", "AdminWaste", 
                    "Education","HealthCare", "AccomodationFood","Other","PubAdmin")
glimpse(data)

## Observations: 3,716
## Variables: 20
## $ FIPS             (fctr) 01000, 01001, 01003, 01005, 01007, 01009, 01...
## $ AgriForest       (int) 11620, 123, 693, 192, 0, 0, 213, 184, 0, 0, 1...
## $ Mining           (int) 6886, 75, 59, 188, 0, 0, NA, NA, 0, 0, NA, 0,...
## $ Utilities        (int) 23484, 105, 286, 52, 0, 88, 0, 99, 263, 82, 0...
## $ Construction     (int) 82230, 434, 3383, 141, 656, 410, 0, 220, 780,...
## $ Manufacturing    (int) 258386, 1475, 4092, 2214, 374, 1201, 0, 1279,...
## $ WholeTrade       (int) 73831, 138, 1636, 0, 129, 376, 0, 158, 1453, ...
## $ RetailTrade      (int) 68812, 198, 1696, 586, 35, 62, 20, 404, 1121,...
## $ Transportation   (int) 22190, 40, 332, 23, 0, 65, 26, 101, 502, 56, ...
## $ Information      (int) 70966, 312, 1822, 194, 81, 236, 45, 143, 1021...
## $ Finance          (int) 22384, 114, 2010, 44, 24, 30, 17, 36, 404, 33...
## $ RealEstate       (int) 97785, 189, 1843, 6, 50, 188, 0, 0, 1118, 166...
## $ ProfScienceTech  (int) 15394, 0, 304, 0, 0, 19, NA, 0, 104, 22, 0, 0...
## $ Management       (int) 120136, 0, 3157, 200, 0, 258, 0, 358, 3021, 4...
## $ AdminWaste       (int) 152204, 136, 1234, 488, 0, 974, 0, 495, 2669,...
## $ Education        (int) 259660, 967, 8115, 7, 0, 0, 416, 911, 6648, 0...
## $ HealthCare       (int) 22401, 80, 1262, 34, 10, 171, 0, 111, 135, 0,...
## $ AccomodationFood (int) 178131, 1374, 13500, 634, 236, 690, 0, 812, 4...
## $ Other            (int) 45591, 415, 2008, 139, 0, 313, 17, 105, 966, ...
## $ PubAdmin         (int) 124977, 782, 3835, 948, 445, 461, 264, 335, 5...

str(data)

## 'data.frame':    3716 obs. of  20 variables:
##  $ FIPS            : Factor w/ 3716 levels "01000","01001",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ AgriForest      : int  11620 123 693 192 0 0 213 184 0 0 ...
##  $ Mining          : int  6886 75 59 188 0 0 NA NA 0 0 ...
##  $ Utilities       : int  23484 105 286 52 0 88 0 99 263 82 ...
##  $ Construction    : int  82230 434 3383 141 656 410 0 220 780 199 ...
##  $ Manufacturing   : int  258386 1475 4092 2214 374 1201 0 1279 5961 1762 ...
##  $ WholeTrade      : int  73831 138 1636 0 129 376 0 158 1453 48 ...
##  $ RetailTrade     : int  68812 198 1696 586 35 62 20 404 1121 353 ...
##  $ Transportation  : int  22190 40 332 23 0 65 26 101 502 56 ...
##  $ Information     : int  70966 312 1822 194 81 236 45 143 1021 144 ...
##  $ Finance         : int  22384 114 2010 44 24 30 17 36 404 33 ...
##  $ RealEstate      : int  97785 189 1843 6 50 188 0 0 1118 166 ...
##  $ ProfScienceTech : int  15394 0 304 0 0 19 NA 0 104 22 ...
##  $ Management      : int  120136 0 3157 200 0 258 0 358 3021 475 ...
##  $ AdminWaste      : int  152204 136 1234 488 0 974 0 495 2669 0 ...
##  $ Education       : int  259660 967 8115 7 0 0 416 911 6648 0 ...
##  $ HealthCare      : int  22401 80 1262 34 10 171 0 111 135 0 ...
##  $ AccomodationFood: int  178131 1374 13500 634 236 690 0 812 4809 47 ...
##  $ Other           : int  45591 415 2008 139 0 313 17 105 966 145 ...
##  $ PubAdmin        : int  124977 782 3835 948 445 461 264 335 5303 551 ...

#this can only be computed for complete cases
data <- data[complete.cases(data),]

## cut out the US000 row, which is total US per sector
data <- data[data$FIPS != "US000",]

### Convert the data frame to a matrix and take a look
mat <- data.matrix( data[ ,2:length(data[1,]) ] )

# Calculate the LQ
share_tech_city <- mat / rowSums (mat)
share_tech_total <- colSums (mat) / sum (mat)
share_tech_total

##       AgriForest           Mining        Utilities     Construction 
##      0.009619727      0.005789681      0.005789169      0.057071168 
##    Manufacturing       WholeTrade      RetailTrade   Transportation 
##      0.103729722      0.049449108      0.044150633      0.024638739 
##      Information          Finance       RealEstate  ProfScienceTech 
##      0.048992362      0.018226407      0.073255521      0.018707948 
##       Management       AdminWaste        Education       HealthCare 
##      0.076458990      0.069321116      0.164563370      0.022518468 
## AccomodationFood            Other         PubAdmin 
##      0.112511308      0.036449343      0.058757220

lq <- t(share_tech_city) / share_tech_total
lq[is.na(lq)] <- 0 
lq[which(lq==Inf)] <- 0 
lq <- t(lq)


# here we go, this is the location quotient for each spatial unit - economic unit pair
colnames(lq) <- list("AgriForest", "Mining", "Utilities","Construction", "Manufacturing",  
                     "WholeTrade", "RetailTrade", "Transportation",  "Information", "Finance",
                     "RealEstate", "ProfScienceTech", "Management", "AdminWaste",
                     "Education","HealthCare", "AccomodationFood","Other","PubAdmin")
head(lq)

##       AgriForest    Mining Utilities Construction Manufacturing WholeTrade
## 01000  0.7289589 0.7177481 2.4480233    0.8695072      1.503230  0.9010315
## 01001  1.8378936 1.8620212 2.6070604    1.0930775      2.043934  0.4011425
## 01003  1.4051820 0.1987739 0.9636336    1.1562383      0.769475  0.6453376
## 01005  3.2773377 5.3319480 1.4749247    0.4056814      3.504751  0.0000000
## 01007  0.0000000 0.0000000 0.0000000    5.6345198      1.767414  1.2787955
## 01009  0.0000000 0.0000000 2.7428367    1.2962852      2.089168  1.3720277
##       RetailTrade Transportation Information   Finance RealEstate
## 01000   0.9405609      0.5434987   0.8741413 0.7411333 0.80554823
## 01001   0.6446237      0.2333563   0.9153859 0.8990457 0.37085099
## 01003   0.7492919      0.2628341   0.7254076 2.1510826 0.49073504
## 01005   2.1794323      0.1532823   0.6502136 0.3964006 0.01344911
## 01007   0.3885983      0.0000000   0.8104505 0.6454759 0.33457961
## 01009   0.2533893      0.4760235   0.8691948 0.2969981 0.46307460
##       ProfScienceTech Management AdminWaste   Education HealthCare
## 01000       0.4965752  0.9482095  1.3250131 0.952207395  0.6003275
## 01001       0.0000000  0.0000000  0.2820015 0.844639327  0.5106568
## 01003       0.3169637  0.8053935  0.3472256 0.961872365  1.0931572
## 01005       0.0000000  0.4295207  1.1559445 0.006984697  0.2479264
## 01007       0.0000000  0.0000000  0.0000000 0.000000000  0.2176862
## 01009       0.1832571  0.6088701  2.5352856 0.000000000  1.3702214
##       AccomodationFood     Other PubAdmin
## 01000        0.9554391 0.7548299 1.283596
## 01001        1.7553695 1.6365768 1.913038
## 01003        2.3404517 1.0745734 1.273111
## 01005        0.9252856 0.6261924 2.649292
## 01007        1.0282191 0.0000000 3.712518
## 01009        1.1065891 1.5494876 1.415706

Data were downloaded from the Bureau of Labor Statistics, and they contain the employment levels for each county at the 2-digit NAICS level. This same analysis could be replicated at various other levels of analysis by adapting the R script that lists the NAICS codes. More information on the BLS API and data availability can be found at the BLS website

Factor Analysis with Verimax Rotation of LQ data

Factor analysis provides insight to the underlying relationship between multiple variables in a data set of numeric data. For this post, we analyzed the location quotients of each county in the US to see what patterns would emerge in the co-occurence of sectors. In the output below, each factor represents a composite variable based on the loadings shown in the column. Using these loadings, the factors were applied to each county using matrix multiplication to determine a score for each factor shown. The resulting score was then attributed to that county as the factor score for that variable. This method essentially simplifies the economy into 5 composite sectors. Omitted columns represent insignificant loadings, and these were also removed from the matrix multiplication.

##########################################################
##  Perform Factor Analysis using Maximum Likelihood
##  with Varimax Rotation
##########################################################

# These are already complete cases
#lq <- lq[complete.cases(lq),]

fact1=factanal(lq[,c(1:17,19)],factors=5,rotation="varimax")
fact1

## 
## Call:
## factanal(x = lq[, c(1:17, 19)], factors = 5, rotation = "varimax")
## 
## Uniquenesses:
##       AgriForest           Mining        Utilities     Construction 
##            0.871            0.667            0.933            0.822 
##    Manufacturing       WholeTrade      RetailTrade   Transportation 
##            0.005            0.719            0.854            0.717 
##      Information          Finance       RealEstate  ProfScienceTech 
##            0.863            0.839            0.525            0.795 
##       Management       AdminWaste        Education       HealthCare 
##            0.707            0.947            0.005            0.712 
## AccomodationFood         PubAdmin 
##            0.005            0.005 
## 
## Loadings:
##                  Factor1 Factor2 Factor3 Factor4 Factor5
## AgriForest                                0.350         
## Mining                                    0.574         
## Utilities                                 0.223   0.101 
## Construction                     -0.111   0.392         
## Manufacturing    -0.437  -0.557  -0.551  -0.319  -0.297 
## WholeTrade        0.489  -0.120           0.138         
## RetailTrade              -0.110           0.358         
## Transportation    0.511                  -0.133         
## Information       0.366                                 
## Finance           0.142   0.367                         
## RealEstate        0.674           0.100                 
## ProfScienceTech   0.394           0.104  -0.170         
## Management        0.502                          -0.193 
## AdminWaste                        0.141   0.144         
## Education        -0.104           0.926  -0.336  -0.100 
## HealthCare                0.529                         
## AccomodationFood -0.247   0.935  -0.109  -0.214         
## PubAdmin         -0.159                           0.976 
## 
##                Factor1 Factor2 Factor3 Factor4 Factor5
## SS loadings      1.823   1.656   1.253   1.153   1.125
## Proportion Var   0.101   0.092   0.070   0.064   0.062
## Cumulative Var   0.101   0.193   0.263   0.327   0.389
## 
## Test of the hypothesis that 5 factors are sufficient.
## The chi square statistic is 6080.81 on 73 degrees of freedom.
## The p-value is 0

#get loading plot for factors 1 by 2-5
par(mfrow=c(3,3),mar = c(4,4,1,1))

plot(fact1$loadings, pch=18, col='red')
abline(h=0)
abline(v=0)
text(fact1$loadings, labels=colnames(lq),xpd=TRUE)

#get loading plot for factors 1 through 5
plot(fact1$loadings[,c(1,3)], pch=18, col='red')
abline(h=0)
abline(v=0)
text(fact1$loadings[,c(1,3)], labels=colnames(lq),xpd=TRUE)

plot(fact1$loadings[,c(1,4)], pch=18, col='red')
abline(h=0)
abline(v=0)
text(fact1$loadings[,c(1,4)], labels=colnames(lq),xpd=TRUE)

plot(fact1$loadings[,c(1,5)], pch=18, col='red')
abline(h=0)
abline(v=0)
text(fact1$loadings[,c(1,5)], labels=colnames(lq),xpd=TRUE)
# 4

plot(fact1$loadings[,c(2,3)], pch=18, col='red')
abline(h=0)
abline(v=0)
text(fact1$loadings[,c(2,3)], labels=colnames(lq),xpd=TRUE)

plot(fact1$loadings[,c(2,4)], pch=18, col='red')
abline(h=0)
abline(v=0)
text(fact1$loadings[,c(2,4)], labels=colnames(lq),xpd=TRUE)

plot(fact1$loadings[,c(2,5)], pch=18, col='red')
abline(h=0)
abline(v=0)
text(fact1$loadings[,c(2,5)], labels=colnames(lq),xpd=TRUE)

plot(fact1$loadings[,c(3,4)], pch=18, col='red')
abline(h=0)
abline(v=0)
text(fact1$loadings[,c(3,4)], labels=colnames(lq),xpd=TRUE)

plot(fact1$loadings[,c(3,5)], pch=18, col='red')
abline(h=0)
abline(v=0)
text(fact1$loadings[,c(3,5)], labels=colnames(lq),xpd=TRUE)

#8
#######################

#get reproduced correlation matrix
repro1=fact1$loadings%*%t(fact1$loadings)
#residual correlation matrix -- should be close to zero
resid1=fact1$cor-repro1
round(resid1,2)

##                  AgriForest Mining Utilities Construction Manufacturing
## AgriForest             0.87  -0.03     -0.02        -0.17             0
## Mining                -0.03   0.67      0.01        -0.12             0
## Utilities             -0.02   0.01      0.93        -0.07             0
## Construction          -0.17  -0.12     -0.07         0.82             0
## Manufacturing          0.00   0.00      0.00         0.00             0
## WholeTrade            -0.03  -0.06     -0.01        -0.06             0
## RetailTrade           -0.11  -0.10     -0.03        -0.11             0
## Transportation        -0.02   0.01      0.00        -0.07             0
## Information           -0.02  -0.08      0.02        -0.06             0
## Finance               -0.08   0.02     -0.02         0.00             0
## RealEstate            -0.05  -0.01     -0.01        -0.03             0
## ProfScienceTech       -0.02   0.00     -0.03        -0.09             0
## Management            -0.08  -0.10     -0.02        -0.12             0
## AdminWaste            -0.12  -0.16     -0.06        -0.16             0
## Education              0.00   0.00      0.00         0.00             0
## HealthCare            -0.05  -0.07     -0.01         0.00             0
## AccomodationFood       0.00   0.00      0.00         0.00             0
## PubAdmin               0.00   0.00      0.00         0.00             0
##                  WholeTrade RetailTrade Transportation Information Finance
## AgriForest            -0.03       -0.11          -0.02       -0.02   -0.08
## Mining                -0.06       -0.10           0.01       -0.08    0.02
## Utilities             -0.01       -0.03           0.00        0.02   -0.02
## Construction          -0.06       -0.11          -0.07       -0.06    0.00
## Manufacturing          0.00        0.00           0.00        0.00    0.00
## WholeTrade             0.72       -0.20          -0.01       -0.02   -0.10
## RetailTrade           -0.20        0.85          -0.02       -0.04   -0.06
## Transportation        -0.01       -0.02           0.72        0.01   -0.03
## Information           -0.02       -0.04           0.01        0.86    0.06
## Finance               -0.10       -0.06          -0.03        0.06    0.84
## RealEstate            -0.05       -0.06           0.00       -0.08    0.00
## ProfScienceTech       -0.14        0.03          -0.03        0.07   -0.01
## Management            -0.06       -0.03          -0.03       -0.11   -0.01
## AdminWaste            -0.13       -0.14          -0.06       -0.09   -0.04
## Education              0.00        0.00           0.00        0.00    0.00
## HealthCare            -0.05       -0.05          -0.05       -0.09    0.03
## AccomodationFood       0.00        0.00           0.00        0.00    0.00
## PubAdmin               0.00        0.00           0.00        0.00    0.00
##                  RealEstate ProfScienceTech Management AdminWaste
## AgriForest            -0.05           -0.02      -0.08      -0.12
## Mining                -0.01            0.00      -0.10      -0.16
## Utilities             -0.01           -0.03      -0.02      -0.06
## Construction          -0.03           -0.09      -0.12      -0.16
## Manufacturing          0.00            0.00       0.00       0.00
## WholeTrade            -0.05           -0.14      -0.06      -0.13
## RetailTrade           -0.06            0.03      -0.03      -0.14
## Transportation         0.00           -0.03      -0.03      -0.06
## Information           -0.08            0.07      -0.11      -0.09
## Finance                0.00           -0.01      -0.01      -0.04
## RealEstate             0.53            0.02      -0.09      -0.11
## ProfScienceTech        0.02            0.79      -0.01      -0.04
## Management            -0.09           -0.01       0.71      -0.08
## AdminWaste            -0.11           -0.04      -0.08       0.95
## Education              0.00            0.00       0.00       0.00
## HealthCare            -0.02            0.03      -0.02      -0.03
## AccomodationFood       0.00            0.00       0.00       0.00
## PubAdmin               0.00            0.00       0.00       0.00
##                  Education HealthCare AccomodationFood PubAdmin
## AgriForest               0      -0.05                0        0
## Mining                   0      -0.07                0        0
## Utilities                0      -0.01                0        0
## Construction             0       0.00                0        0
## Manufacturing            0       0.00                0        0
## WholeTrade               0      -0.05                0        0
## RetailTrade              0      -0.05                0        0
## Transportation           0      -0.05                0        0
## Information              0      -0.09                0        0
## Finance                  0       0.03                0        0
## RealEstate               0      -0.02                0        0
## ProfScienceTech          0       0.03                0        0
## Management               0      -0.02                0        0
## AdminWaste               0      -0.03                0        0
## Education                0       0.00                0        0
## HealthCare               0       0.71                0        0
## AccomodationFood         0       0.00                0        0
## PubAdmin                 0       0.00                0        0

#get root-mean squared residuals
len=length(resid1[upper.tri(resid1)])
RMSR1=sqrt(sum(resid1[upper.tri(resid1)]^2)/len)
RMSR1

## [1] 0.05426468

#get proportion of residuals greater than 0.05 in absolute value
sum(rep(1,len)[abs(resid1[upper.tri(resid1)])>0.05])/len

## [1] 0.254902

# smaller is better -- 0.2549 is ok but not great

Factor Characterizations

To make these factors understandable, we will go through each factor, look at the loadings and name them based on what they represent. ####Factor 1: The first factor has relatively high scores in Produducing and Natural Resources, and relatively low scores in Services. So counties with high scores in this factor would be

fal <- matrix(fact1$loadings[c(1:18),],nrow=18, ncol=5, dimnames = list(c("AgriForest",
"Mining", "Utilities",
"Construction", 
    "Manufacturing",  "WholeTrade", "RetailTrade",
"Transportation",  "Information", "Finance",
"RealEstate",  "ProfScienceTech", 
    "Management",  "AdminWaste","Education","HealthCare","AccomodationFood","PubAdmin"), 
    c("Factor1", "Factor2",   "Factor3",   "Factor4",    "Factor5")))


lqdf <- as.data.frame(lq)
lqdf$FIPS <- rownames(lqdf)

### Loop through the lq matrix and multiply 
for (i in 1:nrow(lq)) {
   lqdf$fact1[i] <- sum(data.frame(
     mapply(`*`,lqdf[ i, c(5,6,8:13,15,17,18)], fal[c(5,6,8:13,15,17,18),1],SIMPLIFY=FALSE)) )
   lqdf$fact2[i] <- sum(data.frame(
     mapply(`*`,lqdf[ i, c(5:7,10,16,17)], fal[c(5:7,10,16,17),2],SIMPLIFY=FALSE)) )
   lqdf$fact3[i] <- sum(data.frame(
     mapply(`*`,lqdf[ i, c(4,5,11,12,14,15,17)], fal[c(4,5,11,12,14,15,17),3],SIMPLIFY=FALSE)) )
   lqdf$fact4[i] <- sum(data.frame(
     mapply(`*`,lqdf[ i, c(1:8,12,14,15,17)], fal[c(1:8,12,14,15,17),4],SIMPLIFY=FALSE)) )
   lqdf$fact5[i] <- sum(data.frame(
     mapply(`*`,lqdf[ i, c(3,5,13,15,18)], fal[c(3,5,13,15,18),5],SIMPLIFY=FALSE)) )
}

counties@data <- join(x = counties@data, y = lqdf, by = "FIPS")

leaflet link

library(rgdal)
library(leaflet)
library(shiny)

pal <- colorQuantile("YlGn", NULL, n = 5)
pal2 <- colorQuantile("YlGnBu", NULL, n = 5)
pal3 <- colorQuantile("OrRd", NULL, n = 5)
pal4 <- colorQuantile("BuGn", NULL, n = 5)
pal5 <- colorQuantile("PuBu", NULL, n = 5)

county_popup <- paste0("<strong>County: </strong>", 
                      counties$NAME, 
                      "<br><strong>Factor 1: </strong>", 
                      round(counties$fact1),2)
county_popup2 <- paste0("<strong>County: </strong>", 
                      counties$NAME, 
                      "<br><strong>Factor 2: </strong>", 
                      round(counties$fact2),2)
county_popup3 <- paste0("<strong>County: </strong>", 
                      counties$NAME, 
                      "<br><strong>Factor 3: </strong>", 
                      round(counties$fact3),2)
county_popup4 <- paste0("<strong>County: </strong>", 
                      counties$NAME, 
                      "<br><strong>Factor 4: </strong>", 
                      round(counties$fact4),2)
county_popup5 <- paste0("<strong>County: </strong>", 
                      counties$NAME, 
                      "<br><strong>Factor 5: </strong>", 
                      round(counties$fact5),2)

leaflet() %>%
  setView(lng = -90, lat = 37, zoom = 3) %>% # center view on the US
  addProviderTiles("CartoDB.Positron") %>% # this sets the base layer
  addPolygons(data = counties,  # spatialPolygonDataFrame - could be lines or points
              group = "Factor 1", # this matches "base groups" below
              fillColor = ~ pal(fact1), # define the color column
              stroke = FALSE, # no outline
              fillOpacity = 0.6, 
              color = "#FFFFFF", 
              weight = 1, 
              popup = county_popup) %>%
    addPolygons(data = counties, 
              group = "Factor 2",
              fillColor = ~ pal2(fact2),
              stroke = FALSE,
              fillOpacity = 0.6, 
              color = "#BDBDC3", 
              weight = 1, 
              popup = county_popup2) %>%
    addPolygons(data = counties, 
              group = "Factor 3",
              fillColor = ~ pal3(fact3),
              stroke = FALSE,
              fillOpacity = 0.6, 
              color = "#BDBDC3", 
              weight = 1, 
              popup = county_popup3) %>%
    addPolygons(data = counties, 
              group = "Factor 4",
              fillColor = ~ pal4(fact4),
              stroke = FALSE,
              fillOpacity = 0.6, 
              color = "#BDBDC3", 
              weight = 1, 
              popup = county_popup4) %>%
    addPolygons(data = counties, 
              group = "Factor 5",
              fillColor = ~ pal5(fact5),
              stroke = FALSE,
              fillOpacity = 0.6, 
              color = "#BDBDC3", 
              weight = 1, 
              popup = county_popup5) %>%
  addLayersControl(
    baseGroups = c("Factor 1", "Factor 2", "Factor 3", "Factor 4","Factor 5"),
    #overlayGroups = c("Quakes", "Outline"),
    options = layersControlOptions(collapsed = FALSE)
  )

LQ-PCA

Ryan Thomas

June 23, 2016

About the data

Assign row names and column names for reference through the transformations

Factor Analysis with Verimax Rotation of LQ data

Factor Characterizations