INTRODUCTION

This file reads in data from our ACA visualization project database. The data here are mostly from the 2014 (5 year wave) of the American Communicty Survey. I used data from two sources: demographics (age, sex,race) and from the economics (income, employment, insurance etc). Economic varaibles have the prefix ec in front of them. Note: some code use from Quick R examples at http://www.statmethods.net/advstats/cluster.html

What I do here is look at the posbile clusters of counties–using ACS data. I then look at a number of kmeans methods and then do a hierchcial clsutering method to see waht it looks like with 16 or 5 clusters. I choose these two becasue it seems that 5 is a “good” number from the clsutering output and 16 is what the state has used for regional rating areas.

rm()
#cluster
#install.packages("cluster")
#install.packages("NbClust")

#get libraries
  library(NbClust)
  library(cluster)
  library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
  library(ggplot2)
 
dev.off()
## null device 
##           1
# Read in Data--date set has be slightly cleaned to some tex vars and those with NA in total column

data1 <- read.csv("C:/Users/mgrace/Dropbox/!classes/!acs_tableau/clsuterdemogsfilega3.csv", stringsAsFactors=FALSE)

# change missing information to 0 for beds and docs
# and get rid of character data feilds for state and statefips

  data1$sum_emergency[is.na(data1$sum_emergency)]<-0
  data1$sum_beds[is.na(data1$sum_beds)]<-0
  data1$statecode<-NULL
  data1$statefips<-NULL

#hold onto rating area for later use

xran<-data1$rating_area_number
#drop rating area so that it does not confound clustering

data1$rating_area_number<-NULL

#scale data to make it ieasier to run in clsuter
# scale is generic function whose default method centers and/or scales the columns of a numeric matrix.

#make sure data stays in a data frame

  data2<-data.frame(scale(data1))

# Create soem variables in data1 for later
  
data1$pct_uninsured<-100*((data1$ECHC01_VC134)/(data1$ECHC01_VC130))
data1$pct_unemployed<-100*(data1$ECHC01_VC07/data1$ECHC01_VC03)

Here is a list of the variables used -need to look at the code book for data definitons

It is an excel spreadsheet with varaible names and labels

dplyr::tbl_df(data.frame(data2))
## Source: local data frame [159 x 210]
## 
##     HC01_VC03   HC01_VC04   HC01_VC05  HC01_VC08  HC01_VC09  HC01_VC10
##         (dbl)       (dbl)       (dbl)      (dbl)      (dbl)      (dbl)
## 1  -0.3321726 -0.33140176 -0.33283978 -0.3162455 -0.3250694 -0.3290945
## 2  -0.4087907 -0.41058896 -0.40702212 -0.3793349 -0.4007917 -0.3995694
## 3  -0.3868511 -0.38668310 -0.38694025 -0.3783643 -0.3790818 -0.3857738
## 4  -0.4462899 -0.44941845 -0.44326061 -0.4375713 -0.4477773 -0.4433087
## 5  -0.1245606 -0.11832299 -0.13041874 -0.1867234 -0.1705833 -0.2180886
## 6  -0.3329597 -0.32790866 -0.33766178 -0.3481677 -0.3337743 -0.3235335
## 7   0.0647823  0.07290835  0.05710997  0.1021938  0.1676500  0.1082987
## 8   0.2910493  0.30259491  0.28011271  0.2629910  0.2771432  0.3682750
## 9  -0.3387870 -0.34103896 -0.33660329 -0.3070787 -0.3641891 -0.3065297
## 10 -0.3271021 -0.32834530 -0.32587140 -0.3229319 -0.3448914 -0.3001132
## ..        ...         ...         ...        ...        ...        ...
## Variables not shown: HC01_VC11 (dbl), HC01_VC12 (dbl), HC01_VC13 (dbl),
##   HC01_VC14 (dbl), HC01_VC15 (dbl), HC01_VC16 (dbl), HC01_VC17 (dbl),
##   HC01_VC18 (dbl), HC01_VC19 (dbl), HC01_VC20 (dbl), HC01_VC23 (dbl),
##   HC01_VC26 (dbl), HC01_VC27 (dbl), HC01_VC28 (dbl), HC01_VC29 (dbl),
##   HC01_VC32 (dbl), HC01_VC33 (dbl), HC01_VC34 (dbl), HC01_VC37 (dbl),
##   HC01_VC38 (dbl), HC01_VC39 (dbl), HC01_VC43 (dbl), HC01_VC44 (dbl),
##   HC01_VC45 (dbl), HC01_VC48 (dbl), HC01_VC49 (dbl), HC01_VC50 (dbl),
##   HC01_VC51 (dbl), HC01_VC52 (dbl), HC01_VC53 (dbl), HC01_VC54 (dbl),
##   HC01_VC55 (dbl), HC01_VC56 (dbl), HC01_VC57 (dbl), HC01_VC58 (dbl),
##   HC01_VC59 (dbl), HC01_VC60 (dbl), HC01_VC61 (dbl), HC01_VC62 (dbl),
##   HC01_VC63 (dbl), HC01_VC64 (dbl), HC01_VC65 (dbl), HC01_VC66 (dbl),
##   HC01_VC67 (dbl), HC01_VC68 (dbl), HC01_VC69 (dbl), HC01_VC70 (dbl),
##   HC01_VC71 (dbl), HC01_VC72 (dbl), HC01_VC73 (dbl), HC01_VC74 (dbl),
##   HC01_VC77 (dbl), HC01_VC78 (dbl), HC01_VC79 (dbl), HC01_VC80 (dbl),
##   HC01_VC81 (dbl), HC01_VC82 (dbl), HC01_VC83 (dbl), HC01_VC87 (dbl),
##   HC01_VC88 (dbl), HC01_VC89 (dbl), HC01_VC90 (dbl), HC01_VC91 (dbl),
##   HC01_VC92 (dbl), HC01_VC93 (dbl), HC01_VC94 (dbl), HC01_VC95 (dbl),
##   HC01_VC96 (dbl), HC01_VC97 (dbl), HC01_VC98 (dbl), HC01_VC99 (dbl),
##   HC01_VC100 (dbl), HC01_VC101 (dbl), HC01_VC102 (dbl), HC01_VC104 (dbl),
##   countyfips (dbl), UR_code_2013 (dbl), ur_code_2006 (dbl), ur_code_1990
##   (dbl), ECHC01_VC03 (dbl), ECHC01_VC04 (dbl), ECHC01_VC05 (dbl),
##   ECHC01_VC06 (dbl), ECHC01_VC07 (dbl), ECHC01_VC08 (dbl), ECHC01_VC09
##   (dbl), ECHC01_VC11 (dbl), ECHC01_VC14 (dbl), ECHC01_VC15 (dbl),
##   ECHC01_VC16 (dbl), ECHC01_VC17 (dbl), ECHC01_VC19 (dbl), ECHC01_VC20
##   (dbl), ECHC01_VC22 (dbl), ECHC01_VC23 (dbl), ECHC01_VC27 (dbl),
##   ECHC01_VC28 (dbl), ECHC01_VC29 (dbl), ECHC01_VC30 (dbl), ECHC01_VC31
##   (dbl), ECHC01_VC32 (dbl), ECHC01_VC33 (dbl), ECHC01_VC36 (dbl),
##   ECHC01_VC40 (dbl), ECHC01_VC41 (dbl), ECHC01_VC42 (dbl), ECHC01_VC43
##   (dbl), ECHC01_VC44 (dbl), ECHC01_VC45 (dbl), ECHC01_VC49 (dbl),
##   ECHC01_VC50 (dbl), ECHC01_VC51 (dbl), ECHC01_VC52 (dbl), ECHC01_VC53
##   (dbl), ECHC01_VC54 (dbl), ECHC01_VC55 (dbl), ECHC01_VC56 (dbl),
##   ECHC01_VC57 (dbl), ECHC01_VC58 (dbl), ECHC01_VC59 (dbl), ECHC01_VC60
##   (dbl), ECHC01_VC61 (dbl), ECHC01_VC62 (dbl), ECHC01_VC66 (dbl),
##   ECHC01_VC67 (dbl), ECHC01_VC68 (dbl), ECHC01_VC69 (dbl), ECHC01_VC70
##   (dbl), ECHC01_VC74 (dbl), ECHC01_VC75 (dbl), ECHC01_VC76 (dbl),
##   ECHC01_VC77 (dbl), ECHC01_VC78 (dbl), ECHC01_VC79 (dbl), ECHC01_VC80
##   (dbl), ECHC01_VC81 (dbl), ECHC01_VC82 (dbl), ECHC01_VC83 (dbl),
##   ECHC01_VC84 (dbl), ECHC01_VC85 (dbl), ECHC01_VC86 (dbl), ECHC01_VC89
##   (dbl), ECHC01_VC90 (dbl), ECHC01_VC91 (dbl), ECHC01_VC92 (dbl),
##   ECHC01_VC93 (dbl), ECHC01_VC94 (dbl), ECHC01_VC97 (dbl), ECHC01_VC98
##   (dbl), ECHC01_VC99 (dbl), ECHC01_VC100 (dbl), ECHC01_VC101 (dbl),
##   ECHC01_VC103 (dbl), ECHC01_VC104 (dbl), ECHC01_VC105 (dbl), ECHC01_VC106
##   (dbl), ECHC01_VC107 (dbl), ECHC01_VC108 (dbl), ECHC01_VC109 (dbl),
##   ECHC01_VC110 (dbl), ECHC01_VC111 (dbl), ECHC01_VC112 (dbl), ECHC01_VC113
##   (dbl), ECHC01_VC114 (dbl), ECHC01_VC115 (dbl), ECHC01_VC118 (dbl),
##   ECHC01_VC120 (dbl), ECHC01_VC121 (dbl), ECHC01_VC122 (dbl), ECHC01_VC124
##   (dbl), ECHC01_VC125 (dbl), ECHC01_VC126 (dbl), ECHC01_VC130 (dbl),
##   ECHC01_VC131 (dbl), ECHC01_VC132 (dbl), ECHC01_VC133 (dbl), ECHC01_VC134
##   (dbl), ECHC01_VC137 (dbl), ECHC01_VC138 (dbl), ECHC01_VC141 (dbl),
##   ECHC01_VC142 (dbl), ECHC01_VC143 (dbl), ECHC01_VC144 (dbl), ECHC01_VC145
##   (dbl), ECHC01_VC146 (dbl), ECHC01_VC147 (dbl), ECHC01_VC148 (dbl),
##   ECHC01_VC149 (dbl), ECHC01_VC150 (dbl), ECHC01_VC151 (dbl), ECHC01_VC152
##   (dbl), ECHC01_VC153 (dbl), ECHC01_VC154 (dbl), ECHC01_VC155 (dbl),
##   ECHC01_VC156 (dbl), ECHC01_VC157 (dbl), Median_Income (dbl),
##   Median_Family_income (dbl), sum_beds (dbl), sum_emergency (dbl),
##   FIPSCode (dbl), Grand_Total (dbl), population (dbl), docs1k (dbl)

Determine the Nmmber of Clusters

#
wss <- (nrow(data2)-1)*sum(apply(data2,2,var))
for (i in 2:30) wss[i] <- sum(kmeans(data2, centers=i)$withinss)

Plots

This is a plot to help determine the number clusters

## Source: local data frame [30 x 2]
## 
##    cluster       wss
##      (chr)     (dbl)
## 1        1 33180.000
## 2        2 11035.510
## 3        3  7392.569
## 4        4  6531.915
## 5        5  6076.613
## 6        6  5920.597
## 7        7  5322.927
## 8        8  5157.568
## 9        9  5056.626
## 10      10  4868.683
## ..     ...       ...
## logical(0)

Now lets look at potential number of clusters using NbClus

output1<-NbClust(data2,min.nc=2,max.nc=20,method="kmeans",index="kl")
output2<-NbClust(data2,min.nc=2,max.nc=20,method="kmeans",index="ch")
output3<-NbClust(data2,min.nc=2,max.nc=20,method="kmeans",index="hartigan")
#output4<-NbClust(data2,min.nc=2,max.nc=20,method="kmeans",index="ccc")
#output5<-NbClust(data2,min.nc=2,max.nc=20,method="kmeans",index="scott")
#output6<-NbClust(data2,min.nc=2,max.nc=20,method="kmeans",index="marriot")
#output7<-NbClust(data2,min.nc=2,max.nc=20,method="kmeans",index="trcovw")
#output8<-NbClust(data2,min.nc=2,max.nc=20,method="kmeans",index="tracew")
#output9<-NbClust(data2,min.nc=2,max.nc=20,method="kmeans",index="friedman")
#output10<-NbClust(data2,min.nc=2,max.nc=20,method="kmeans",index="rubin")
output11<-NbClust(data2,min.nc=2,max.nc=20,method="kmeans",index="cindex")
output12<-NbClust(data2,min.nc=2,max.nc=20,method="kmeans",index="db")
output13<-NbClust(data2,min.nc=2,max.nc=20,method="kmeans",index="silhouette")
output14<-NbClust(data2,min.nc=2,max.nc=20,method="kmeans",index="duda")
output15<-NbClust(data2,min.nc=2,max.nc=20,method="kmeans",index="pseudot2")
output16<-NbClust(data2,min.nc=2,max.nc=20,method="kmeans",index="beale")
output17<-NbClust(data2,min.nc=2,max.nc=20,method="kmeans",index="ratkowsky")
output18<-NbClust(data2,min.nc=2,max.nc=20,method="kmeans",index="ball")
output19<-NbClust(data2,min.nc=2,max.nc=20,method="kmeans",index="ptbiserial")
output20<-NbClust(data2,min.nc=2,max.nc=20,method="kmeans",index="gap")
output21<-NbClust(data2,min.nc=2,max.nc=20,method="kmeans",index="frey")
output22<-NbClust(data2,min.nc=2,max.nc=20,method="kmeans",index="mcclain")
output23<-NbClust(data2,min.nc=2,max.nc=20,method="kmeans",index="gamma")
output24<-NbClust(data2,min.nc=2,max.nc=20,method="kmeans",index="gplus")
output25<-NbClust(data2,min.nc=2,max.nc=20,method="kmeans",index="tau")
output26<-NbClust(data2,min.nc=2,max.nc=20,method="kmeans",index="dunn")
output27<-NbClust(data2,min.nc=2,max.nc=20,method="kmeans",index="hubert")

## *** : The Hubert index is a graphical method of determining the number of clusters.
##                 In the plot of Hubert index, we seek a significant knee that corresponds to a 
##                 significant increase of the value of the measure i.e the significant peak in Hubert
##                 index second differences plot. 
## 
output28<-NbClust(data2,min.nc=2,max.nc=20,method="kmeans",index="sdindex")
output29<-NbClust(data2,min.nc=2,max.nc=20,method="kmeans",index="dindex")

## *** : The D index is a graphical method of determining the number of clusters. 
##                 In the plot of D index, we seek a significant knee (the significant peak in Dindex
##                 second differences plot) that corresponds to a significant increase of the value of
##                 the measure. 
## 
output30<-NbClust(data2,min.nc=2,max.nc=20,method="kmeans",index="sdbw")
x1<-data.frame(output1$Best.nc)

#get optional indices for kmeans method

list1<-c("kl","ch","hartigan","cindex","db","silhouette","duda","pseudot2","beale","ratkowsky","ball","ptbiserial","gap","frey","mcclain","gamma","gplus","tau","dunn","sdindex","sdbw")
l2<-data.frame(unlist(list1))


#put together into a table 
table<-rbind(output1$Best.nc[1],
             output2$Best.nc[1],
             output3$Best.nc[1],
             output11$Best.nc[1],
             output12$Best.nc[1],
             output13$Best.nc[1],
             output14$Best.nc[1],
             output15$Best.nc[1],
             output16$Best.nc[1],
             output17$Best.nc[1],
             output18$Best.nc[1],
             output19$Best.nc[1],
             output20$Best.nc[1],
             output21$Best.nc[1],
             output22$Best.nc[1],
             output23$Best.nc[1],
             output24$Best.nc[1],
             output25$Best.nc[1],
             output26$Best.nc[1],
             output28$Best.nc[1],
             output30$Best.nc[1])

table2<-cbind(data.frame(table),data.frame(l2))

# bar plot of cluster results

This Barplot shows the numer of clusters proposed by various methods (see Table 2 below for unlerying data and measures)

barplot(table2$Number_clusters,xlab="Various Methods", ylab="Number of Clusters")

# Show Table 2
table2
##    Number_clusters unlist.list1.
## 1                2            kl
## 2                2            ch
## 3                3      hartigan
## 4               19        cindex
## 5                2            db
## 6                2    silhouette
## 7                3          duda
## 8                3      pseudot2
## 9                3         beale
## 10               2     ratkowsky
## 11               3          ball
## 12               2    ptbiserial
## 13               2           gap
## 14               3          frey
## 15               2       mcclain
## 16               2         gamma
## 17               2         gplus
## 18               4           tau
## 19               2          dunn
## 20              11       sdindex
## 21              20          sdbw

Let’s now look at K Means for 16–Assume there are 16 clusters in GA.

# K-Means Cluster Analysis
fit <- kmeans(data2, 16) # 16 cluster solution
# get cluster means 
data2a<-aggregate(data2,by=list(fit$cluster),FUN=mean)
# append cluster assignment
data3 <- data.frame(data2, data.frame(fit$cluster))
p<-data.frame(cbind(fit$cluster,xran))

How well do clusters match up with the rating areas?

#### Note the square graph–it is a xy projection of corrleation between cluster “proposed”" and actual rating area assuming we have 16 clusters.

fit\(cluster<-as.factor(fit\)cluster) ggplot(data2, aes(HC01_VC03, Median_Family_income,color=fit$cluster)) + geom_point()

dplyr::summarise_each(p, funs(mean))
##         V1     xran
## 1 7.377358 8.320755
p<-dplyr::rename(p,fit=V1,rating_area=xran)
cor(p)
##                     fit rating_area
## fit          1.00000000 -0.04445346
## rating_area -0.04445346  1.00000000
p
##     fit rating_area
## 1     7          14
## 2     7          11
## 3     7           6
## 4    16           1
## 5    10          16
## 6     7          10
## 7     2           2
## 8     2           3
## 9     7          15
## 10    7          15
## 11    4          12
## 12    7          12
## 13    7           6
## 14   10          15
## 15    5          14
## 16   10          14
## 17    7           5
## 18   16           3
## 19    7           1
## 20   15           6
## 21    7          14
## 22    2           4
## 23    2           7
## 24    7           6
## 25   14          14
## 26   15           8
## 27    7          13
## 28   11           3
## 29    2           2
## 30    1           1
## 31   14           3
## 32    7          15
## 33    6           3
## 34   10          11
## 35   10          15
## 36   11           5
## 37    7          15
## 38    4           3
## 39   16          12
## 40    7           1
## 41   16           7
## 42    5          10
## 43   10          15
## 44    6           3
## 45    7          12
## 46    1          12
## 47    2           1
## 48    4           3
## 49    7          15
## 50    7          15
## 51    5          14
## 52    7           2
## 53    7           5
## 54    7          14
## 55   13           9
## 56   11           3
## 57    2          13
## 58   11           3
## 59    7          10
## 60    6           3
## 61   13          13
## 62    3           5
## 63    2           6
## 64    9          13
## 65   10          15
## 66   13           2
## 67    6           3
## 68    9          10
## 69    4          10
## 70    1          16
## 71   16           4
## 72    5           8
## 73    7          10
## 74   16           4
## 75    4           3
## 76    4          12
## 77    3          15
## 78    5           2
## 79   16           3
## 80    7          11
## 81    1           5
## 82    1           5
## 83    3          11
## 84    5          12
## 85   16           3
## 86    8          15
## 87    9          11
## 88    5           1
## 89   12          14
## 90    3           5
## 91   16          14
## 92    2          15
## 93    9          10
## 94    8           5
## 95   16           6
## 96    1           8
## 97   16           2
## 98   16           8
## 99   16           8
## 100   3          15
## 101   1           1
## 102  16          12
## 103   3          11
## 104  16           2
## 105   9           9
## 106  14           8
## 107   2           3
## 108   5           2
## 109  16           2
## 110   4           3
## 111   9          12
## 112   5          13
## 113   8           6
## 114  16           3
## 115   9          13
## 116   8          12
## 117  13          12
## 118   1           8
## 119  13          10
## 120   3           1
## 121  14           5
## 122  12           3
## 123   8           1
## 124   3          14
## 125   3          15
## 126   2           3
## 127   8          10
## 128   1           8
## 129   9           1
## 130   3           8
## 131   1           5
## 132   3          14
## 133   3           8
## 134   1          11
## 135   3           1
## 136   9          15
## 137   9          15
## 138   9          11
## 139  13          10
## 140   3          11
## 141   9           8
## 142   3          15
## 143   8          12
## 144  13          10
## 145   8           8
## 146   2           7
## 147   2           3
## 148   9           6
## 149   3           5
## 150   3          16
## 151   9           6
## 152   1           8
## 153   3          11
## 154  13          10
## 155   2           9
## 156   3          12
## 157   3           5
## 158   3          16
## 159   8           1
f1<-ggplot(p,aes(fit,rating_area))+geom_point()
f1<-f1+labs(y="Rating Area", x="K Means Fit for 16 Clusters")
f1<-f1+ggtitle('Number of Clusters in GA Counties')
f1

Can we look at underlying data and clusters?

fit$cluster<-as.factor(fit$cluster)
ggplot(data1, aes(HC01_VC03, Median_Family_income,color=fit$cluster)) + geom_point() +labs(x="Population in County",y="Med. Family Income")+ggtitle("Cluster Analysis: GA. County Demogs #1")

ggplot(data1, aes(UR_code_2013, Median_Family_income,color=fit$cluster)) + geom_point() +labs(x="<- Most Urban -- Urban Code -- Least Urban ->",y="Med. Family Income")+ggtitle("Cluster Analysis: GA. County Demogs #2")

ggplot(data1, aes(pct_uninsured, Median_Family_income,color=fit$cluster)) + geom_point() +labs(x="Percent Uninsured",y="Med. Family Income")+ggtitle("Cluster Analysis: GA. County Demogs #3")

ggplot(data1, aes(pct_unemployed, Median_Family_income,color=fit$cluster)) + geom_point() +labs(x="Percent Unemployed",y="Med. Family Income")+ggtitle("Cluster Analysis: GA. County Demogs #4")

Hierarchical Analysis

Now we see if we can do anyting else ….

d <- dist(data2, method = "euclidean") # distance matrix

fit <- hclust(d, method="ward.D") 
plot(fit,hang=-1,labels=FALSE,xlab="",sub="", main="Hierarchical 16 Cluster") # display dendogram

groups <- cutree(fit, k=16) # cut tree into 16 clusters
plot(groups, main="Countys in each of 16 Groups")

# draw dendogram with red borders around the 16 clusters 
rect.hclust(fit, k=16, border="red")

#counties in each cluster
  t<-rect.hclust(fit, k=16, border="red")

# show realtionship between clsuter and rating area
#show t
t
## [[1]]
## [1] 60
## 
## [[2]]
## [1] 67
## 
## [[3]]
## [1] 33
## 
## [[4]]
## [1] 44
## 
## [[5]]
## [1]  28  36  38  48  56  58  75  76 110
## 
## [[6]]
## [1] 31
## 
## [[7]]
## [1]  25 106 121
## 
## [[8]]
##  [1]   1   2   3   9  10  12  13  17  19  21  24  27  32  34  37  40  43
## [18]  45  46  49  50  52  55  59  73  81  82  86 101
## 
## [[9]]
##  [1]  14  30  53  65  70  96 116 118 120 123 128 131 133 134 135 142 143
## [18] 149 152 153 156 157 158
## 
## [[10]]
##  [1]  87 105 115 127 129 132 136 137 138 145 148 150 151 159
## 
## [[11]]
##  [1]  39  54  61  62  66  77  80  83  90  93 100 103 117 119 124 125 130
## [18] 139 140 144 154
## 
## [[12]]
## [1]   8  11  22  29  47  69  92 155
## 
## [[13]]
## [1]  89 122
## 
## [[14]]
##  [1]   5   7  16  20  23  26  35  57  63  64  68  78 107 126 141 146 147
## 
## [[15]]
## [1]  15  51  72  88 108
## 
## [[16]]
##  [1]   4   6  18  41  42  71  74  79  84  85  91  94  95  97  98  99 102
## [18] 104 109 111 112 113 114
group5<-cutree(fit,k=5)
plot(group5,main="Countys in each of 5 Groups")

plot(fit,hang=-1,labels=FALSE,xlab="",sub="", main="Hierarchical 5 Cluster") # display dendogram

rect.hclust(fit, k=5, border="blue")

Conclusion

Lots of ways to get at number of clusters–but it is not likley to be 16!