Start R
load package if necessary: I don’t know what my installation on your computer has, but you may need to install the package ggplot2 by running >install.packages(“ggplot2”)

Load the good plotting package

require(ggplot2)

## Loading required package: ggplot2

get data: export your numbers document as a CSV, then read it into R (mine happened to be in Downloads)

instar.measurements <- read.table("~/Downloads/instar measurements.csv", sep="," , h=TRUE) #read a document where the seperators are ",", and which has a header row (h=TRUE); and save this into the new variable instar.measurements

head(instar.measurements) #take a look at the first few rows with the head command

##       Document sex instar       eye    femur  pronotum femur.eye
## 1 P1220507.psd         NA 198.73261 628.6017 200.56008  3.163053
## 2 P1220949.psd         NA 155.36288 521.1288 176.89872  3.354269
## 3 P1220949.psd         NA 152.89075 499.6399 177.44362  3.267954
## 4 DSCN9670.psd         NA  79.71865 296.0612  95.74064  3.713825
## 5 DSCN9674.psd         NA  89.80126 287.6240  88.67164  3.202895
## 6 P1230134.psd          1 105.16348 370.9281 112.41112  3.527157
##   femur.pronotum pronotum.femur.x.10 pronotum.eye       date duration
## 1       3.134232            3.190575     1.009196 06/13/2014     23:3
## 2       2.945916            3.394530     1.138616 06/21/2014     24:4
## 3       2.815767            3.551430     1.160591 06/21/2014     24:4
## 4       3.092325            3.233813     1.200982  6/23/2009     24:6
## 5       3.243697            3.082902     0.987421  6/23/2009     24:6
## 6       3.299746            3.030537     1.068918 06/25/2014     25:1
##   day.of.year
## 1         164
## 2         172
## 3         172
## 4         174
## 5         174
## 6         176

start plotting right away!

ggplot(instar.measurements,aes(femur.eye,femur.pronotum)) + # set up the plot with femur.eye on x axis, and femur.pronotum on the y. The plus indicates there's more to come and R shouldn't evaluate yet.
  geom_point(alpha=I(0.3)) +   #plot as points with 50% transparency
  theme_bw() #a nice clean chart style

Looks good, but we can add some things:

date

ggplot(instar.measurements,aes(femur.eye,femur.pronotum)) +
      geom_point(aes(size=day.of.year),alpha=I(0.3)) + #add size by day of year
      theme_bw()

#plotting date make it clear that the relationship (at least of femur/eye), is going in the directions we'd expect.

instar

ggplot(instar.measurements,aes(femur.eye,femur.pronotum)) +
      geom_point(aes(size=day.of.year),alpha=I(0.3)) +
      geom_text(aes(label=instar)) + #add text labels for instar
      theme_bw()

sex

ggplot(instar.measurements,aes(femur.eye,femur.pronotum)) +
      geom_point(aes(size=day.of.year),alpha=I(0.3)) +
      geom_text(aes(label=instar)) +
      geom_point(data=subset(instar.measurements, sex!=""),aes(colour=sex,size=day.of.year),pch=1) + #ignoring the points for which sex is blank, draw rings (pch=1), appropriately size, whose color indicates sex.
      theme_bw()

## Warning: Removed 27 rows containing missing values (geom_text).

Alternative axes

ggplot(instar.measurements,aes(femur.eye,pronotum.eye)) +
      geom_point(aes(size=day.of.year),alpha=I(0.3)) +
      geom_text(aes(label=instar)) +
        geom_point(data=subset(instar.measurements, sex!=""),aes(colour=sex,size=day.of.year),pch=1) +
      theme_bw()

Alternative axes

ggplot(instar.measurements,aes(femur.pronotum,pronotum.eye)) +
      geom_point(aes(size=day.of.year),alpha=I(0.3)) +
      geom_text(aes(label=instar)) + 
      geom_point(data=subset(instar.measurements, sex!=""),aes(colour=sex,size=day.of.year),pch=1) +
      theme_bw()

Just for fun, lets see what K-means clustering does with it. First with the best axes, which is maybe femur.eye? We’ll drop sex for now and set coour to equal the categorization from Kmeans

clusters<-kmeans(instar.measurements$femur.eye, #what data
                 6,                             #how many groups
                 iter.max=50,                   #default algorithm settings
                 nstart=50
                 )

ggplot(instar.measurements, aes(femur.eye,0)) + 
  geom_point(aes(colour=as.factor(clusters$cluster),size=day.of.year),alpha=I(0.3)) +
  geom_text(aes(label=instar))

Here’s Kmeans clustering considering that second data axis as well (Petal.Width).

clusters<-kmeans(instar.measurements[,c(7,10)],  #consider columns 7 & 10
                6,
                iter.max=50,
                nstart=50
                )

ggplot(instar.measurements, aes(femur.eye,pronotum.eye)) + 
  geom_point(aes(colour=as.factor(clusters$cluster),size=day.of.year),alpha=I(0.3)) +
  geom_text(aes(label=instar))

Addiing in all the axes (3 morphological ratios and day of year), scaling first to make sure they’re comparable units.

data <- instar.measurements[,c(7,8,10,13)]

data <- scale(data)

clusters<-kmeans(data,
                6,
                iter.max=50,
                nstart=50
                ) 

ggplot(instar.measurements, aes(femur.eye,pronotum.eye)) + 
  geom_point(aes(colour=as.factor(clusters$cluster),size=day.of.year),alpha=I(0.3)) +
  geom_text(aes(label=instar))

To me, this seems to do extremely well – by adding in day. of year, we’re now identifing all of the first-instar individuals in one group (blue), and the adults are all put in two groups (green and olive, which by the way look to be at least loosely associated with sex)

This becomes more clear by plotting day of year on the x axis:

ggplot(instar.measurements, aes(day.of.year,pronotum.eye)) + 
  geom_point(aes(colour=as.factor(clusters$cluster),size=4),alpha=I(0.3)) +
  geom_text(aes(label=instar))

UPDATED HERE

So here’s the final thing that maybe gets at your question (identify the three middle instars). This is all the data columns, with adult rows removed for now to make the clustering algorithm’s job easier. We’ll run the clusters once, and visualize them by applying them to the data plotted along several different pairs of axes.

data <- subset(instar.measurements,instar<5|is.na(instar))

ddata <- data[,c(7,8,10,13)]

cdata <- scale(ddata)

clusters<-kmeans(cdata,
                4,
                iter.max=50,
                nstart=50
                ) 

ggplot(data, aes(femur.eye,pronotum.eye)) + 
  geom_point(aes(colour=as.factor(clusters$cluster),size=day.of.year),alpha=I(0.3)) +
  geom_text(aes(label=instar))

ggplot(data, aes(pronotum.eye,femur.pronotum)) + 
  geom_point(aes(colour=as.factor(clusters$cluster),size=day.of.year),alpha=I(0.3)) +
  geom_text(aes(label=instar))

ggplot(data,aes(day.of.year,femur.eye)) + 
  geom_point(aes(colour=as.factor(clusters$cluster),size=4),alpha=I(0.3)) +
  geom_text(aes(label=data$instar))

Finally, with ordiation we can reduced our dimesions down to two, and so get a (slightly squished) 2D view of the whole thing, which is presumably more similar to 4-dimensional shape in which Kmeans is working:

require(vegan) #laod ordination package

## Loading required package: vegan
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.3-2

hopMDS <- metaMDS(cdata+2) #run a non-metric multidimensional scaling algorithm on our data

## Run 0 stress 0.08107029 
## Run 1 stress 0.08133139 
## ... procrustes: rmse 0.03828469  max resid 0.1331709 
## Run 2 stress 0.08133137 
## ... procrustes: rmse 0.03826607  max resid 0.1331509 
## Run 3 stress 0.1033685 
## Run 4 stress 0.09217701 
## Run 5 stress 0.1070942 
## Run 6 stress 0.3969992 
## Run 7 stress 0.1116534 
## Run 8 stress 0.1127498 
## Run 9 stress 0.09545618 
## Run 10 stress 0.1027006 
## Run 11 stress 0.0961535 
## Run 12 stress 0.08137107 
## ... procrustes: rmse 0.02258616  max resid 0.07050449 
## Run 13 stress 0.08266918 
## Run 14 stress 0.08087881 
## ... New best solution
## ... procrustes: rmse 0.01182668  max resid 0.04452813 
## Run 15 stress 0.1144524 
## Run 16 stress 0.09285081 
## Run 17 stress 0.09217758 
## Run 18 stress 0.09532568 
## Run 19 stress 0.08054607 
## ... New best solution
## ... procrustes: rmse 0.03543392  max resid 0.1286653 
## Run 20 stress 0.0813314

ggplot(data.frame(hopMDS$points), aes(MDS1,MDS2)) + 
  geom_point(aes(colour=as.factor(clusters$cluster),size=4),alpha=I(0.3)) +
  geom_text(aes(label=data$instar))

We can plot the same ordination with index numbers:

plot(hopMDS,type="t",display="sites")

ordispider(hopMDS,clusters$cluster)

And here are the file names for these:

data.frame(data$Document,clusters$cluster)

##    data.Document clusters.cluster
## 1   P1220507.psd                2
## 2   P1220949.psd                2
## 3   P1220949.psd                2
## 4   DSCN9670.psd                2
## 5   DSCN9674.psd                2
## 6   P1230134.psd                2
## 7   P1230162.psd                3
## 8   P1230225.psd                3
## 9   P1230234.psd                2
## 10  P1230284.psd                3
## 11  P1230322.psd                3
## 12  P1230379.psd                2
## 13  P1230381.psd                2
## 14  P1230415.psd                3
## 15  P1230150.psd                3
## 16  P1230808.psd                2
## 17  P1230810.psd                2
## 18  DSCN0479.psd                2
## 19  DSCN1153.psd                2
## 20  DSCN1324.psd                1
## 21  DSCN1337.psd                1
## 22  DSCN1347.psd                1
## 23  DSCN1431.psd                2
## 24  DSCN1441.psd                1
## 25  DSCN1459.psd                2
## 26  P1250452.psd                1
## 27  P1250509.psd                1
## 28  P1250509.psd                1
## 29  DSCN1858.psd                1
## 30  DSCN9740.psd                3
## 31  DSCN9824.psd                2
## 32  P1040583.psd                1
## 33  P1040648.psd                1
## 34  DSCN5600.psd                4
## 35  DSCN5602.psd                4
## 40  P1160143.psd                4
## 46  DSCN7458.psd                4

Nisquallia

February 29, 2016

UPDATED HERE