#Paper 1: Cross-cultural analysis
#Paper 2: Demographic (and linguistic??) - tie in Living Tongues institute?

#Load data
require(vegan)
## Loading required package: vegan
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.5-2
require(tidyverse)
## Loading required package: tidyverse
## ── Attaching packages ────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.0.0     ✔ purrr   0.2.5
## ✔ tibble  1.4.2     ✔ dplyr   0.7.6
## ✔ tidyr   0.8.1     ✔ stringr 1.3.1
## ✔ readr   1.1.1     ✔ forcats 0.3.0
## ── Conflicts ───────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
require(reshape2)
## Loading required package: reshape2
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
community_dataset <- read_csv("~/Dropbox/p/East Himalaya/community dataset.csv")
## Parsed with column specification:
## cols(
##   Sl.no = col_integer(),
##   `Name of the Informant` = col_character(),
##   Gender = col_character(),
##   Age = col_integer(),
##   Occupation = col_character(),
##   Education = col_character(),
##   `Local name of the plant` = col_character(),
##   `Botanical name of the plant` = col_character(),
##   `Used as` = col_character(),
##   `Plant part used` = col_character(),
##   `Disease treated` = col_character(),
##   Habit = col_character(),
##   `Mode of Preparation` = col_character(),
##   `Mode of administration` = col_character(),
##   Ethnicity = col_character()
## )
#clean data a bit
community_dataset %>% mutate(
  
  Informant.name=`Name of the Informant`,
  Informant.age=`Age`,
  Informant.gender=`Gender`,
  Scientific.name=`Botanical name of the plant`,
  Use.Category=`Used as`
  ) -> assam

###Clean Demographics

##Age and gender ok
assam %>%
  group_by(Informant.name) %>%
  summarize(age=unique(Informant.age),
         gender=unique(Informant.gender),
         Education=unique(Education),
         Ethnicity=unique(Ethnicity))
## # A tibble: 420 x 5
##    Informant.name     age gender Education   Ethnicity
##    <chr>            <int> <chr>  <chr>       <chr>    
##  1 Aaroti Das          40 Female H.S         Assamese 
##  2 Aarti Mili          24 Female Matriculate Mishing  
##  3 Abhilesh Narzary    29 Male   Graduate    Bodo     
##  4 Agni Munda          18 Female Class V     Adivasi  
##  5 Ajen Tati           35 Male   Class 8     Adivasi  
##  6 Ajit Borah          62 Male   12th pass   Assamese 
##  7 Ajit Naik           62 Male   Nil         Adivasi  
##  8 Albert Marak        27 Male   Matriculate Garo     
##  9 Alimia Mili         24 Female Matriculate Mishing  
## 10 Alina Orang         50 Female Nil         Adivasi  
## # ... with 410 more rows
qplot(assam$Age)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

##need to standardize education
education_ranks <- read_csv("~/Dropbox/p/East Himalaya/education ranks.csv")
## Parsed with column specification:
## cols(
##   Education = col_character(),
##   Education_rank = col_integer()
## )
#beacuse I can't add in Office as I'm on a plane and need to activate, have to add in Matriculate manually
education_ranks[35,1]<-"Matriculate"
merge(assam,education_ranks,by="Education",all=T) -> assam

##Plant names
assam %>%
  group_by(Scientific.name) %>% 
  summarize(n()) %>% dim()
## [1] 269   2
#Fix typos (could standardize against TROPICOS)
dplyr::recode(assam$Scientific.name,
              'Ageratum conzoides'='Ageratum conizoides',
              'Alocasia acuminata'='Alocassia acuminata',
              'Alocasia cuminata'='Alocassia acuminata', #note check this
              'Alocassia cuminata'='Alocassia acuminata', #note check this
              'Aloe-vera'='Aloe vera',
              'Alpinia niagra'='Alpinia nigra',
              'Amarunthus spinosus'='Amaranthus spinosus',
              'Ananas comosus'='Ananas cosmosus',
              'Averrhoa carambola'='Averrhoea carambola',
              'Bambusa sp'='Bambusa sp.',
              'Cactus sp'='Cactus sp.',
              'Capsicum sp'='Capsicum sp.',
              'Ceratopteristhalictroides'='Ceratopteris thalictroides',
              'Cissus quadangularis'='Cissus quadrangularis',
              'Cissus quandrangularis'='Cissus quadrangularis',
              'Citrus sp'='Citrus sp.',
              'Colocasia affinis'='Colocassia affinis',
              'Curcuma longa'='Cucurma longa',
              'Cucurma caesia'='Curcuma caesia',
              'Diplazium esculantumm'='Diplazium esculantum',
              'Diplazium esculantum'='Diplazium esculantum',
              'Ficus bengalenses'='Ficus bengalensis',
              'Hibiscus rosa sinensis'='Hibiscus rosa-sinensis',
              'Houttunya cordata'='Houttuynia cordata',
              'Houtunnya cordata'='Houttuynia cordata',
              'Hydrocotyle roundifolia'='Hydrocotyle rotundifolia',
              'Ipomoea aquatica'='Ipomea aquatica',
              'Momordica dioica'='Momordica dioca',
              'Musa Balbisiana'='Musa balbisiana',
              'Musa parasidica'='Musa paradisiaca',
              'Musa sp'='Musa sp.',
              'Nycthanthusarbor-tristis'='Nycthanthus arbortritis',
              'Paederia foetida'='Paedaria foetida',
              'Phlogocanthus thrysiformis'='Phlogocanthus thyrsiflorus',
              'Piper Nigrum'='Piper nigrum',
              'Saccharum Spontaneum'='Saccharum spontaneum',
              'Sarcochlamy spulcherrima'='Sarcochlamys pulcherrima',
              'Solonum indicum'='Solanum indicum',
              'Spilanthus panniculata'='Spilanthus paniculata',
              'Stenochlaenapalustris'='Stenochlaena palustris',
              'Tagetes sp'='Tagetes sp.',
              'Vitex neguno'='Vitex negundo',
              'Zanthoxylamoxy phyllum'='Zanthoxylum oxyphyllum',
              'Zingiber officinale'='Zingiber officinalis',
              'Zizyphus marituana'='Ziziphus mauritiana'    
               ) -> assam$Scientific.name

assam %>%
  group_by(Scientific.name) %>%
  summarize(n()) %>% dim() #nice, this fixed 39 synonyms
## [1] 230   2
##Use categories
assam %>%
  group_by(Use.Category) %>%
  summarize(n())
## # A tibble: 16 x 2
##    Use.Category                             `n()`
##    <chr>                                    <int>
##  1 Broom                                        6
##  2 Constructing Apo/Saloni used for fishing     1
##  3 Food                                      2209
##  4 Food (used in rice beer)                     4
##  5 Food/ Medicine                             116
##  6 Food/medicine                                1
##  7 Food/Medicine                              207
##  8 Food/Ritual                                  3
##  9 Medicine                                  1114
## 10 Medicine/Ritual                              5
## 11 Medicne                                      1
## 12 Ritual                                     215
## 13 Ritual/Medicine                              4
## 14 Toli, Sakoni used for fishing                1
## 15 Wine                                         2
## 16 <NA>                                         1
#Standardize
dplyr::recode(assam$Use.Category,
              'Broom'='Craft',
              'Constructing Apo/Saloni used for fishing'='Craft',
              'Food (used in rice beer)'='Food',
              'Food/ Medicine'='Food and Medicine',
              'Food/medicine'='Food and Medicine',
              'Food/Medicine'='Food and Medicine',
              'Food/Ritual'='Food and Ritual',
              'Food/Ritual'='Food and Ritual',
              'Medicne'='Medicine',
              'Ritual/Medicine'='Ritual and Medicine',
              'Medicine/Ritual'='Ritual and Medicine',
              'Toli, Sakoni used for fishing'='Craft',
              'Wine'='Food')-> assam$Use.Category

assam %>%
  group_by(Use.Category) %>%
  summarize(n())
## # A tibble: 8 x 2
##   Use.Category        `n()`
##   <chr>               <int>
## 1 Craft                   8
## 2 Food                 2215
## 3 Food and Medicine     324
## 4 Food and Ritual         3
## 5 Medicine             1115
## 6 Ritual                215
## 7 Ritual and Medicine     9
## 8 <NA>                    1
#Expand "and" categories to multiple rows, so that a report of "Food and Medicine" becomes seperate Med and food reports
dim(assam)
## [1] 3890   21
Use_category_expansion <- read_csv("~/Documents/Use category expansion.csv")
## Parsed with column specification:
## cols(
##   Use.Category = col_character(),
##   Use.Category.Expanded = col_character()
## )
merge(assam,Use_category_expansion,by="Use.Category",all.x=T)->assam
dim(assam)
## [1] 4226   22
head(assam)
##   Use.Category Education Sl.no Name of the Informant Gender Age
## 1        Craft   Class 9    24        Tireswari Mili Female  32
## 2        Craft       Nil     2         Deepika Munda Female  25
## 3        Craft       Nil    43          Bimla Bhumij Female  68
## 4        Craft       Nil     6           Nazira Mili   Male  85
## 5        Craft       Nil    22           Arun Gowala   Male  50
## 6        Craft       Nil     2         Deepika Munda Female  25
##    Occupation Local name of the plant Botanical name of the plant
## 1   Housewife                 Eku/Bah                 Bambusa sp.
## 2   Housewife                   Datun            Sida rhombifolia
## 3   Housewife               Jharu gos       Thysanolaena agrostis
## 4      Farmer                 Eku/Bah                  Bambusa sp
## 5 Daily wages               Jharu gos       Thysanolaena agrostis
## 6   Housewife                   Bedna                        <NA>
##                                    Used as     Plant part used
## 1            Toli, Sakoni used for fishing               Shoot
## 2                                    Broom Branches and leaves
## 3                                    Broom Branches and leaves
## 4 Constructing Apo/Saloni used for fishing               Shoot
## 5                                    Broom Branches and leaves
## 6                                    Broom            Branches
##   Disease treated Habit
## 1            <NA>  Tree
## 2            <NA> Shrub
## 3            <NA> Shrub
## 4            <NA>  Tree
## 5            <NA> Shrub
## 6            <NA> Shrub
##                                            Mode of Preparation
## 1                                                         <NA>
## 2                                                         <NA>
## 3                                                         <NA>
## 4                                                         <NA>
## 5 First dry the parts for 6-7 days and then tie it in a bundle
## 6                                                         <NA>
##   Mode of administration Ethnicity Informant.name Informant.age
## 1                   <NA>   Mishing Tireswari Mili            32
## 2                   <NA>   Adivasi  Deepika Munda            25
## 3                   <NA>   Adivasi   Bimla Bhumij            68
## 4                   <NA>   Mishing    Nazira Mili            85
## 5                   <NA>   Adivasi    Arun Gowala            50
## 6                   <NA>   Adivasi  Deepika Munda            25
##   Informant.gender       Scientific.name Education_rank
## 1           Female           Bambusa sp.              3
## 2           Female      Sida rhombifolia              0
## 3           Female Thysanolaena agrostis              0
## 4             Male           Bambusa sp.              0
## 5             Male Thysanolaena agrostis              0
## 6           Female                  <NA>              0
##   Use.Category.Expanded
## 1                 Craft
## 2                 Craft
## 3                 Craft
## 4                 Craft
## 5                 Craft
## 6                 Craft
#drop one plant with NA uses
assam[!is.na(assam$Use.Category.Expanded),]->assam

#make use category expanded into use category so I don't have to change code for now (could fix this later)
assam$Use.Category<-assam$Use.Category.Expanded

#make environmental matrix
assam %>%
  group_by(Informant.name) %>%
  summarize(gender=unique(Informant.gender),
            age=unique(Informant.age),
            ethnicity=unique(Ethnicity),
            education=unique(Education_rank)
  )-> plant.env

assam %>%
  filter(!is.na(Use.Category))%>%
  group_by(Informant.name) %>%
  summarize(gender=unique(Informant.gender),
            age=unique(Informant.age),
            ethnicity=unique(Ethnicity),
            education=unique(Education_rank)
  )-> use.env

#make plant and use matrixes
assam %>%
    #filter(!is.na(Use.Category))%>%
    select(Informant.name,Scientific.name) %>%
    dcast(Informant.name~Scientific.name) -> mat
## Using Scientific.name as value column: use value.var to override.
## Aggregation function missing: defaulting to length
  row.names(mat) <- mat$Informant.name
  mat <- mat[-1]
  mat[is.na(mat)]<-0
  mat->plant.mat

#This function will do the same for uses. I used straight use categories, because using plantuses gives me something exactly the same as using plants
assam %>%
    filter(!is.na(Use.Category))%>%
    select(Informant.name,Use.Category) %>% 
    dcast(Informant.name~Use.Category) -> mat
## Using Use.Category as value column: use value.var to override.
## Aggregation function missing: defaulting to length
  row.names(mat) <- mat$Informant.name
  mat <- mat[-1]
  mat[is.na(mat)]<-0
  mat->use.mat
  
#and plantuse
assam %>%
    filter(!is.na(Use.Category))%>%
    mutate(plantuse=paste(Scientific.name,Use.Category)) %>%
    select(Informant.name,plantuse) %>% 
    dcast(Informant.name~plantuse) -> mat
## Using plantuse as value column: use value.var to override.
## Aggregation function missing: defaulting to length
  row.names(mat) <- mat$Informant.name
  mat <- mat[-1]
  mat[is.na(mat)]<-0
  mat->plantuse.mat
  
  #how many dimensions should I use? (maybe 3 or 4?)
  NMDS.scree<-function(x) { #where x is the name of the data frame variable
    plot(rep(1,4),replicate(4,metaMDS(x,autotransform=T,k=1)$stress),xlim=c(1,6),ylim=c(0,0.5),xlab="# of Dimensions",ylab="Stress",main="NMDS stress plot")
    for (i in 1:6) {
      points(rep(i+1,4),replicate(4,metaMDS(x,distance="euclidian",autotransform=T,k=i+1)$stress))
    }
  }
  
#NMDS.scree(plant.mat)
  
  #how many groups do I have? #2 or 5 groups
#plot(cascadeKM(vegdist(mat,method="euclidian"), 3, 6, iter = 500, criterion = "calinski")) (my computer too weak!)
  
  #make mds with 2 dimensions
 # mds<-metaMDS(mat,k=2,distance="euclidian",autotransform=T,trymax=50)
# ordiplot(mds,type='t')
  
plant.mat %>% metaMDS(dist="bray",k=3) -> plant.ord #vey good, stress <20
## Wisconsin double standardization
## Run 0 stress 0.1962313 
## Run 1 stress 0.1962148 
## ... New best solution
## ... Procrustes: rmse 0.01115025  max resid 0.07076447 
## Run 2 stress 0.1968046 
## Run 3 stress 0.1961903 
## ... New best solution
## ... Procrustes: rmse 0.01170506  max resid 0.08831525 
## Run 4 stress 0.1958962 
## ... New best solution
## ... Procrustes: rmse 0.01226803  max resid 0.1425901 
## Run 5 stress 0.2008338 
## Run 6 stress 0.1960901 
## ... Procrustes: rmse 0.009432989  max resid 0.07322056 
## Run 7 stress 0.1982022 
## Run 8 stress 0.1959133 
## ... Procrustes: rmse 0.01031106  max resid 0.1288221 
## Run 9 stress 0.1986733 
## Run 10 stress 0.1967388 
## Run 11 stress 0.1970135 
## Run 12 stress 0.1993735 
## Run 13 stress 0.1971877 
## Run 14 stress 0.2013196 
## Run 15 stress 0.1989059 
## Run 16 stress 0.1982978 
## Run 17 stress 0.1972811 
## Run 18 stress 0.1973392 
## Run 19 stress 0.1967464 
## Run 20 stress 0.2020307 
## *** No convergence -- monoMDS stopping criteria:
##     12: no. of iterations >= maxit
##      8: stress ratio > sratmax
use.mat %>% metaMDS(dist="euclidian") -> use.ord #check this
## Wisconsin double standardization
## Run 0 stress 0.0464434 
## Run 1 stress 0.1901166 
## Run 2 stress 0.1033631 
## Run 3 stress 0.1893955 
## Run 4 stress 0.1593276 
## Run 5 stress 0.07025835 
## Run 6 stress 0.1159641 
## Run 7 stress 0.1680902 
## Run 8 stress 0.171004 
## Run 9 stress 0.07160334 
## Run 10 stress 0.1369807 
## Run 11 stress 0.05921883 
## Run 12 stress 0.09413748 
## Run 13 stress 0.1820808 
## Run 14 stress 0.160971 
## Run 15 stress 0.1897816 
## Run 16 stress 0.18969 
## Run 17 stress 0.1889496 
## Run 18 stress 0.1900645 
## Run 19 stress 0.1756312 
## Run 20 stress 0.1841546 
## *** No convergence -- monoMDS stopping criteria:
##     17: stress ratio > sratmax
##      3: scale factor of the gradient < sfgrmin
plantuse.mat %>% metaMDS(dist="euclidian") -> plantuse.ord #good, stress <20
## Run 0 stress 0.1511594 
## Run 1 stress 0.1557177 
## Run 2 stress 0.1525582 
## Run 3 stress 0.1467797 
## ... New best solution
## ... Procrustes: rmse 0.01740769  max resid 0.1337483 
## Run 4 stress 0.153801 
## Run 5 stress 0.152583 
## Run 6 stress 0.155334 
## Run 7 stress 0.1526096 
## Run 8 stress 0.144863 
## ... New best solution
## ... Procrustes: rmse 0.01535699  max resid 0.1374212 
## Run 9 stress 0.1465494 
## Run 10 stress 0.1583972 
## Run 11 stress 0.1492531 
## Run 12 stress 0.1478315 
## Run 13 stress 0.1551337 
## Run 14 stress 0.1494465 
## Run 15 stress 0.1537329 
## Run 16 stress 0.145503 
## Run 17 stress 0.1507956 
## Run 18 stress 0.1513942 
## Run 19 stress 0.1524744 
## Run 20 stress 0.1481116 
## *** No convergence -- monoMDS stopping criteria:
##      6: no. of iterations >= maxit
##     14: stress ratio > sratmax
#all of these could probably use more runs ultimately

#Plantspace fit
plant.efit <-envfit(plant.ord,plant.env[,-1],na.rm=TRUE) #ethnicity and education (gender)
plant.efit
## 
## ***VECTORS
## 
##              NMDS1    NMDS2     r2 Pr(>r)    
## age        0.92450 -0.38119 0.0042  0.457    
## education -0.62588 -0.77992 0.0985  0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
## 
## ***FACTORS:
## 
## Centroids:
##                     NMDS1   NMDS2
## genderFemale      -0.0223  0.0559
## genderMale         0.0237 -0.0592
## ethnicityAdivasi  -0.2188  0.2047
## ethnicityAssamese -0.4089 -0.3618
## ethnicityBodo     -0.3563 -0.1195
## ethnicityGaro      0.2038 -0.3661
## ethnicityMishing   0.0905  0.2777
## ethnicityNepali   -0.0097  0.5202
## ethnicityNyishi    0.6994 -0.1552
## 
## Goodness of fit:
##               r2 Pr(>r)    
## gender    0.0090  0.021 *  
## ethnicity 0.5263  0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
#Usespace fit
use.efit <-envfit(use.ord,use.env[-1],na.rm=TRUE) #ethnicity and education
use.efit
## 
## ***VECTORS
## 
##              NMDS1    NMDS2     r2 Pr(>r)    
## age        0.91412 -0.40543 0.0071  0.216    
## education -0.17995  0.98368 0.1317  0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
## 
## ***FACTORS:
## 
## Centroids:
##                     NMDS1   NMDS2
## genderFemale       0.0014 -0.0247
## genderMale        -0.0015  0.0262
## ethnicityAdivasi  -0.1492 -0.3855
## ethnicityAssamese  0.1316  0.0892
## ethnicityBodo     -0.2213  0.1327
## ethnicityGaro     -0.1232  0.1470
## ethnicityMishing  -0.1493 -0.1272
## ethnicityNepali    0.5017  0.0173
## ethnicityNyishi    0.0097  0.1265
## 
## Goodness of fit:
##               r2 Pr(>r)    
## gender    0.0037  0.213    
## ethnicity 0.4971  0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
#plantuse fit
plantuse.efit <-envfit(plantuse.ord,use.env[-1],na.rm=TRUE) #ethnicity and education
plantuse.efit
## 
## ***VECTORS
## 
##              NMDS1    NMDS2     r2 Pr(>r)    
## age        0.99623  0.08675 0.0104  0.111    
## education -0.32915 -0.94428 0.0637  0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
## 
## ***FACTORS:
## 
## Centroids:
##                     NMDS1   NMDS2
## genderFemale       0.0149  0.0099
## genderMale        -0.0158 -0.0105
## ethnicityAdivasi   0.1377  0.8743
## ethnicityAssamese -0.6244  0.0318
## ethnicityBodo     -0.4657 -0.0773
## ethnicityGaro      0.9324  0.0715
## ethnicityMishing  -0.0218 -0.8983
## ethnicityNepali   -0.7033  0.1291
## ethnicityNyishi    0.7451 -0.1312
## 
## Goodness of fit:
##               r2 Pr(>r)    
## gender    0.0002  0.918    
## ethnicity 0.2914  0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
#Plantspace plot
plant.efit <-envfit(plant.ord,plant.env[,"ethnicity"],na.rm=TRUE)
centroids<-data.frame(plant.efit$factors$centroids)
  centroids$ethnicity<-substr(rownames(centroids),10,17)
  #centroids$NMDS2<-centroids$NMDS2+0.07
  locations <- data.frame(plant.ord$points,plant.env)
  locations<-merge(locations,centroids,by="ethnicity")
  
  ggplot(locations, aes(MDS1,MDS2,colour=ethnicity))+
    geom_point(alpha=I(0.3))+
    geom_segment(aes(x=MDS1,xend=NMDS1,y=MDS2,yend=NMDS2),alpha=I(0.3))+
    theme_bw()+
    #scale_x_continuous("",limits=c(-1.5,1.5))+
    #scale_y_continuous("",limits=c(-1.5,1.5))+
    annotate(
      geom="text",
      x=centroids$NMDS1,
      y=centroids$NMDS2,
      label=centroids$ethnicity
    )

  #zoom
  ggplot(locations, aes(MDS1,MDS2,colour=ethnicity))+
    geom_point(alpha=I(0.3))+
    geom_segment(aes(x=MDS1,xend=NMDS1,y=MDS2,yend=NMDS2),alpha=I(0.3))+
    theme_bw()+
    scale_x_continuous("",limits=c(-1.5,1))+
    scale_y_continuous("",limits=c(-1.5,1.5))+
    annotate(
      geom="text",
      x=centroids$NMDS1,
      y=centroids$NMDS2,
      label=centroids$ethnicity
    )
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_segment).

#summary: Nyishi divergent, Msihing very tight, others generally overlapping
  
#Usespace plot
  use.efit <-envfit(use.ord,use.env[,"ethnicity"],na.rm=TRUE)
  centroids<-data.frame(use.efit$factors$centroids)
  centroids$ethnicity<-substr(rownames(centroids),10,17)
  #centroids$NMDS2<-centroids$NMDS2+0.07
  locations <- data.frame(use.ord$points,use.env)
  locations<-merge(locations,centroids,by="ethnicity")
  
  uselocs<-data.frame(use.ord$species)
  
  ggplot(locations, aes(MDS1,MDS2,colour=ethnicity))+
    geom_point(alpha=I(0.3))+
    geom_segment(aes(x=MDS1,xend=NMDS1,y=MDS2,yend=NMDS2),alpha=I(0.3))+
    theme_bw()+
    scale_x_continuous("")+
    scale_y_continuous("")+
    annotate(
      geom="text",
      x=centroids$NMDS1,
      y=centroids$NMDS2,
      label=centroids$ethnicity
    )+
    annotate(
      geom="text",
      x=uselocs$MDS1,
      y=uselocs$MDS2,
      label=rownames(uselocs),
      color="red"
    )

  #this is very interesting, big diffrence in Msihing and adivasi infs in mentioning ritual and craft uses
  
#plantuse plot
  plantuse.efit <-envfit(plantuse.ord,use.env[,"ethnicity"],na.rm=TRUE)
  centroids<-data.frame(plantuse.efit$factors$centroids)
  centroids$ethnicity<-substr(rownames(centroids),10,17)
  #centroids$NMDS2<-centroids$NMDS2+0.07
  locations <- data.frame(plantuse.ord$points,use.env)
  locations<-merge(locations,centroids,by="ethnicity")
  
  ggplot(locations, aes(MDS1,MDS2,colour=ethnicity))+
    geom_point(alpha=I(0.3))+
    geom_segment(aes(x=MDS1,xend=NMDS1,y=MDS2,yend=NMDS2),alpha=I(0.3))+
    theme_bw()+
    scale_x_continuous("",limits=c(-3,3))+
    scale_y_continuous("")+
    annotate(
      geom="text",
      x=centroids$NMDS1,
      y=centroids$NMDS2,
      label=centroids$ethnicity
    )
## Warning: Removed 15 rows containing missing values (geom_point).
## Warning: Removed 15 rows containing missing values (geom_segment).

#very clearly combines patterns seen in both of the plots above
  
#what about simple metrics by age or education level?
  assam %>% group_by(Informant.name) %>% summarize(
    plants=length(unique(Scientific.name)),
    plantuses=length(unique(paste0(Scientific.name,Use.Category))),
    age=unique(Informant.age),
    gender=unique(Informant.gender),
    education=unique(Education_rank),
    ethnicity=unique(Ethnicity)
) -> demographics
  
summary(lm(plantuses~age+education+ethnicity,data=demographics))
## 
## Call:
## lm(formula = plantuses ~ age + education + ethnicity, data = demographics)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.4551 -2.5432 -0.6281  1.2783 27.8293 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        8.73350    1.01355   8.617  < 2e-16 ***
## age                0.08591    0.01648   5.212 2.96e-07 ***
## education          0.69714    0.26946   2.587    0.010 *  
## ethnicityAssamese -5.68350    1.02876  -5.525 5.87e-08 ***
## ethnicityBodo     -7.47081    1.03092  -7.247 2.13e-12 ***
## ethnicityGaro     -4.52011    0.95771  -4.720 3.24e-06 ***
## ethnicityMishing   0.16161    0.90856   0.178    0.859    
## ethnicityNepali   -6.32330    0.88915  -7.112 5.12e-12 ***
## ethnicityNyishi   -7.77246    0.86706  -8.964  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.545 on 411 degrees of freedom
## Multiple R-squared:  0.3169, Adjusted R-squared:  0.3036 
## F-statistic: 23.83 on 8 and 411 DF,  p-value: < 2.2e-16
qplot(age,plantuses,data=demographics,alpha=I(1/3),color=ethnicity)+theme_bw()+geom_smooth(method="lm")