#Paper 1: Cross-cultural analysis
#Paper 2: Demographic (and linguistic??) - tie in Living Tongues institute?
#Load data
require(vegan)
## Loading required package: vegan
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.5-2
require(tidyverse)
## Loading required package: tidyverse
## ── Attaching packages ────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.0.0 ✔ purrr 0.2.5
## ✔ tibble 1.4.2 ✔ dplyr 0.7.6
## ✔ tidyr 0.8.1 ✔ stringr 1.3.1
## ✔ readr 1.1.1 ✔ forcats 0.3.0
## ── Conflicts ───────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
require(reshape2)
## Loading required package: reshape2
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
community_dataset <- read_csv("~/Dropbox/p/East Himalaya/community dataset.csv")
## Parsed with column specification:
## cols(
## Sl.no = col_integer(),
## `Name of the Informant` = col_character(),
## Gender = col_character(),
## Age = col_integer(),
## Occupation = col_character(),
## Education = col_character(),
## `Local name of the plant` = col_character(),
## `Botanical name of the plant` = col_character(),
## `Used as` = col_character(),
## `Plant part used` = col_character(),
## `Disease treated` = col_character(),
## Habit = col_character(),
## `Mode of Preparation` = col_character(),
## `Mode of administration` = col_character(),
## Ethnicity = col_character()
## )
#clean data a bit
community_dataset %>% mutate(
Informant.name=`Name of the Informant`,
Informant.age=`Age`,
Informant.gender=`Gender`,
Scientific.name=`Botanical name of the plant`,
Use.Category=`Used as`
) -> assam
###Clean Demographics
##Age and gender ok
assam %>%
group_by(Informant.name) %>%
summarize(age=unique(Informant.age),
gender=unique(Informant.gender),
Education=unique(Education),
Ethnicity=unique(Ethnicity))
## # A tibble: 420 x 5
## Informant.name age gender Education Ethnicity
## <chr> <int> <chr> <chr> <chr>
## 1 Aaroti Das 40 Female H.S Assamese
## 2 Aarti Mili 24 Female Matriculate Mishing
## 3 Abhilesh Narzary 29 Male Graduate Bodo
## 4 Agni Munda 18 Female Class V Adivasi
## 5 Ajen Tati 35 Male Class 8 Adivasi
## 6 Ajit Borah 62 Male 12th pass Assamese
## 7 Ajit Naik 62 Male Nil Adivasi
## 8 Albert Marak 27 Male Matriculate Garo
## 9 Alimia Mili 24 Female Matriculate Mishing
## 10 Alina Orang 50 Female Nil Adivasi
## # ... with 410 more rows
qplot(assam$Age)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

##need to standardize education
education_ranks <- read_csv("~/Dropbox/p/East Himalaya/education ranks.csv")
## Parsed with column specification:
## cols(
## Education = col_character(),
## Education_rank = col_integer()
## )
#beacuse I can't add in Office as I'm on a plane and need to activate, have to add in Matriculate manually
education_ranks[35,1]<-"Matriculate"
merge(assam,education_ranks,by="Education",all=T) -> assam
##Plant names
assam %>%
group_by(Scientific.name) %>%
summarize(n()) %>% dim()
## [1] 269 2
#Fix typos (could standardize against TROPICOS)
dplyr::recode(assam$Scientific.name,
'Ageratum conzoides'='Ageratum conizoides',
'Alocasia acuminata'='Alocassia acuminata',
'Alocasia cuminata'='Alocassia acuminata', #note check this
'Alocassia cuminata'='Alocassia acuminata', #note check this
'Aloe-vera'='Aloe vera',
'Alpinia niagra'='Alpinia nigra',
'Amarunthus spinosus'='Amaranthus spinosus',
'Ananas comosus'='Ananas cosmosus',
'Averrhoa carambola'='Averrhoea carambola',
'Bambusa sp'='Bambusa sp.',
'Cactus sp'='Cactus sp.',
'Capsicum sp'='Capsicum sp.',
'Ceratopteristhalictroides'='Ceratopteris thalictroides',
'Cissus quadangularis'='Cissus quadrangularis',
'Cissus quandrangularis'='Cissus quadrangularis',
'Citrus sp'='Citrus sp.',
'Colocasia affinis'='Colocassia affinis',
'Curcuma longa'='Cucurma longa',
'Cucurma caesia'='Curcuma caesia',
'Diplazium esculantumm'='Diplazium esculantum',
'Diplazium esculantum'='Diplazium esculantum',
'Ficus bengalenses'='Ficus bengalensis',
'Hibiscus rosa sinensis'='Hibiscus rosa-sinensis',
'Houttunya cordata'='Houttuynia cordata',
'Houtunnya cordata'='Houttuynia cordata',
'Hydrocotyle roundifolia'='Hydrocotyle rotundifolia',
'Ipomoea aquatica'='Ipomea aquatica',
'Momordica dioica'='Momordica dioca',
'Musa Balbisiana'='Musa balbisiana',
'Musa parasidica'='Musa paradisiaca',
'Musa sp'='Musa sp.',
'Nycthanthusarbor-tristis'='Nycthanthus arbortritis',
'Paederia foetida'='Paedaria foetida',
'Phlogocanthus thrysiformis'='Phlogocanthus thyrsiflorus',
'Piper Nigrum'='Piper nigrum',
'Saccharum Spontaneum'='Saccharum spontaneum',
'Sarcochlamy spulcherrima'='Sarcochlamys pulcherrima',
'Solonum indicum'='Solanum indicum',
'Spilanthus panniculata'='Spilanthus paniculata',
'Stenochlaenapalustris'='Stenochlaena palustris',
'Tagetes sp'='Tagetes sp.',
'Vitex neguno'='Vitex negundo',
'Zanthoxylamoxy phyllum'='Zanthoxylum oxyphyllum',
'Zingiber officinale'='Zingiber officinalis',
'Zizyphus marituana'='Ziziphus mauritiana'
) -> assam$Scientific.name
assam %>%
group_by(Scientific.name) %>%
summarize(n()) %>% dim() #nice, this fixed 39 synonyms
## [1] 230 2
##Use categories
assam %>%
group_by(Use.Category) %>%
summarize(n())
## # A tibble: 16 x 2
## Use.Category `n()`
## <chr> <int>
## 1 Broom 6
## 2 Constructing Apo/Saloni used for fishing 1
## 3 Food 2209
## 4 Food (used in rice beer) 4
## 5 Food/ Medicine 116
## 6 Food/medicine 1
## 7 Food/Medicine 207
## 8 Food/Ritual 3
## 9 Medicine 1114
## 10 Medicine/Ritual 5
## 11 Medicne 1
## 12 Ritual 215
## 13 Ritual/Medicine 4
## 14 Toli, Sakoni used for fishing 1
## 15 Wine 2
## 16 <NA> 1
#Standardize
dplyr::recode(assam$Use.Category,
'Broom'='Craft',
'Constructing Apo/Saloni used for fishing'='Craft',
'Food (used in rice beer)'='Food',
'Food/ Medicine'='Food and Medicine',
'Food/medicine'='Food and Medicine',
'Food/Medicine'='Food and Medicine',
'Food/Ritual'='Food and Ritual',
'Food/Ritual'='Food and Ritual',
'Medicne'='Medicine',
'Ritual/Medicine'='Ritual and Medicine',
'Medicine/Ritual'='Ritual and Medicine',
'Toli, Sakoni used for fishing'='Craft',
'Wine'='Food')-> assam$Use.Category
assam %>%
group_by(Use.Category) %>%
summarize(n())
## # A tibble: 8 x 2
## Use.Category `n()`
## <chr> <int>
## 1 Craft 8
## 2 Food 2215
## 3 Food and Medicine 324
## 4 Food and Ritual 3
## 5 Medicine 1115
## 6 Ritual 215
## 7 Ritual and Medicine 9
## 8 <NA> 1
#Expand "and" categories to multiple rows, so that a report of "Food and Medicine" becomes seperate Med and food reports
dim(assam)
## [1] 3890 21
Use_category_expansion <- read_csv("~/Documents/Use category expansion.csv")
## Parsed with column specification:
## cols(
## Use.Category = col_character(),
## Use.Category.Expanded = col_character()
## )
merge(assam,Use_category_expansion,by="Use.Category",all.x=T)->assam
dim(assam)
## [1] 4226 22
head(assam)
## Use.Category Education Sl.no Name of the Informant Gender Age
## 1 Craft Class 9 24 Tireswari Mili Female 32
## 2 Craft Nil 2 Deepika Munda Female 25
## 3 Craft Nil 43 Bimla Bhumij Female 68
## 4 Craft Nil 6 Nazira Mili Male 85
## 5 Craft Nil 22 Arun Gowala Male 50
## 6 Craft Nil 2 Deepika Munda Female 25
## Occupation Local name of the plant Botanical name of the plant
## 1 Housewife Eku/Bah Bambusa sp.
## 2 Housewife Datun Sida rhombifolia
## 3 Housewife Jharu gos Thysanolaena agrostis
## 4 Farmer Eku/Bah Bambusa sp
## 5 Daily wages Jharu gos Thysanolaena agrostis
## 6 Housewife Bedna <NA>
## Used as Plant part used
## 1 Toli, Sakoni used for fishing Shoot
## 2 Broom Branches and leaves
## 3 Broom Branches and leaves
## 4 Constructing Apo/Saloni used for fishing Shoot
## 5 Broom Branches and leaves
## 6 Broom Branches
## Disease treated Habit
## 1 <NA> Tree
## 2 <NA> Shrub
## 3 <NA> Shrub
## 4 <NA> Tree
## 5 <NA> Shrub
## 6 <NA> Shrub
## Mode of Preparation
## 1 <NA>
## 2 <NA>
## 3 <NA>
## 4 <NA>
## 5 First dry the parts for 6-7 days and then tie it in a bundle
## 6 <NA>
## Mode of administration Ethnicity Informant.name Informant.age
## 1 <NA> Mishing Tireswari Mili 32
## 2 <NA> Adivasi Deepika Munda 25
## 3 <NA> Adivasi Bimla Bhumij 68
## 4 <NA> Mishing Nazira Mili 85
## 5 <NA> Adivasi Arun Gowala 50
## 6 <NA> Adivasi Deepika Munda 25
## Informant.gender Scientific.name Education_rank
## 1 Female Bambusa sp. 3
## 2 Female Sida rhombifolia 0
## 3 Female Thysanolaena agrostis 0
## 4 Male Bambusa sp. 0
## 5 Male Thysanolaena agrostis 0
## 6 Female <NA> 0
## Use.Category.Expanded
## 1 Craft
## 2 Craft
## 3 Craft
## 4 Craft
## 5 Craft
## 6 Craft
#drop one plant with NA uses
assam[!is.na(assam$Use.Category.Expanded),]->assam
#make use category expanded into use category so I don't have to change code for now (could fix this later)
assam$Use.Category<-assam$Use.Category.Expanded
#make environmental matrix
assam %>%
group_by(Informant.name) %>%
summarize(gender=unique(Informant.gender),
age=unique(Informant.age),
ethnicity=unique(Ethnicity),
education=unique(Education_rank)
)-> plant.env
assam %>%
filter(!is.na(Use.Category))%>%
group_by(Informant.name) %>%
summarize(gender=unique(Informant.gender),
age=unique(Informant.age),
ethnicity=unique(Ethnicity),
education=unique(Education_rank)
)-> use.env
#make plant and use matrixes
assam %>%
#filter(!is.na(Use.Category))%>%
select(Informant.name,Scientific.name) %>%
dcast(Informant.name~Scientific.name) -> mat
## Using Scientific.name as value column: use value.var to override.
## Aggregation function missing: defaulting to length
row.names(mat) <- mat$Informant.name
mat <- mat[-1]
mat[is.na(mat)]<-0
mat->plant.mat
#This function will do the same for uses. I used straight use categories, because using plantuses gives me something exactly the same as using plants
assam %>%
filter(!is.na(Use.Category))%>%
select(Informant.name,Use.Category) %>%
dcast(Informant.name~Use.Category) -> mat
## Using Use.Category as value column: use value.var to override.
## Aggregation function missing: defaulting to length
row.names(mat) <- mat$Informant.name
mat <- mat[-1]
mat[is.na(mat)]<-0
mat->use.mat
#and plantuse
assam %>%
filter(!is.na(Use.Category))%>%
mutate(plantuse=paste(Scientific.name,Use.Category)) %>%
select(Informant.name,plantuse) %>%
dcast(Informant.name~plantuse) -> mat
## Using plantuse as value column: use value.var to override.
## Aggregation function missing: defaulting to length
row.names(mat) <- mat$Informant.name
mat <- mat[-1]
mat[is.na(mat)]<-0
mat->plantuse.mat
#how many dimensions should I use? (maybe 3 or 4?)
NMDS.scree<-function(x) { #where x is the name of the data frame variable
plot(rep(1,4),replicate(4,metaMDS(x,autotransform=T,k=1)$stress),xlim=c(1,6),ylim=c(0,0.5),xlab="# of Dimensions",ylab="Stress",main="NMDS stress plot")
for (i in 1:6) {
points(rep(i+1,4),replicate(4,metaMDS(x,distance="euclidian",autotransform=T,k=i+1)$stress))
}
}
#NMDS.scree(plant.mat)
#how many groups do I have? #2 or 5 groups
#plot(cascadeKM(vegdist(mat,method="euclidian"), 3, 6, iter = 500, criterion = "calinski")) (my computer too weak!)
#make mds with 2 dimensions
# mds<-metaMDS(mat,k=2,distance="euclidian",autotransform=T,trymax=50)
# ordiplot(mds,type='t')
plant.mat %>% metaMDS(dist="bray",k=3) -> plant.ord #vey good, stress <20
## Wisconsin double standardization
## Run 0 stress 0.1962313
## Run 1 stress 0.1962148
## ... New best solution
## ... Procrustes: rmse 0.01115025 max resid 0.07076447
## Run 2 stress 0.1968046
## Run 3 stress 0.1961903
## ... New best solution
## ... Procrustes: rmse 0.01170506 max resid 0.08831525
## Run 4 stress 0.1958962
## ... New best solution
## ... Procrustes: rmse 0.01226803 max resid 0.1425901
## Run 5 stress 0.2008338
## Run 6 stress 0.1960901
## ... Procrustes: rmse 0.009432989 max resid 0.07322056
## Run 7 stress 0.1982022
## Run 8 stress 0.1959133
## ... Procrustes: rmse 0.01031106 max resid 0.1288221
## Run 9 stress 0.1986733
## Run 10 stress 0.1967388
## Run 11 stress 0.1970135
## Run 12 stress 0.1993735
## Run 13 stress 0.1971877
## Run 14 stress 0.2013196
## Run 15 stress 0.1989059
## Run 16 stress 0.1982978
## Run 17 stress 0.1972811
## Run 18 stress 0.1973392
## Run 19 stress 0.1967464
## Run 20 stress 0.2020307
## *** No convergence -- monoMDS stopping criteria:
## 12: no. of iterations >= maxit
## 8: stress ratio > sratmax
use.mat %>% metaMDS(dist="euclidian") -> use.ord #check this
## Wisconsin double standardization
## Run 0 stress 0.0464434
## Run 1 stress 0.1901166
## Run 2 stress 0.1033631
## Run 3 stress 0.1893955
## Run 4 stress 0.1593276
## Run 5 stress 0.07025835
## Run 6 stress 0.1159641
## Run 7 stress 0.1680902
## Run 8 stress 0.171004
## Run 9 stress 0.07160334
## Run 10 stress 0.1369807
## Run 11 stress 0.05921883
## Run 12 stress 0.09413748
## Run 13 stress 0.1820808
## Run 14 stress 0.160971
## Run 15 stress 0.1897816
## Run 16 stress 0.18969
## Run 17 stress 0.1889496
## Run 18 stress 0.1900645
## Run 19 stress 0.1756312
## Run 20 stress 0.1841546
## *** No convergence -- monoMDS stopping criteria:
## 17: stress ratio > sratmax
## 3: scale factor of the gradient < sfgrmin
plantuse.mat %>% metaMDS(dist="euclidian") -> plantuse.ord #good, stress <20
## Run 0 stress 0.1511594
## Run 1 stress 0.1557177
## Run 2 stress 0.1525582
## Run 3 stress 0.1467797
## ... New best solution
## ... Procrustes: rmse 0.01740769 max resid 0.1337483
## Run 4 stress 0.153801
## Run 5 stress 0.152583
## Run 6 stress 0.155334
## Run 7 stress 0.1526096
## Run 8 stress 0.144863
## ... New best solution
## ... Procrustes: rmse 0.01535699 max resid 0.1374212
## Run 9 stress 0.1465494
## Run 10 stress 0.1583972
## Run 11 stress 0.1492531
## Run 12 stress 0.1478315
## Run 13 stress 0.1551337
## Run 14 stress 0.1494465
## Run 15 stress 0.1537329
## Run 16 stress 0.145503
## Run 17 stress 0.1507956
## Run 18 stress 0.1513942
## Run 19 stress 0.1524744
## Run 20 stress 0.1481116
## *** No convergence -- monoMDS stopping criteria:
## 6: no. of iterations >= maxit
## 14: stress ratio > sratmax
#all of these could probably use more runs ultimately
#Plantspace fit
plant.efit <-envfit(plant.ord,plant.env[,-1],na.rm=TRUE) #ethnicity and education (gender)
plant.efit
##
## ***VECTORS
##
## NMDS1 NMDS2 r2 Pr(>r)
## age 0.92450 -0.38119 0.0042 0.457
## education -0.62588 -0.77992 0.0985 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
##
## ***FACTORS:
##
## Centroids:
## NMDS1 NMDS2
## genderFemale -0.0223 0.0559
## genderMale 0.0237 -0.0592
## ethnicityAdivasi -0.2188 0.2047
## ethnicityAssamese -0.4089 -0.3618
## ethnicityBodo -0.3563 -0.1195
## ethnicityGaro 0.2038 -0.3661
## ethnicityMishing 0.0905 0.2777
## ethnicityNepali -0.0097 0.5202
## ethnicityNyishi 0.6994 -0.1552
##
## Goodness of fit:
## r2 Pr(>r)
## gender 0.0090 0.021 *
## ethnicity 0.5263 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
#Usespace fit
use.efit <-envfit(use.ord,use.env[-1],na.rm=TRUE) #ethnicity and education
use.efit
##
## ***VECTORS
##
## NMDS1 NMDS2 r2 Pr(>r)
## age 0.91412 -0.40543 0.0071 0.216
## education -0.17995 0.98368 0.1317 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
##
## ***FACTORS:
##
## Centroids:
## NMDS1 NMDS2
## genderFemale 0.0014 -0.0247
## genderMale -0.0015 0.0262
## ethnicityAdivasi -0.1492 -0.3855
## ethnicityAssamese 0.1316 0.0892
## ethnicityBodo -0.2213 0.1327
## ethnicityGaro -0.1232 0.1470
## ethnicityMishing -0.1493 -0.1272
## ethnicityNepali 0.5017 0.0173
## ethnicityNyishi 0.0097 0.1265
##
## Goodness of fit:
## r2 Pr(>r)
## gender 0.0037 0.213
## ethnicity 0.4971 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
#plantuse fit
plantuse.efit <-envfit(plantuse.ord,use.env[-1],na.rm=TRUE) #ethnicity and education
plantuse.efit
##
## ***VECTORS
##
## NMDS1 NMDS2 r2 Pr(>r)
## age 0.99623 0.08675 0.0104 0.111
## education -0.32915 -0.94428 0.0637 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
##
## ***FACTORS:
##
## Centroids:
## NMDS1 NMDS2
## genderFemale 0.0149 0.0099
## genderMale -0.0158 -0.0105
## ethnicityAdivasi 0.1377 0.8743
## ethnicityAssamese -0.6244 0.0318
## ethnicityBodo -0.4657 -0.0773
## ethnicityGaro 0.9324 0.0715
## ethnicityMishing -0.0218 -0.8983
## ethnicityNepali -0.7033 0.1291
## ethnicityNyishi 0.7451 -0.1312
##
## Goodness of fit:
## r2 Pr(>r)
## gender 0.0002 0.918
## ethnicity 0.2914 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
#Plantspace plot
plant.efit <-envfit(plant.ord,plant.env[,"ethnicity"],na.rm=TRUE)
centroids<-data.frame(plant.efit$factors$centroids)
centroids$ethnicity<-substr(rownames(centroids),10,17)
#centroids$NMDS2<-centroids$NMDS2+0.07
locations <- data.frame(plant.ord$points,plant.env)
locations<-merge(locations,centroids,by="ethnicity")
ggplot(locations, aes(MDS1,MDS2,colour=ethnicity))+
geom_point(alpha=I(0.3))+
geom_segment(aes(x=MDS1,xend=NMDS1,y=MDS2,yend=NMDS2),alpha=I(0.3))+
theme_bw()+
#scale_x_continuous("",limits=c(-1.5,1.5))+
#scale_y_continuous("",limits=c(-1.5,1.5))+
annotate(
geom="text",
x=centroids$NMDS1,
y=centroids$NMDS2,
label=centroids$ethnicity
)

#zoom
ggplot(locations, aes(MDS1,MDS2,colour=ethnicity))+
geom_point(alpha=I(0.3))+
geom_segment(aes(x=MDS1,xend=NMDS1,y=MDS2,yend=NMDS2),alpha=I(0.3))+
theme_bw()+
scale_x_continuous("",limits=c(-1.5,1))+
scale_y_continuous("",limits=c(-1.5,1.5))+
annotate(
geom="text",
x=centroids$NMDS1,
y=centroids$NMDS2,
label=centroids$ethnicity
)
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_segment).

#summary: Nyishi divergent, Msihing very tight, others generally overlapping
#Usespace plot
use.efit <-envfit(use.ord,use.env[,"ethnicity"],na.rm=TRUE)
centroids<-data.frame(use.efit$factors$centroids)
centroids$ethnicity<-substr(rownames(centroids),10,17)
#centroids$NMDS2<-centroids$NMDS2+0.07
locations <- data.frame(use.ord$points,use.env)
locations<-merge(locations,centroids,by="ethnicity")
uselocs<-data.frame(use.ord$species)
ggplot(locations, aes(MDS1,MDS2,colour=ethnicity))+
geom_point(alpha=I(0.3))+
geom_segment(aes(x=MDS1,xend=NMDS1,y=MDS2,yend=NMDS2),alpha=I(0.3))+
theme_bw()+
scale_x_continuous("")+
scale_y_continuous("")+
annotate(
geom="text",
x=centroids$NMDS1,
y=centroids$NMDS2,
label=centroids$ethnicity
)+
annotate(
geom="text",
x=uselocs$MDS1,
y=uselocs$MDS2,
label=rownames(uselocs),
color="red"
)

#this is very interesting, big diffrence in Msihing and adivasi infs in mentioning ritual and craft uses
#plantuse plot
plantuse.efit <-envfit(plantuse.ord,use.env[,"ethnicity"],na.rm=TRUE)
centroids<-data.frame(plantuse.efit$factors$centroids)
centroids$ethnicity<-substr(rownames(centroids),10,17)
#centroids$NMDS2<-centroids$NMDS2+0.07
locations <- data.frame(plantuse.ord$points,use.env)
locations<-merge(locations,centroids,by="ethnicity")
ggplot(locations, aes(MDS1,MDS2,colour=ethnicity))+
geom_point(alpha=I(0.3))+
geom_segment(aes(x=MDS1,xend=NMDS1,y=MDS2,yend=NMDS2),alpha=I(0.3))+
theme_bw()+
scale_x_continuous("",limits=c(-3,3))+
scale_y_continuous("")+
annotate(
geom="text",
x=centroids$NMDS1,
y=centroids$NMDS2,
label=centroids$ethnicity
)
## Warning: Removed 15 rows containing missing values (geom_point).
## Warning: Removed 15 rows containing missing values (geom_segment).

#very clearly combines patterns seen in both of the plots above
#what about simple metrics by age or education level?
assam %>% group_by(Informant.name) %>% summarize(
plants=length(unique(Scientific.name)),
plantuses=length(unique(paste0(Scientific.name,Use.Category))),
age=unique(Informant.age),
gender=unique(Informant.gender),
education=unique(Education_rank),
ethnicity=unique(Ethnicity)
) -> demographics
summary(lm(plantuses~age+education+ethnicity,data=demographics))
##
## Call:
## lm(formula = plantuses ~ age + education + ethnicity, data = demographics)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.4551 -2.5432 -0.6281 1.2783 27.8293
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.73350 1.01355 8.617 < 2e-16 ***
## age 0.08591 0.01648 5.212 2.96e-07 ***
## education 0.69714 0.26946 2.587 0.010 *
## ethnicityAssamese -5.68350 1.02876 -5.525 5.87e-08 ***
## ethnicityBodo -7.47081 1.03092 -7.247 2.13e-12 ***
## ethnicityGaro -4.52011 0.95771 -4.720 3.24e-06 ***
## ethnicityMishing 0.16161 0.90856 0.178 0.859
## ethnicityNepali -6.32330 0.88915 -7.112 5.12e-12 ***
## ethnicityNyishi -7.77246 0.86706 -8.964 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.545 on 411 degrees of freedom
## Multiple R-squared: 0.3169, Adjusted R-squared: 0.3036
## F-statistic: 23.83 on 8 and 411 DF, p-value: < 2.2e-16
qplot(age,plantuses,data=demographics,alpha=I(1/3),color=ethnicity)+theme_bw()+geom_smooth(method="lm")
