library(tidyverse)
# Historical allocation table ("hist").
# One row per team member; the twelve topic columns hold the magnitude assigned
# to each topic (NA where nothing was entered) and TOTAL holds the row total
# as entered (0 or 10 in the rows below).
# The pipeline after the table keeps only the rows whose TOTAL is 10, drops
# TOTAL, melts to long format (Names, variable, value) and tags every row with
# source = "hist".
hist_tbl<-tibble::tribble(~Names, ~Climate.Change, ~Data.Science, ~Engineering.Design, ~ESG, ~Geochemistry, ~Goldsim, ~Hydraulics, ~Hydrology, ~Project.Management, ~Water.Management, ~Water.Treatment, ~Water.Quality, ~TOTAL,
"Andrea Bowie", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0L,
"Anne Day", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0L,
"Brandon Smith", NA, NA, NA, NA, NA, 5L, NA, NA, 3L, 1L, NA, 1L, 10L,
"Brooklyn Derry", NA, NA, NA, NA, 3L, 1L, NA, NA, NA, NA, 3L, 3L, 10L,
"Camilo Gallard", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0L,
"Celine Michiels", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0L,
"Charles Veilleux", NA, NA, 3L, NA, NA, NA, 2L, 1, 1L, 2L, NA, 1L, 10L,
"Christina James", NA, NA, NA, 2L, NA, 5L, NA, NA, NA, 3L, NA, NA, 10L,
"Gordon Johnston", 1L, NA, 1L, NA, NA, 2L, 2L, 1, NA, 3L, NA, NA, 10L,
"Harry Zhang", 2L, NA, NA, NA, NA, NA, 3L, 2, NA, 2L, NA, 1L, 10L,
"Jessie Watson", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0L,
"Jordan Larkins", NA, NA, 3L, NA, NA, NA, 3L, NA, 1L, 3L, NA, NA, 10L,
"Lais Pereira", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0L,
"Mark Sumka", 2L, NA, 2L, NA, NA, 1L, 2L, 3, NA, NA, NA, NA, 10L,
"Matthew Henderson", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0L,
"Mauricio Herrera", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0L,
"Michael Dabiri", 1L, NA, 3L, NA, NA, NA, 1L, 1, 1L, 3L, NA, NA, 10L,
"Nadine Shatilla", NA, NA, NA, NA, NA, 1L, NA, 5, 4L, NA, NA, NA, 10L,
"Nina Feng", NA, NA, 1L, NA, NA, 4L, 1L, 1, NA, 2L, NA, 1L, 10L,
"Noah Levin", NA, 1, NA, NA, NA, 3L, NA, NA, NA, 1L, 1L, 4L, 10L,
"Rajib Kamal", NA, NA, 1L, NA, NA, NA, 3L, 1, NA, 4L, NA, 1L, 10L,
"Rob Klein", NA, NA, NA, NA, NA, NA, NA, NA, 3L, 3L, 3L, 1L, 10L,
"Samantha Barnes", 1L, NA, 2L, 1L, NA, 2L, 1L, 1, NA, 2L, NA, NA, 10L,
"Shannon Hoekstra", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0L,
"Sharlene Santiago", 2L, NA, NA, NA, NA, 1L, NA, NA, NA, NA, 1L, 6L, 10L,
"Simon Venter", NA, NA, 6L, NA, NA, NA, 3L, NA, NA, 1L, NA, NA, 10L,
"Simonne Mikolay", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0L,
"Soren Jensen", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0L,
"Tanbir Mahatab", NA, NA, NA, NA, NA, 4L, NA, 1, NA, 2L, 2L, 1L, 10L,
"Tony Fedec", NA, NA, NA, NA, 5L, NA, 1L, NA, NA, 4L, NA, NA, 10L,
"Victor Munoz", 2L, 4.5, NA, NA, NA, NA, 1L, 2.5, NA, NA, NA, NA, 10L
) %>%
# Keep only the rows whose TOTAL equals 10 (rows with TOTAL 0 are all NA).
dplyr::filter(TOTAL==10) %>%
# TOTAL has served its purpose as a filter; it must not be melted as a topic.
dplyr::select(-TOTAL) %>%
# Long format: one row per (Names, topic); topic name goes to `variable`,
# the magnitude to `value`.
reshape2::melt(id.vars="Names") %>%
# Tag the origin so this table can be stacked with the "proy" table.
mutate(source="hist")
# Second allocation table, tagged "proy" (presumably the projected / expected
# allocation -- the later plot of this source is subtitled "Expected work";
# TODO confirm).
# Same layout as the historical table: one row per team member, twelve topic
# columns (NA where nothing was entered) and a TOTAL column.
# The pipeline after the table keeps only the rows whose TOTAL is 10, drops
# TOTAL, melts to long format (Names, variable, value) and tags every row with
# source = "proy".
proy_tbl<-tibble::tribble(
~Names, ~Climate.Change, ~Data.Science, ~Engineering.Design, ~ESG, ~Geochemistry, ~Goldsim, ~Hydraulics, ~Hydrology, ~Project.Management, ~Water.Management, ~Water.Treatment, ~Water.Quality, ~TOTAL,
"Andrea Bowie", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0L,
"Anne Day", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0L,
"Brandon Smith", NA, NA, NA, NA, NA, 5, NA, NA, 3L, 1L, NA, 1L, 10L,
"Brooklyn Derry", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0L,
"Camilo Gallard", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0L,
"Celine Michiels", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0L,
"Charles Veilleux", NA, NA, 2L, NA, NA, 1, 2L, 1L, NA, 2L, NA, 2L, 10L,
"Christina James", NA, NA, NA, 3.333333333, NA, 3.333333333, NA, NA, NA, 3L, NA, NA, 10L,
"Gordon Johnston", 1L, NA, 2L, 1, NA, 2, 2L, NA, NA, 1L, NA, 1L, 10L,
"Harry Zhang", 2L, NA, NA, NA, NA, NA, 2L, 2L, NA, 3L, NA, 1L, 10L,
"Jessie Watson", NA, NA, NA, 1, NA, 1, NA, NA, 1L, 2L, 3L, 2L, 10L,
"Jordan Larkins", NA, NA, 3L, NA, NA, NA, 1L, NA, 4L, 2L, NA, NA, 10L,
"Lais Pereira", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0L,
"Mark Sumka", 1L, NA, 2L, NA, NA, 1, 2L, 1L, NA, 2L, NA, 1L, 10L,
"Matthew Henderson", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0L,
"Mauricio Herrera", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0L,
"Michael Dabiri", 1L, NA, 2L, 2, NA, NA, 1L, 1L, NA, 3L, NA, NA, 10L,
"Nadine Shatilla", 1L, NA, NA, 1, NA, 1, NA, 4L, 2L, 1L, NA, NA, 10L,
"Nina Feng", NA, 1L, 1L, NA, NA, 3, 2L, NA, NA, 2L, NA, 1L, 10L,
"Noah Levin", NA, 2L, NA, NA, 1L, 2, NA, NA, NA, NA, 2L, 3L, 10L,
"Rajib Kamal", NA, NA, 2L, NA, NA, NA, 3L, 1L, NA, 3L, NA, 1L, 10L,
"Rob Klein", NA, NA, 3L, 2, NA, NA, NA, NA, NA, 3L, 2L, NA, 10L,
"Samantha Barnes", 1L, NA, 2L, 1, NA, 2, 1L, 1L, NA, 2L, NA, NA, 10L,
"Shannon Hoekstra", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0L,
"Sharlene Santiago", 2L, 2L, NA, 1, NA, 1, NA, NA, NA, 1L, 2L, 1L, 10L,
"Simon Venter", NA, NA, 5L, NA, NA, NA, 3L, NA, NA, 2L, NA, NA, 10L,
"Simonne Mikolay", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0L,
"Soren Jensen", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0L,
"Tanbir Mahatab", NA, NA, 2L, NA, NA, 2, NA, 1L, NA, 1L, 2L, 2L, 10L,
"Tony Fedec", NA, NA, 3L, NA, NA, NA, 4L, NA, NA, 3L, NA, NA, 10L,
"Victor Munoz", 2L, 6L, NA, NA, NA, NA, NA, 2L, NA, NA, NA, NA, 10L
)%>%
# Keep only the rows whose TOTAL equals 10 (rows with TOTAL 0 are all NA).
dplyr::filter(TOTAL==10) %>%
# TOTAL has served its purpose as a filter; it must not be melted as a topic.
dplyr::select(-TOTAL) %>%
# Long format: one row per (Names, topic); topic name goes to `variable`,
# the magnitude to `value`.
reshape2::melt(id.vars="Names") %>%
# Tag the origin so this table can be stacked with the "hist" table.
mutate(source="proy")
# Stack both long tables into one (columns: Names, variable, value, source).
all_tbl <- rbind(hist_tbl, proy_tbl)

# Total magnitude per topic over both sources, largest first. Only used to
# fix the facet order of the plot below.
all_sort_tbl <- all_tbl %>%
  group_by(variable) %>%
  summarise(value = sum(value, na.rm = TRUE)) %>%
  arrange(desc(value))

# Stacked bars of every person's magnitude, "hist" next to "proy", one facet
# per topic. Facets follow the order of all_sort_tbl (largest topic first).
all_tbl %>%
  ggplot(aes(x = source, y = value, fill = Names)) +
  geom_col() +
  facet_wrap(
    fct_relevel(variable, as.character(all_sort_tbl$variable)) ~ .,
    drop = FALSE
  ) +
  theme_light() +
  labs(y = "Magnitude")

# Topic totals per source, side by side, ordered by the historical total,
# with the difference proy - hist. Printed as a pandoc table rounded to
# whole numbers.
all_tbl %>%
  group_by(variable, source) %>%
  summarise(value = sum(value, na.rm = TRUE), .groups = "drop") %>%
  reshape2::dcast(formula = variable ~ source, value.var = "value") %>%
  arrange(desc(hist)) %>%
  mutate(difference = proy - hist) %>%
  # Namespace-qualified: pander is not attached by any library() call in
  # this script.
  pander::pandoc.table(round = 0)

# Rendered output of the call above (kept for reference):
# | variable | hist | proy | difference |
# |---|---|---|---|
# | Water.Management | 36 | 37 | 1 |
# | Goldsim | 29 | 24 | -5 |
# | Hydraulics | 23 | 23 | 0 |
# | Engineering.Design | 22 | 29 | 7 |
# | Water.Quality | 20 | 16 | -4 |
# | Hydrology | 20 | 14 | -6 |
# | Project.Management | 13 | 10 | -3 |
# | Climate.Change | 11 | 11 | 0 |
# | Water.Treatment | 10 | 11 | 1 |
# | Geochemistry | 8 | 1 | -7 |
# | Data.Science | 6 | 11 | 6 |
# | ESG | 3 | 12 | 9 |
# Topic totals per source with the change proy - hist, ordered by the largest
# increase first (ties broken by the larger historical total). Printed as a
# pandoc table rounded to whole numbers.
all_tbl %>%
  group_by(variable, source) %>%
  summarise(value = sum(value, na.rm = TRUE), .groups = "drop") %>%
  reshape2::dcast(formula = variable ~ source, value.var = "value") %>%
  mutate(change = proy - hist) %>%
  arrange(desc(change), desc(hist)) %>%
  # Namespace-qualified: pander is not attached by any library() call in
  # this script.
  pander::pandoc.table(round = 0)

# Rendered output of the call above (kept for reference):
# | variable | hist | proy | change |
# |---|---|---|---|
# | ESG | 3 | 12 | 9 |
# | Engineering.Design | 22 | 29 | 7 |
# | Data.Science | 6 | 11 | 6 |
# | Water.Management | 36 | 37 | 1 |
# | Water.Treatment | 10 | 11 | 1 |
# | Hydraulics | 23 | 23 | 0 |
# | Climate.Change | 11 | 11 | 0 |
# | Project.Management | 13 | 10 | -3 |
# | Water.Quality | 20 | 16 | -4 |
# | Goldsim | 29 | 24 | -5 |
# | Hydrology | 20 | 14 | -6 |
# | Geochemistry | 8 | 1 | -7 |
library(cluster)

# ---- Clustering of team members on the historical ("hist") data ----

# Wide table: one row per (Names, source), one column per topic, with missing
# magnitudes replaced by 0.
all_info_tbl <- all_tbl %>%
  mutate(value = if_else(is.na(value), 0, value)) %>%
  reshape2::dcast(formula = Names + source ~ variable, value.var = "value") %>%
  arrange(Names, source)

# Historical rows only, with the person as row name so that the labels carry
# through scale(), dist() and the clustering calls.
all_hist_tbl <- all_info_tbl %>%
  dplyr::filter(source == "hist") %>%
  dplyr::select(-source) %>%
  column_to_rownames(var = "Names")

# Centre and scale every topic column.
wg_norm <- scale(all_hist_tbl)

# Dendrogram from average-linkage hierarchical clustering.
# The result is stored in a variable called `dist`, so the function is called
# with its namespace to make clear which object is meant.
dist <- stats::dist(wg_norm)
wg_hclust <- hclust(dist, method = "average")
plot(wg_hclust)

# Partition into 3 clusters with PAM and draw the cluster map.
datadistshortset <- stats::dist(wg_norm, method = "euclidean")
# hc1 is not used again in the visible part of the script.
hc1 <- hclust(datadistshortset, method = "complete")
# NOTE(review): `diss = FALSE` tells pam() to treat its input as ordinary
# data, although a distance object is passed -- confirm this is intended.
pamvshortset <- pam(datadistshortset, 3, diss = FALSE)
clusplot(
  pamvshortset,
  shade = FALSE, labels = 2, col.clus = "blue", col.p = "red",
  span = FALSE, main = "Cluster Mapping", cex = 1.2
)

# Cluster number per person, as a two-column table (Names, cluster).
all_cluster <- data.frame(pamvshortset$clustering) %>%
  rownames_to_column(var = "Names") %>%
  dplyr::rename(cluster = "pamvshortset.clustering")

# One row of bars per person, one column of panels per cluster, showing the
# historical magnitude per topic.
left_join(all_tbl, all_cluster, by = "Names") %>%
  # Replace the dot in the topic name by a space and wrap it for the axis.
  mutate(variable = str_replace(variable, "\\.", " ") %>% str_wrap(10)) %>%
  dplyr::filter(source == "hist") %>%
  mutate(value = if_else(is.na(value), 0, value)) %>%
  ggplot(aes(x = variable, y = value, fill = as.factor(round(value, 0)))) +
  geom_col() +
  facet_grid(Names ~ cluster) +
  theme(strip.text.y.right = element_text(angle = 0)) +
  theme(axis.text.x = element_text(angle = 90, size = 7)) +
  labs(
    fill = "magnitude:", x = NULL, y = NULL,
    title = "Team members divided by cluster",
    subtitle = "Historical work"
  )
# ---- Clustering of team members on the "proy" data ----

# Wide table: one row per (Names, source), one column per topic, with missing
# magnitudes replaced by 0.
all_info_tbl <- all_tbl %>%
  mutate(value = if_else(is.na(value), 0, value)) %>%
  reshape2::dcast(formula = Names + source ~ variable, value.var = "value") %>%
  arrange(Names, source)

# "proy" rows only, with the person as row name so that the labels carry
# through scale(), dist() and the clustering calls.
# NOTE: the object keeps the name `all_hist_tbl` used by the historical
# section, but from here on it holds the "proy" data.
all_hist_tbl <- all_info_tbl %>%
  dplyr::filter(source == "proy") %>%
  dplyr::select(-source) %>%
  column_to_rownames(var = "Names")

# Centre and scale every topic column.
wg_norm <- scale(all_hist_tbl)

# Dendrogram from average-linkage hierarchical clustering.
# The result is stored in a variable called `dist`, so the function is called
# with its namespace to make clear which object is meant.
dist <- stats::dist(wg_norm)
wg_hclust <- hclust(dist, method = "average")
plot(wg_hclust)

# Partition into 3 clusters with PAM and draw the cluster map.
datadistshortset <- stats::dist(wg_norm, method = "euclidean")
# hc1 is not used again in the visible part of the script.
hc1 <- hclust(datadistshortset, method = "complete")
# NOTE(review): `diss = FALSE` tells pam() to treat its input as ordinary
# data, although a distance object is passed -- confirm this is intended.
pamvshortset <- pam(datadistshortset, 3, diss = FALSE)
clusplot(
  pamvshortset,
  shade = FALSE, labels = 2, col.clus = "blue", col.p = "red",
  span = FALSE, main = "Cluster Mapping", cex = 1.2
)

# Cluster number per person, as a two-column table (Names, cluster).
all_cluster <- data.frame(pamvshortset$clustering) %>%
  rownames_to_column(var = "Names") %>%
  dplyr::rename(cluster = "pamvshortset.clustering")

# One row of bars per person, one column of panels per cluster, showing the
# "proy" magnitude per topic.
left_join(all_tbl, all_cluster, by = "Names") %>%
  # Replace the dot in the topic name by a space and wrap it for the axis.
  mutate(variable = str_replace(variable, "\\.", " ") %>% str_wrap(10)) %>%
  # Plot the same source the clusters were computed from. Filtering on "hist"
  # here would pair the "proy" clusters with historical magnitudes and leave
  # people without a "proy" row with no cluster.
  dplyr::filter(source == "proy") %>%
  mutate(value = if_else(is.na(value), 0, value)) %>%
  ggplot(aes(x = variable, y = value, fill = as.factor(round(value, 0)))) +
  geom_col() +
  facet_grid(Names ~ cluster) +
  theme(strip.text.y.right = element_text(angle = 0)) +
  theme(axis.text.x = element_text(angle = 90, size = 7)) +
  labs(
    fill = "magnitude:", x = NULL, y = NULL,
    title = "Team members divided by cluster",
    subtitle = "Expected work"
  )

# Save the whole workspace to data/rds/ under the project root, with the
# current date in the file name.
# lubridate::now() is namespace-qualified so the call does not depend on
# lubridate being attached by library(tidyverse).
# NOTE(review): save.image() does not create directories -- presumably
# data/rds already exists; confirm.
file.image <- here::here(
  "data", "rds",
  paste0("Backup Meteorological review_", as.Date(lubridate::now()), ".rdata")
)
save.image(file = file.image)