Student Details

https://www.kaggle.com/zhangjuefei/birds-bones-and-living-habits

About

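According to their living environments and living habits, birds are classified into different ecological groups. There are 8 ecological groups of birds.

The first 6 groups are the main ones and are covered by this dataset: swimming birds (SW), wading birds (W), terrestrial birds (T), raptors (R), scansorial birds (P) and singing birds (SO).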

There are 420 birds contained in this dataset. Each bird is represented by 10 measurements (features): the length and the diameter of the humerus, ulna, femur, tibiotarsus and tarsometatarsus.

# Load your packages
library(tidyr)
library(dplyr)
library(knitr)
library(GGally)
library(ggplot2)
library(ggridges)

Data

# Read the raw bird-bone measurements from the working directory.
# stringsAsFactors is set explicitly: since R 4.0.0 read.csv() no longer
# converts strings to factors by default, and the exploration output in this
# document (summary()/str() of `type`) shows `type` as a factor.
birdDS <- read.csv("bird.csv", stringsAsFactors = TRUE)

Exploring the Data

# Preview the first six rows and list the raw column names
head(birdDS, n = 6L)
names(birdDS)
##  [1] "id"    "huml"  "humw"  "ulnal" "ulnaw" "feml"  "femw"  "tibl" 
##  [9] "tibw"  "tarl"  "tarw"  "type"
# Descriptive names for the twelve columns, in the same order as above
colNamesFull <- c(
  "ID",
  "HumerusLength", "HumerusDiameter",
  "UlnaLength", "UlnaDiameter",
  "FemurLength", "FemurDiameter",
  "TibiotarusLength", "TibiotarusDiameter",
  "TarsometatarusLength", "TarsometatarusDiameter",
  "EcologicalBirdType"
)
# Per-column summary statistics of the raw data, including NA counts
summary(object = birdDS)
##        id             huml             humw            ulnal       
##  Min.   :  0.0   Min.   :  9.85   Min.   : 1.140   Min.   : 14.09  
##  1st Qu.:104.8   1st Qu.: 25.17   1st Qu.: 2.190   1st Qu.: 28.05  
##  Median :209.5   Median : 44.18   Median : 3.500   Median : 43.71  
##  Mean   :209.5   Mean   : 64.65   Mean   : 4.371   Mean   : 69.12  
##  3rd Qu.:314.2   3rd Qu.: 90.31   3rd Qu.: 5.810   3rd Qu.: 97.52  
##  Max.   :419.0   Max.   :420.00   Max.   :17.840   Max.   :422.00  
##                  NA's   :1        NA's   :1        NA's   :3       
##      ulnaw             feml             femw             tibl       
##  Min.   : 1.000   Min.   : 11.83   Min.   : 0.930   Min.   :  5.50  
##  1st Qu.: 1.870   1st Qu.: 21.30   1st Qu.: 1.715   1st Qu.: 36.42  
##  Median : 2.945   Median : 31.13   Median : 2.520   Median : 52.12  
##  Mean   : 3.597   Mean   : 36.87   Mean   : 3.221   Mean   : 64.66  
##  3rd Qu.: 4.770   3rd Qu.: 47.12   3rd Qu.: 4.135   3rd Qu.: 82.87  
##  Max.   :12.000   Max.   :117.07   Max.   :11.640   Max.   :240.00  
##  NA's   :2        NA's   :2        NA's   :1        NA's   :2       
##       tibw             tarl             tarw        type    
##  Min.   : 0.870   Min.   :  7.77   Min.   : 0.660   P : 38  
##  1st Qu.: 1.565   1st Qu.: 23.04   1st Qu.: 1.425   R : 50  
##  Median : 2.490   Median : 31.74   Median : 2.230   SO:128  
##  Mean   : 3.182   Mean   : 39.23   Mean   : 2.930   SW:116  
##  3rd Qu.: 4.255   3rd Qu.: 50.25   3rd Qu.: 3.500   T : 23  
##  Max.   :11.030   Max.   :175.00   Max.   :14.090   W : 65  
##  NA's   :1        NA's   :1        NA's   :1
# Compact view of the data frame's structure: dimensions and column types
str(object = birdDS)
## 'data.frame':    420 obs. of  12 variables:
##  $ id   : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ huml : num  80.8 88.9 80 77.7 62.8 ...
##  $ humw : num  6.68 6.63 6.37 5.7 4.84 ...
##  $ ulnal: num  72 80.5 69.3 65.8 52.1 ...
##  $ ulnaw: num  4.88 5.59 5.28 4.77 3.73 3.47 4.5 4.55 6.13 7.05 ...
##  $ feml : num  41.8 47 43.1 40 34 ...
##  $ femw : num  3.7 4.3 3.9 3.52 2.72 4.41 3.41 3.78 5.45 7.44 ...
##  $ tibl : num  5.5 80.2 75.3 69.2 56.3 ...
##  $ tibw : num  4.03 4.51 4.04 3.4 2.96 2.73 3.56 3.81 5.58 7.31 ...
##  $ tarl : num  38.7 41.5 38.3 35.8 31.9 ...
##  $ tarw : num  3.84 4.01 3.34 3.41 3.13 2.83 3.64 3.81 4.37 6.34 ...
##  $ type : Factor w/ 6 levels "P","R","SO","SW",..: 4 4 4 4 4 4 4 4 4 4 ...
# Preview the first six rows once more
head(birdDS, n = 6L)
# Distinct ecological group codes found in the `type` column
unique(birdDS[["type"]])
## [1] SW W  T  R  P  SO
## Levels: P R SO SW T W

Determine the mean values

# Split the birds into their six ecological groups and replace every missing
# bone measurement with the mean of that measurement within the bird's own
# group, then stack the groups back into one cleaned data frame.

# The ten bone measurements; `id` and `type` are never imputed.
measurementCols <- c("huml", "humw", "ulnal", "ulnaw", "feml",
                     "femw", "tibl", "tibw", "tarl", "tarw")

# Fill the NAs of each column in `cols` with that column's mean over the
# non-missing rows of `birds`. Columns without NAs are left untouched, so the
# columns to impute no longer have to be picked by hand for each group.
fillWithGroupMean <- function(birds, cols) {
  for (col in cols) {
    isMissing <- is.na(birds[[col]])
    if (any(isMissing)) {
      birds[[col]][isMissing] <- mean(birds[[col]], na.rm = TRUE)
    }
  }
  birds
}

# split() keeps the original row names, exactly like subset() on `type`.
birdGroups <- lapply(split(birdDS, birdDS$type), fillWithGroupMean,
                     cols = measurementCols)

SWbirds <- birdGroups[["SW"]] # SW - Swimming Birds
Wbirds <- birdGroups[["W"]]   # W - Wading Birds
Tbirds <- birdGroups[["T"]]   # T - Terrestrial Birds
Rbirds <- birdGroups[["R"]]   # R - Raptors
Pbirds <- birdGroups[["P"]]   # P - Scansorial Birds
SObirds <- birdGroups[["SO"]] # SO - Singing Birds

# Recombine in a fixed group order and confirm that no NAs remain.
birdDS_clean <- rbind(SWbirds, Wbirds, Tbirds, Rbirds, Pbirds, SObirds)
summary(birdDS_clean)
##        id             huml             humw            ulnal       
##  Min.   :  0.0   Min.   :  9.85   Min.   : 1.140   Min.   : 14.09  
##  1st Qu.:104.8   1st Qu.: 25.04   1st Qu.: 2.188   1st Qu.: 28.00  
##  Median :209.5   Median : 44.08   Median : 3.495   Median : 43.51  
##  Mean   :209.5   Mean   : 64.55   Mean   : 4.365   Mean   : 68.99  
##  3rd Qu.:314.2   3rd Qu.: 90.22   3rd Qu.: 5.805   3rd Qu.: 97.56  
##  Max.   :419.0   Max.   :420.00   Max.   :17.840   Max.   :422.00  
##      ulnaw             feml             femw             tibl       
##  Min.   : 1.000   Min.   : 11.83   Min.   : 0.930   Min.   :  5.50  
##  1st Qu.: 1.867   1st Qu.: 21.33   1st Qu.: 1.718   1st Qu.: 36.34  
##  Median : 2.945   Median : 31.13   Median : 2.525   Median : 51.94  
##  Mean   : 3.596   Mean   : 36.84   Mean   : 3.221   Mean   : 64.53  
##  3rd Qu.: 4.772   3rd Qu.: 47.10   3rd Qu.: 4.122   3rd Qu.: 82.87  
##  Max.   :12.000   Max.   :117.07   Max.   :11.640   Max.   :240.00  
##       tibw             tarl             tarw        type    
##  Min.   : 0.870   Min.   :  7.77   Min.   : 0.660   P : 38  
##  1st Qu.: 1.560   1st Qu.: 23.04   1st Qu.: 1.427   R : 50  
##  Median : 2.490   Median : 31.81   Median : 2.235   SO:128  
##  Mean   : 3.178   Mean   : 39.28   Mean   : 2.935   SW:116  
##  3rd Qu.: 4.253   3rd Qu.: 50.46   3rd Qu.: 3.522   T : 23  
##  Max.   :11.030   Max.   :175.00   Max.   :14.090   W : 65

Visualisation

# Reshape the cleaned per-group data into one long table with one row per
# (bird, bone measurement) pair, ready for plotting.
colnames(birdDS_clean)
##  [1] "id"    "huml"  "humw"  "ulnal" "ulnaw" "feml"  "femw"  "tibl" 
##  [9] "tibw"  "tarl"  "tarw"  "type"
allMeasurements <- c("huml", "humw", "ulnal", "ulnaw", "feml",
                     "femw", "tibl", "tibw", "tarl", "tarw")
lengthMeasurements <- c("huml", "ulnal", "feml", "tibl", "tarl")
widthMeasurements <- c("humw", "ulnaw", "femw", "tibw", "tarw")

# NOTE(review): `test_reorder` and `test` are not used by any later statement
# visible in this script; they are kept unchanged in case something else
# relies on them.
test_reorder <- birdDS_clean[,c(12,2,3,4,5,6,7,8,9,10,11,1)]
test <- birdDS_clean %>% gather("huml", "humw", "ulnal", "ulnaw", "feml",  "femw",  "tibl",  "tibw",  "tarl",  "tarw", 
                   key = "id", value = "measurement")

# Turn one group's wide measurements into long format.
#   birds    - data frame holding the columns named in `cols`
#   typeCode - ecological group code stored in the `id` column of the result
#   cols     - measurement columns to stack
# Returns a data frame with the columns `id` (group code), `type` (name of
# the measurement column) and `measurement` (its value). The column names and
# their order are what the later positional renaming of birdDS_Long expects.
# This replaces the earlier approach of letting gather() overwrite the
# existing `type` column and then overwriting `id` by hand. Rows come out
# bird by bird rather than column by column; neither the summary nor the
# density plot depends on row order.
toLongMeasurements <- function(birds, typeCode, cols = allMeasurements) {
  long <- pivot_longer(birds[cols], cols = everything(),
                       names_to = "type", values_to = "measurement")
  data.frame(id = rep(typeCode, nrow(long)),
             as.data.frame(long),
             stringsAsFactors = FALSE)
}

SWbirdsLong <- toLongMeasurements(SWbirds, "SW")
WbirdsLong <- toLongMeasurements(Wbirds, "W")
TbirdsLong <- toLongMeasurements(Tbirds, "T")
RbirdsLong <- toLongMeasurements(Rbirds, "R")
PbirdsLong <- toLongMeasurements(Pbirds, "P")
SObirdsLong <- toLongMeasurements(SObirds, "SO")

birdDS_Long <- rbind(SWbirdsLong, WbirdsLong, TbirdsLong,
                     RbirdsLong, PbirdsLong, SObirdsLong)
summary(birdDS_Long)
##       id                type            measurement    
##  Length:4200        Length:4200        Min.   :  0.66  
##  Class :character   Class :character   1st Qu.:  2.67  
##  Mode  :character   Mode  :character   Median : 12.47  
##                                        Mean   : 29.15  
##                                        3rd Qu.: 38.50  
##                                        Max.   :422.00
# Give the long table descriptive column names, then split each bone code
# (e.g. "huml") into the bone it refers to and the kind of measurement.
colnames(birdDS_Long) <- c("birdType", "bone", "measurement")

boneCode <- as.character(birdDS_Long$bone)
codeLength <- nchar(boneCode)

# The last character of the code is the measurement kind: "l" or "w".
# substr() is vectorised and always returns a character vector, unlike
# sapply(), whose return type changes on empty input.
birdDS_Long$measurementType <- substr(boneCode, codeLength, codeLength)

# Everything before the last character identifies the bone. The prefixes are
# mapped to display names explicitly instead of relabelling the factor levels
# by position, so a label can never end up on the wrong bone.
boneLabels <- c(fem = "Femur", hum = "Humerus", tar = "Tarsometatarsus",
                tib = "Tibiotarsus", ulna = "Ulna")
birdDS_Long$boneAlone <- factor(substr(boneCode, 1, codeLength - 1),
                                levels = names(boneLabels),
                                labels = unname(boneLabels))
levels(birdDS_Long$boneAlone)
## [1] "Femur"           "Humerus"         "Tarsometatarsus" "Tibiotarsus"    
## [5] "Ulna"
# Ridgeline density plot of every bone measurement: one row per ecological
# group, one facet per bone, filled by measurement kind (length vs width).
# The plot is built first and only then written to birds.png, so an error
# while building it cannot leave a graphics device open.
birdsPlot <- ggplot(data = birdDS_Long,
                    # Bare column names: aes() looks them up in `data`, which
                    # keeps the mapping tied to the plotted data frame.
                    aes(x = measurement,
                        y = birdType,
                        fill = measurementType)) +
  geom_density_ridges2(scale = 0.9, alpha = 0.7, colour = "white", size = 0.5) +
  labs(y = NULL, x = "Measurement (mm)",
       title = "Bird Bone Measurements",
       subtitle = "Alistair Grevis-James s3644119",
       caption = "Source: kaggle.com/zhangjuefei/birds-bones-and-living-habits.") +
  theme(plot.title = element_text(size = 62,
                                  face = "bold",
                                  family = "American Typewriter",
                                  color = "grey30"),
        plot.subtitle = element_text(size = 30,
                                     family = "American Typewriter",
                                     color = "grey30"),
        plot.caption = element_text(size = 24,
                                    color = "grey30"),
        axis.title.x = element_text(vjust = 0,
                                    face = "bold",
                                    family = "American Typewriter",
                                    size = 30),
        axis.text.y = element_text(vjust = 0,
                                   family = "American Typewriter",
                                   size = 30),
        axis.text.x = element_text(size = 20,
                                   family = "American Typewriter"),
        strip.text.x = element_text(size = 30,
                                    family = "American Typewriter"),
        legend.text = element_text(size = 24),
        legend.title = element_blank(),
        legend.position = c(0.95, 0.09)) +
  guides(fill = guide_legend(keywidth = 2.5, keyheight = 2.5)) +
  scale_x_continuous(expand = c(0.01, 0), breaks = c(0, 25, 50, 75, 100, 125)) +
  # Zoom to 0-150 mm without dropping the larger measurements from the
  # density estimate.
  coord_cartesian(xlim = c(0, 150)) +
  # Labels are positional: they follow the alphabetical order of the group
  # codes P, R, SO, SW, T, W.
  scale_y_discrete(expand = c(0.01, 0), labels = c("Scansorial Birds", "Raptors",
                                                   "Song Birds", "Swimming Birds",
                                                   "Terrestrial Birds", "Wading Birds")) +
  facet_grid(. ~ boneAlone) +
  # Fill values and labels follow the order of the measurement kinds "l", "w".
  scale_fill_manual(values = c("#ff7f00", "#377eb8"), labels = c("Length", "Width"))

# Render to file. print() is called explicitly because top-level autoprinting
# does not happen under source() or Rscript, which would leave the PNG empty.
png("birds.png", units = "in", width = 32, height = 18, res = 300)
print(birdsPlot)
dev.off()
## quartz_off_screen 
##                 2
# Show the plot in the document as well.
birdsPlot
The figure shows a series of faceted density plots of bird bone measurements. The measurements are of either length in orange or diameter (width) in blue – all measurements are in mm. The plots are faceted by bone type, and each row represents a type of bird. The visualisation is highly effective at showing the relation between length and diameter of a particular bone, within a particular bird type. The visualisation also allows for distribution comparisons between bird groups and for distribution comparisons between bone types. Starting in the top left-hand corner, we see for example that femur measurements of wading birds are very widely distributed. If we look to terrestrial birds, we see the femur length and width are extremely highly correlated. This correlation can then be compared with the other bones measured.

The figure shows a series of faceted density plots of bird bone measurements. The measurements are of either length in orange or diameter (width) in blue – all measurements are in mm. The plots are faceted by bone type, and each row represents a type of bird. The visualisation is highly effective at showing the relation between length and diameter of a particular bone, within a particular bird type. The visualisation also allows for distribution comparisons between bird groups and for distribution comparisons between bone types. Starting in the top left-hand corner, we see for example that femur measurements of wading birds are very widely distributed. If we look to terrestrial birds, we see the femur length and width are extremely highly correlated. This correlation can then be compared with the other bones measured.