# Question 1:
# Load the mammal sleep data from a local CSV export into `msleep`.
# This global `msleep` masks the data set of the same name that ggplot2 ships.
# NOTE(review): the absolute path is specific to one machine; point it at your
# own copy of the file before running elsewhere.
msleep <- utils::read.csv(file = "C:/Users/ABHIRAM/Downloads/msleep.csv")
library(magrittr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following object is masked _by_ '.GlobalEnv':
##
## msleep
# Grouping and Summarization:-
# Mean total sleep per diet category (`vore`). Missing sleep values are dropped
# before averaging; animals with a missing diet form their own NA group.
diet_grouped <- summarise(
  group_by(msleep, vore),
  mean_sleep_total = mean(sleep_total, na.rm = TRUE)
)
diet_grouped
## # A tibble: 5 × 2
## vore mean_sleep_total
## <chr> <dbl>
## 1 carni 10.4
## 2 herbi 9.51
## 3 insecti 14.9
## 4 omni 10.9
## 5 <NA> 10.2
# Mean body weight per conservation status. Missing weights are dropped before
# averaging; animals with no recorded status form their own NA group.
conservation_grouped <- summarise(
  group_by(msleep, conservation),
  mean_bodywt = mean(bodywt, na.rm = TRUE)
)
conservation_grouped
## # A tibble: 7 × 2
## conservation mean_bodywt
## <chr> <dbl>
## 1 cd 850.
## 2 domesticated 147.
## 3 en 692.
## 4 lc 8.05
## 5 nt 25.4
## 6 vu 1026.
## 7 <NA> 11.9
# Mean brain weight per taxonomic order. Missing brain weights are dropped
# before averaging, so an order with no recorded brain weight at all yields NaN.
order_grouped <- summarise(
  group_by(msleep, order),
  mean_brainwt = mean(brainwt, na.rm = TRUE)
)
order_grouped
## # A tibble: 19 × 2
## order mean_brainwt
## <chr> <dbl>
## 1 Afrosoricida 0.0026
## 2 Artiodactyla 0.198
## 3 Carnivora 0.0986
## 4 Cetacea NaN
## 5 Chiroptera 0.000275
## 6 Cingulata 0.0459
## 7 Didelphimorphia 0.0063
## 8 Diprotodontia 0.0114
## 9 Erinaceomorpha 0.00295
## 10 Hyracoidea 0.0152
## 11 Lagomorpha 0.0121
## 12 Monotremata 0.025
## 13 Perissodactyla 0.414
## 14 Pilosa NaN
## 15 Primates 0.254
## 16 Proboscidea 5.16
## 17 Rodentia 0.00357
## 18 Scandentia 0.0025
## 19 Soricomorpha 0.000592
# Calculating Expected Probability:-
# Expected probability of each diet group = proportion of animals in that group
# (group size / total number of animals). Dividing a group's mean sleep time by
# the sum of the group means, as was done before, does not measure how common
# or rare a group is.
diet_grouped <- diet_grouped %>%
  # Keep only the key and the summary column so re-running this step does not
  # accumulate duplicate count columns.
  select(vore, mean_sleep_total) %>%
  # left_join() matches NA keys to each other by default, so the group of
  # animals with a missing diet keeps its count.
  left_join(count(msleep, vore, name = "n_animals"), by = "vore") %>%
  mutate(
    probability = n_animals / sum(n_animals),
    # Tag the rarest group(s) as anomalies. Comparing the integer counts avoids
    # an equality test on floating-point values; tied groups are all tagged.
    anomaly_tag = ifelse(n_animals == min(n_animals), "Anomaly", "Normal")
  )
# NOTE: a result table captured before this change shows the old mean-based
# values; re-run the script to regenerate it.
diet_grouped
## # A tibble: 5 × 4
## vore mean_sleep_total probability anomaly_tag
## <chr> <dbl> <dbl> <chr>
## 1 carni 10.4 0.186 Normal
## 2 herbi 9.51 0.170 Anomaly
## 3 insecti 14.9 0.267 Normal
## 4 omni 10.9 0.195 Normal
## 5 <NA> 10.2 0.182 Normal
# Expected probability of each conservation group = proportion of animals in
# that group (group size / total number of animals). Dividing a group's mean
# body weight by the sum of the group means, as was done before, does not
# measure how common or rare a group is.
conservation_grouped <- conservation_grouped %>%
  # Keep only the key and the summary column so re-running this step does not
  # accumulate duplicate count columns.
  select(conservation, mean_bodywt) %>%
  # left_join() matches NA keys to each other by default, so the group of
  # animals with no recorded status keeps its count.
  left_join(
    count(msleep, conservation, name = "n_animals"),
    by = "conservation"
  ) %>%
  mutate(
    probability = n_animals / sum(n_animals),
    # Tag the rarest group(s) as anomalies. Comparing the integer counts avoids
    # an equality test on floating-point values; tied groups are all tagged.
    anomaly_tag = ifelse(n_animals == min(n_animals), "Anomaly", "Normal")
  )
# NOTE: a result table captured before this change shows the old mean-based
# values; re-run the script to regenerate it.
conservation_grouped
## # A tibble: 7 × 4
## conservation mean_bodywt probability anomaly_tag
## <chr> <dbl> <dbl> <chr>
## 1 cd 850. 0.308 Normal
## 2 domesticated 147. 0.0533 Normal
## 3 en 692. 0.251 Normal
## 4 lc 8.05 0.00291 Anomaly
## 5 nt 25.4 0.00920 Normal
## 6 vu 1026. 0.372 Normal
## 7 <NA> 11.9 0.00432 Normal
# Expected probability of each order = proportion of animals in that order
# (group size / total number of animals). The previous formula divided each
# mean brain weight by sum(mean_brainwt); because some orders have a NaN mean,
# that sum was NaN, every probability came out NaN and every tag came out NA.
# Group counts are never missing, so the count-based probability is always
# defined.
order_grouped <- order_grouped %>%
  # Keep only the key and the summary column so re-running this step does not
  # accumulate duplicate count columns.
  select(order, mean_brainwt) %>%
  left_join(count(msleep, order, name = "n_animals"), by = "order") %>%
  mutate(
    probability = n_animals / sum(n_animals),
    # Tag the rarest order(s) as anomalies. Comparing the integer counts avoids
    # an equality test on floating-point values; tied orders are all tagged.
    anomaly_tag = ifelse(n_animals == min(n_animals), "Anomaly", "Normal")
  )
# NOTE: a result table captured before this change shows the old all-NaN
# values; re-run the script to regenerate it.
order_grouped
## # A tibble: 19 × 4
## order mean_brainwt probability anomaly_tag
## <chr> <dbl> <dbl> <lgl>
## 1 Afrosoricida 0.0026 NaN NA
## 2 Artiodactyla 0.198 NaN NA
## 3 Carnivora 0.0986 NaN NA
## 4 Cetacea NaN NaN NA
## 5 Chiroptera 0.000275 NaN NA
## 6 Cingulata 0.0459 NaN NA
## 7 Didelphimorphia 0.0063 NaN NA
## 8 Diprotodontia 0.0114 NaN NA
## 9 Erinaceomorpha 0.00295 NaN NA
## 10 Hyracoidea 0.0152 NaN NA
## 11 Lagomorpha 0.0121 NaN NA
## 12 Monotremata 0.025 NaN NA
## 13 Perissodactyla 0.414 NaN NA
## 14 Pilosa NaN NaN NA
## 15 Primates 0.254 NaN NA
## 16 Proboscidea 5.16 NaN NA
## 17 Rodentia 0.00357 NaN NA
## 18 Scandentia 0.0025 NaN NA
## 19 Soricomorpha 0.000592 NaN NA
#Visualising the groups:-
# 1. Bar Chart:
# Bar chart: one bar per diet group, bar height = mean total sleep.
# geom_col() draws bars whose heights are the y values as given.
ggplot(data = diet_grouped, mapping = aes(x = vore, y = mean_sleep_total)) +
  geom_col(fill = "skyblue") +
  xlab("Diet") +
  ylab("Mean Sleep Total")
# Bar chart: one bar per conservation status, bar height = mean body weight.
# geom_col() draws bars whose heights are the y values as given.
ggplot(
  data = conservation_grouped,
  mapping = aes(x = conservation, y = mean_bodywt)
) +
  geom_col(fill = "green") +
  xlab("Conservation Status") +
  ylab("Mean Body Weight")
# Bar chart: one bar per taxonomic order, bar height = mean brain weight.
# Orders whose mean brain weight is NaN have no bar; ggplot2 reports them as
# removed rows in a warning.
ggplot(data = order_grouped, mapping = aes(x = order, y = mean_brainwt)) +
  geom_col(fill = "orange") +
  xlab("Order") +
  ylab("Mean Brain Weight")
## Warning: Removed 2 rows containing missing values (`position_stack()`).
# Testable Hypothesis:
# Hypothesis: Some groups are rarer than others due to specific ecological or biological factors that influence the sleep patterns of animals.
# 1. Body Size: Body size can influence sleep patterns. Larger animals might need more sleep, while smaller animals might sleep less. This could vary across orders.
# 2. Diet: Carnivorous animals may have different sleep patterns compared to herbivores or omnivores. Some orders might consist mostly of carnivorous species, while others have herbivorous or omnivorous species.
# 3. Brain Size: Brain size relative to body size can impact sleep patterns. Orders with larger-brained species may have different sleep needs.
# Question 2:
# Let's pick two categorical variables: "vore" and "order".
# Combinations that Never Show Up:
# Some combinations never show up because certain diet types are not compatible with certain taxonomic orders. For example, there are no entries for "Carnivora" with "herbi" (members of the order Carnivora that are herbivores) or for "Primates" with "carni" (primates that are carnivores). These absences reflect the biological characteristics of the animals.
# Most/Least Common Combinations:
# The most common combination appears to be "herbi" (Herbivore) with "Rodentia" (Order containing rodents). This is likely because many rodents are herbivorous.
# "Carni" (Carnivore) with "Carnivora" (Order containing carnivorous mammals) is also a relatively common combination, reflecting the typical diet of carnivorous mammals.
# "herbi" (Herbivore) with "Artiodactyla" (Order containing even-toed ungulates) is also fairly common, as many ungulates are herbivorous.
# "omni" (Omnivore) appears in various combinations with different orders, which is expected given that omnivores can have diverse diets.
# "NA" (Not Available) values in "vore" indicate missing diet information for some species, which is why they are present in various combinations with different orders.