# Question 1:

# Load the mammal sleep data from a local CSV export into `msleep`.
# NOTE(review): the path is absolute and machine-specific, and the object name
# shadows the `msleep` dataset that ggplot2 ships (ggplot2 reports it as
# masked by '.GlobalEnv' when it is attached).
msleep <- utils::read.csv(file = "C:/Users/ABHIRAM/Downloads/msleep.csv")

library(magrittr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following object is masked _by_ '.GlobalEnv':
## 
##     msleep
# Grouping and Summarization:-

# Mean total sleep per diet type ('vore'). Missing sleep values are ignored;
# animals whose diet is unknown end up in their own NA group.
diet_grouped <- summarise(
  group_by(msleep, vore),
  mean_sleep_total = mean(sleep_total, na.rm = TRUE)
)

diet_grouped
## # A tibble: 5 × 2
##   vore    mean_sleep_total
##   <chr>              <dbl>
## 1 carni              10.4 
## 2 herbi               9.51
## 3 insecti            14.9 
## 4 omni               10.9 
## 5 <NA>               10.2
# Mean body weight per conservation status. Missing weights are ignored;
# animals without a recorded status end up in their own NA group.
conservation_grouped <- summarise(
  group_by(msleep, conservation),
  mean_bodywt = mean(bodywt, na.rm = TRUE)
)

conservation_grouped
## # A tibble: 7 × 2
##   conservation mean_bodywt
##   <chr>              <dbl>
## 1 cd                850.  
## 2 domesticated      147.  
## 3 en                692.  
## 4 lc                  8.05
## 5 nt                 25.4 
## 6 vu               1026.  
## 7 <NA>               11.9
# Mean brain weight per taxonomic order. Missing brain weights are ignored,
# so an order with no recorded brain weight at all gets a mean of NaN.
order_grouped <- summarise(
  group_by(msleep, order),
  mean_brainwt = mean(brainwt, na.rm = TRUE)
)

order_grouped
## # A tibble: 19 × 2
##    order           mean_brainwt
##    <chr>                  <dbl>
##  1 Afrosoricida        0.0026  
##  2 Artiodactyla        0.198   
##  3 Carnivora           0.0986  
##  4 Cetacea           NaN       
##  5 Chiroptera          0.000275
##  6 Cingulata           0.0459  
##  7 Didelphimorphia     0.0063  
##  8 Diprotodontia       0.0114  
##  9 Erinaceomorpha      0.00295 
## 10 Hyracoidea          0.0152  
## 11 Lagomorpha          0.0121  
## 12 Monotremata         0.025   
## 13 Perissodactyla      0.414   
## 14 Pilosa            NaN       
## 15 Primates            0.254   
## 16 Proboscidea         5.16    
## 17 Rodentia            0.00357 
## 18 Scandentia          0.0025  
## 19 Soricomorpha        0.000592
# Calculating Expected Probability:-

# Expected probability of each diet group = share of animals (rows of
# `msleep`) that belong to it, with the NA (unknown diet) group included.
# The probability has to come from group sizes: dividing a group's mean sleep
# by the sum of the group means describes how long a group sleeps, not how
# common the group is.
diet_grouped <- diet_grouped %>%
  # Drop a count column left behind by an earlier run so that re-running this
  # step does not yield n_animals.x / n_animals.y after the join.
  select(-any_of("n_animals")) %>%
  # dplyr joins match NA keys to each other by default, so the NA diet group
  # receives its count as well.
  left_join(count(msleep, vore, name = "n_animals"), by = "vore") %>%
  mutate(
    probability = n_animals / sum(n_animals),
    # Tag the rarest group(s) as anomalous. Comparing the integer counts
    # avoids an equality test on doubles; tied groups are all tagged.
    anomaly_tag = ifelse(n_animals == min(n_animals), "Anomaly", "Normal")
  )

diet_grouped
# NOTE: any printed output recorded after this line was captured before the
# probability was based on group counts and must be regenerated.
## # A tibble: 5 × 4
##   vore    mean_sleep_total probability anomaly_tag
##   <chr>              <dbl>       <dbl> <chr>      
## 1 carni              10.4        0.186 Normal     
## 2 herbi               9.51       0.170 Anomaly    
## 3 insecti            14.9        0.267 Normal     
## 4 omni               10.9        0.195 Normal     
## 5 <NA>               10.2        0.182 Normal
# Expected probability of each conservation group = share of animals (rows of
# `msleep`) that belong to it, with the NA (no recorded status) group
# included. The probability has to come from group sizes: dividing a group's
# mean body weight by the sum of the group means describes how heavy a group
# is, not how common it is.
conservation_grouped <- conservation_grouped %>%
  # Drop a count column left behind by an earlier run so that re-running this
  # step does not yield n_animals.x / n_animals.y after the join.
  select(-any_of("n_animals")) %>%
  # dplyr joins match NA keys to each other by default, so the NA status group
  # receives its count as well.
  left_join(
    count(msleep, conservation, name = "n_animals"),
    by = "conservation"
  ) %>%
  mutate(
    probability = n_animals / sum(n_animals),
    # Tag the rarest group(s) as anomalous. Comparing the integer counts
    # avoids an equality test on doubles; tied groups are all tagged.
    anomaly_tag = ifelse(n_animals == min(n_animals), "Anomaly", "Normal")
  )

conservation_grouped
# NOTE: any printed output recorded after this line was captured before the
# probability was based on group counts and must be regenerated.
## # A tibble: 7 × 4
##   conservation mean_bodywt probability anomaly_tag
##   <chr>              <dbl>       <dbl> <chr>      
## 1 cd                850.       0.308   Normal     
## 2 domesticated      147.       0.0533  Normal     
## 3 en                692.       0.251   Normal     
## 4 lc                  8.05     0.00291 Anomaly    
## 5 nt                 25.4      0.00920 Normal     
## 6 vu               1026.       0.372   Normal     
## 7 <NA>               11.9      0.00432 Normal
# Expected probability of each taxonomic order = share of animals (rows of
# `msleep`) that belong to it. The probability has to come from group sizes:
# dividing a group's mean brain weight by the sum of the group means is not a
# frequency, and because some orders have a NaN mean brain weight that sum is
# NaN, which turned every probability into NaN and every tag into NA.
order_grouped <- order_grouped %>%
  # Drop a count column left behind by an earlier run so that re-running this
  # step does not yield n_animals.x / n_animals.y after the join.
  select(-any_of("n_animals")) %>%
  left_join(count(msleep, order, name = "n_animals"), by = "order") %>%
  mutate(
    probability = n_animals / sum(n_animals),
    # Tag the rarest order(s) as anomalous. Comparing the integer counts
    # avoids an equality test on doubles; tied orders are all tagged.
    anomaly_tag = ifelse(n_animals == min(n_animals), "Anomaly", "Normal")
  )

order_grouped
# NOTE: any printed output recorded after this line was captured before the
# probability was based on group counts and must be regenerated.
## # A tibble: 19 × 4
##    order           mean_brainwt probability anomaly_tag
##    <chr>                  <dbl>       <dbl> <lgl>      
##  1 Afrosoricida        0.0026           NaN NA         
##  2 Artiodactyla        0.198            NaN NA         
##  3 Carnivora           0.0986           NaN NA         
##  4 Cetacea           NaN                NaN NA         
##  5 Chiroptera          0.000275         NaN NA         
##  6 Cingulata           0.0459           NaN NA         
##  7 Didelphimorphia     0.0063           NaN NA         
##  8 Diprotodontia       0.0114           NaN NA         
##  9 Erinaceomorpha      0.00295          NaN NA         
## 10 Hyracoidea          0.0152           NaN NA         
## 11 Lagomorpha          0.0121           NaN NA         
## 12 Monotremata         0.025            NaN NA         
## 13 Perissodactyla      0.414            NaN NA         
## 14 Pilosa            NaN                NaN NA         
## 15 Primates            0.254            NaN NA         
## 16 Proboscidea         5.16             NaN NA         
## 17 Rodentia            0.00357          NaN NA         
## 18 Scandentia          0.0025           NaN NA         
## 19 Soricomorpha        0.000592         NaN NA
# Visualising the groups:-

# 1. Bar Chart:

# Diet groups on the x-axis, one sky-blue bar per group whose height is the
# group's mean total sleep. geom_col() plots the y values as given, like
# geom_bar(stat = "identity").
ggplot(data = diet_grouped, mapping = aes(x = vore, y = mean_sleep_total)) +
  geom_col(fill = "skyblue") +
  xlab("Diet") +
  ylab("Mean Sleep Total")

# Conservation statuses on the x-axis, one green bar per status whose height
# is the group's mean body weight. geom_col() plots the y values as given,
# like geom_bar(stat = "identity").
ggplot(
  data = conservation_grouped,
  mapping = aes(x = conservation, y = mean_bodywt)
) +
  geom_col(fill = "green") +
  xlab("Conservation Status") +
  ylab("Mean Body Weight")

# Taxonomic orders on the x-axis, one orange bar per order whose height is
# the order's mean brain weight. geom_col() plots the y values as given, like
# geom_bar(stat = "identity"). Orders whose mean brain weight is NaN cannot
# be drawn, so ggplot2 drops those rows with a warning.
ggplot(data = order_grouped, mapping = aes(x = order, y = mean_brainwt)) +
  geom_col(fill = "orange") +
  xlab("Order") +
  ylab("Mean Brain Weight")
## Warning: Removed 2 rows containing missing values (`position_stack()`).

# Testable Hypothesis:

# Hypothesis: Some groups are rarer than others due to specific ecological or biological factors that influence the sleep patterns of animals.

# 1. Body Size: Body size can influence sleep patterns. Larger animals might need more sleep, while smaller animals might sleep less. This could vary across orders.

# 2. Diet: Carnivorous animals may have different sleep patterns compared to herbivores or omnivores. Some orders might consist mostly of carnivorous species, while others have herbivorous or omnivorous species.

# 3. Brain Size: Brain size relative to body size can impact sleep patterns. Orders with larger-brained species may have different sleep needs.

# Question 2:

# Let's pick two categorical variables, "vore" and "order":

# Combinations that Never Show Up:

# Some combinations never show up because certain diet types may not be compatible with certain taxonomic orders. For example, there are no entries for "Carnivora" and "herbi" (Carnivorous mammals that are herbivores) or "Primates" and "carni" (Primates that are carnivores). This absence reflects the biological characteristics of these animals.


# Most/Least Common Combinations:

# The most common combination appears to be "herbi" (Herbivore) with "Rodentia" (Order containing rodents). This is likely because many rodents are herbivorous.
# "carni" (Carnivore) with "Carnivora" (Order containing carnivorous mammals) is also a relatively common combination, reflecting the typical diet of carnivorous mammals.
# "herbi" (Herbivore) with "Artiodactyla" (Order containing even-toed ungulates) is also fairly common, as many ungulates are herbivorous.
# "omni" (Omnivore) appears in various combinations with different orders, which is expected given that omnivores can have diverse diets.
# "NA" (Not Available) values in "vore" indicate missing diet information for some species, which is why they are present in various combinations with different orders.