library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(summarytools)
# Loading the dataset
# The CSV path below is absolute and specific to one machine. When the file is
# not there, fall back to the msleep data shipped with ggplot2 (attached above)
# so the rest of the script can still run.
# NOTE(review): assumes the CSV is an export of ggplot2::msleep - confirm.
msleep_path <- "C:/Users/ABHIRAM/Downloads/msleep.csv"
if (file.exists(msleep_path)) {
  msleep <- read.csv(msleep_path)
} else {
  message("'", msleep_path, "' not found; using ggplot2::msleep instead.")
  # as.data.frame() keeps the object a plain data.frame, like read.csv() output.
  msleep <- as.data.frame(ggplot2::msleep)
}
# 1. Numeric Summary of Data
# Summarizing the five numeric columns selected below (this dataset has only 11 columns in total, so the target of at least 10 columns cannot be met by numeric columns alone)
# Descriptive statistics for the five selected numeric columns, computed with
# summarytools::descr() and stored for reuse in the combined summary.
# NOTE(review): the "msleep" label in the printed header presumably comes from
# summarytools inspecting this pipe expression - confirm before restructuring
# the call.
numeric_summary <- msleep %>%
select(sleep_total, sleep_rem, awake, brainwt, bodywt) %>%
summarytools::descr()
# Auto-print the table. In the recorded output the columns appear in
# alphabetical order rather than in select() order.
numeric_summary
## Descriptive Statistics
## msleep
## N: 83
##
## awake bodywt brainwt sleep_rem sleep_total
## ----------------- -------- --------- --------- ----------- -------------
## Mean 13.57 166.14 0.28 1.88 10.43
## Std.Dev 4.45 786.84 0.98 1.30 4.45
## Min 4.10 0.00 0.00 0.10 1.90
## Q1 10.20 0.15 0.00 0.90 7.70
## Median 13.90 1.67 0.01 1.50 10.10
## Q3 16.30 50.00 0.14 2.40 13.80
## Max 22.10 6654.00 5.71 6.60 19.90
## MAD 5.04 2.43 0.02 1.19 5.04
## IQR 5.90 41.58 0.12 1.50 5.90
## CV 0.33 4.74 3.47 0.69 0.43
## Skewness -0.05 7.10 4.63 1.46 0.05
## SE.Skewness 0.26 0.26 0.32 0.31 0.26
## Kurtosis -0.71 53.72 20.96 2.73 -0.71
## N.Valid 83.00 83.00 56.00 61.00 83.00
## Pct.Valid 100.00 100.00 67.47 73.49 100.00
# 2. Categorical Summary
# For categorical columns (genus, vore, order, conservation), showing unique values and counts
# Frequency table for each categorical column, returned as a named list with
# one table per column.
# useNA = "ifany" adds an <NA> entry wherever a column has missing values, so
# each table's counts add up to the number of rows instead of silently leaving
# out incomplete records (vore and conservation both contain missing values).
categorical_summary <- msleep %>%
  select(genus, vore, order, conservation) %>%
  lapply(function(x) table(factor(x), useNA = "ifany"))
categorical_summary
## $genus
##
## Acinonyx Aotus Aplodontia Blarina Bos
## 1 1 1 1 1
## Bradypus Callorhinus Calomys Canis Capreolus
## 1 1 1 1 1
## Capri Cavis Cercopithecus Chinchilla Condylura
## 1 1 1 1 1
## Cricetomys Cryptotis Dasypus Dendrohyrax Didelphis
## 1 1 1 1 1
## Elephas Eptesicus Equus Erinaceus Erythrocebus
## 1 1 2 1 1
## Eutamias Felis Galago Genetta Giraffa
## 1 1 1 1 1
## Globicephalus Haliochoerus Heterohyrax Homo Lemur
## 1 1 1 1 1
## Loxodonta Lutreolina Macaca Meriones Mesocricetus
## 1 1 1 1 1
## Microtus Mus Myotis Neofiber Nyctibeus
## 1 1 1 1 1
## Octodon Onychomys Oryctolagus Ovis Pan
## 1 1 1 1 1
## Panthera Papio Paraechinus Perodicticus Peromyscus
## 3 1 1 1 1
## Phalanger Phoca Phocoena Potorous Priodontes
## 1 1 1 1 1
## Procavia Rattus Rhabdomys Saimiri Scalopus
## 1 1 1 1 1
## Sigmodon Spalax Spermophilus Suncus Sus
## 1 1 3 1 1
## Tachyglossus Tamias Tapirus Tenrec Tupaia
## 1 1 1 1 1
## Tursiops Vulpes
## 1 2
##
## $vore
##
## carni herbi insecti omni <NA>
## 19 32 5 20 7
##
## $order
##
## Afrosoricida Artiodactyla Carnivora Cetacea Chiroptera
## 1 6 12 3 2
## Cingulata Didelphimorphia Diprotodontia Erinaceomorpha Hyracoidea
## 2 2 2 2 3
## Lagomorpha Monotremata Perissodactyla Pilosa Primates
## 1 1 3 1 12
## Proboscidea Rodentia Scandentia Soricomorpha
## 2 22 1 5
##
## $conservation
##
## cd domesticated en lc nt vu <NA>
## 2 10 4 27 4 7 29
# 3. Combined Summary
# Combining the numeric and categorical summaries
# Bundle both summaries into one named list so they can be handled together.
combined_summary <- list(
  Numeric_Summary = numeric_summary,
  Categorical_Summary = categorical_summary
)
# Print the two components one at a time. Auto-printing the whole list did not
# show either summary; it only produced the summarytools message
# "x must either be a summarytools object created with freq(), descr(), or a
# list of summarytools objects created using by()" - presumably because the
# list mixes a summarytools object with plain tables.
print(combined_summary[["Numeric_Summary"]])
print(combined_summary[["Categorical_Summary"]])
# (Output: the descriptive-statistics table from section 1 followed by the
# frequency tables from section 2.)
# 4. Novel Questions
# Question 1: What is the distribution of sleep_total across different orders?
# Mean and median of sleep_total for each value of `order`, one row per order.
# Missing sleep_total values are ignored (na.rm = TRUE).
q1 <- summarise(
  group_by(msleep, order),
  Avg_Sleep_Total = mean(sleep_total, na.rm = TRUE),
  Median_Sleep_Total = median(sleep_total, na.rm = TRUE)
)
q1
## # A tibble: 19 × 3
## order Avg_Sleep_Total Median_Sleep_Total
## <chr> <dbl> <dbl>
## 1 Afrosoricida 15.6 15.6
## 2 Artiodactyla 4.52 3.9
## 3 Carnivora 10.1 10.2
## 4 Cetacea 4.5 5.2
## 5 Chiroptera 19.8 19.8
## 6 Cingulata 17.8 17.8
## 7 Didelphimorphia 18.7 18.7
## 8 Diprotodontia 12.4 12.4
## 9 Erinaceomorpha 10.2 10.2
## 10 Hyracoidea 5.67 5.4
## 11 Lagomorpha 8.4 8.4
## 12 Monotremata 8.6 8.6
## 13 Perissodactyla 3.47 3.1
## 14 Pilosa 14.4 14.4
## 15 Primates 10.5 9.9
## 16 Proboscidea 3.6 3.6
## 17 Rodentia 12.5 12.9
## 18 Scandentia 8.9 8.9
## 19 Soricomorpha 11.1 10.3
# Question 2: Is there a correlation between body weight and brain weight?
# Correlation between body weight and brain weight, computed with cor() on the
# rows where both values are present (na.omit() removes incomplete pairs).
# The result is a one-row table with a single `Correlation` column.
q2 <- summarise(
  na.omit(select(msleep, bodywt, brainwt)),
  Correlation = cor(bodywt, brainwt)
)
q2
## Correlation
## 1 0.9337822
# Question 3: What is the most common vore type for each order?
# Count rows per (order, vore) pair, then keep the most frequent vore within
# each order.
q3 <- msleep %>%
  group_by(order, vore) %>%
  # .groups = "drop_last" states the regrouping explicitly (the result stays
  # grouped by `order` only) and silences summarise()'s informational message.
  summarise(Count = n(), .groups = "drop_last") %>%
  arrange(order, desc(Count)) %>%
  # Still grouped by `order`, so slice(1) keeps the top row of every order.
  # Ties are broken silently by row order: e.g. Cingulata has two members but
  # its top Count is 1.
  slice(1) %>%
  # Return a plain, ungrouped table so later verbs are not applied per order.
  ungroup()
q3
## # A tibble: 19 × 3
## order vore Count
## <chr> <chr> <int>
## 1 Afrosoricida omni 1
## 2 Artiodactyla herbi 5
## 3 Carnivora carni 12
## 4 Cetacea carni 3
## 5 Chiroptera insecti 2
## 6 Cingulata carni 1
## 7 Didelphimorphia carni 1
## 8 Diprotodontia herbi 1
## 9 Erinaceomorpha omni 1
## 10 Hyracoidea herbi 2
## 11 Lagomorpha herbi 1
## 12 Monotremata insecti 1
## 13 Perissodactyla herbi 3
## 14 Pilosa herbi 1
## 15 Primates omni 10
## 16 Proboscidea herbi 2
## 17 Rodentia herbi 16
## 18 Scandentia omni 1
## 19 Soricomorpha omni 3
# Question 4: How does conservation status vary among different genera?
# Count rows per (genus, conservation) pair, then keep the most frequent
# conservation value within each genus. Missing conservation values form their
# own <NA> group and can therefore be the reported value for a genus.
q4 <- msleep %>%
  group_by(genus, conservation) %>%
  # .groups = "drop_last" states the regrouping explicitly (the result stays
  # grouped by `genus` only) and silences summarise()'s informational message.
  summarise(Count = n(), .groups = "drop_last") %>%
  arrange(genus, desc(Count)) %>%
  # Still grouped by `genus`, so slice(1) keeps the top row of every genus;
  # ties are broken silently by row order.
  slice(1) %>%
  # Return a plain, ungrouped table so later verbs are not applied per genus.
  ungroup()
q4
## # A tibble: 77 × 3
## genus conservation Count
## <chr> <chr> <int>
## 1 Acinonyx lc 1
## 2 Aotus <NA> 1
## 3 Aplodontia nt 1
## 4 Blarina lc 1
## 5 Bos domesticated 1
## 6 Bradypus <NA> 1
## 7 Callorhinus vu 1
## 8 Calomys <NA> 1
## 9 Canis domesticated 1
## 10 Capreolus lc 1
## # ℹ 67 more rows
# Question 5: Is there a relationship between sleep patterns (e.g., sleep_total) and conservation status?
# Mean and median of sleep_total for each conservation status, one row per
# status. Rows with a missing status are summarised together as their own <NA>
# group; missing sleep_total values are ignored (na.rm = TRUE).
q5 <- summarise(
  group_by(msleep, conservation),
  Avg_Sleep_Total = mean(sleep_total, na.rm = TRUE),
  Median_Sleep_Total = median(sleep_total, na.rm = TRUE)
)
q5
## # A tibble: 7 × 3
## conservation Avg_Sleep_Total Median_Sleep_Total
## <chr> <dbl> <dbl>
## 1 cd 2.3 2.3
## 2 domesticated 7.58 8.75
## 3 en 13.0 15.0
## 4 lc 11.4 10.9
## 5 nt 13.0 13.4
## 6 vu 6.93 5.6
## 7 <NA> 11.2 10.6
# 6. Visual Summary
# Visualization 1: Distribution of sleep_total
# Histogram of sleep_total using 20 bins, sky-blue bars outlined in black.
ggplot(data = msleep, mapping = aes(x = sleep_total)) +
  labs(
    x = "Sleep Total Hours",
    title = "Distribution of Sleep Total"
  ) +
  geom_histogram(bins = 20, colour = "black", fill = "skyblue")

# Visualization 2: Scatterplot of bodywt vs. brainwt
# Body weight against brain weight, one semi-transparent point per row,
# coloured by vore. Rows with missing values are dropped by geom_point(),
# which reports them in a warning.
ggplot(data = msleep, mapping = aes(x = bodywt, y = brainwt)) +
  labs(
    y = "Brain Weight (kg)",
    x = "Body Weight (kg)",
    title = "Scatterplot of Body Weight vs. Brain Weight"
  ) +
  geom_point(mapping = aes(colour = vore), alpha = 0.7)
## Warning: Removed 27 rows containing missing values (`geom_point()`).

# Visualization 3: Boxplot of sleep_total by order
# One sleep_total boxplot per order; coord_flip() turns the chart sideways so
# the order names run along the vertical axis.
ggplot(data = msleep, mapping = aes(x = order, y = sleep_total)) +
  labs(
    y = "Sleep Total Hours",
    x = "Order",
    title = "Sleep Total by Order"
  ) +
  geom_boxplot(colour = "black", fill = "lightgreen") +
  coord_flip()

# Visualization 4: Bar chart of conservation status counts
# Number of rows for each conservation status, drawn as salmon bars.
ggplot(data = msleep, mapping = aes(x = conservation)) +
  labs(
    y = "Count",
    x = "Conservation Status",
    title = "Conservation Status Counts"
  ) +
  geom_bar(fill = "salmon")

# Visualization 5: Bar chart of vore counts
# Number of rows for each vore type, drawn as light-blue bars.
ggplot(data = msleep, mapping = aes(x = vore)) +
  labs(
    y = "Count",
    x = "Vore Type",
    title = "Vore Type Counts"
  ) +
  geom_bar(fill = "lightblue")
