msleep <- read.csv("C:/Users/ABHIRAM/Downloads/msleep.csv")
library(magrittr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following object is masked _by_ '.GlobalEnv':
##
## msleep
# Set the seed for reproducibility
set.seed(123)
# Number of rows in the dataset
total_rows <- nrow(msleep)
# Creating 5 random subsamples
num_subsamples <- 5
sample_size <- round(0.5 * total_rows) # Approximately 50% of the data
subsamples <- list() # Creating a list to store the subsamples
for (i in 1:num_subsamples) {
# Randomly sampling rows from the dataset
sample_rows <- sample(1:total_rows, size = sample_size, replace = TRUE)
# Selecting a random set of columns (at least 6 columns) with replacement
columns_to_select <- sample(1:ncol(msleep), size = sample_size, replace = TRUE)
# Creating a new data frame with the selected rows and columns
subsamples[[i]] <- msleep[sample_rows, columns_to_select]
# Assigning meaningful column names to the subsample (optional)
colnames(subsamples[[i]]) <- paste("col", 1:ncol(subsamples[[i]]))
# Printing the first few rows of each subsample
cat("Subsample", i, ":\n")
print(head(subsamples[[i]]))
cat("\n")
}
## Subsample 1 :
## col 1 col 2 col 3 col 4 col 5 col 6 col 7
## 31 Globicephalus Pilot whale 21.35 800.000 21.35 2.7 cd
## 79 Tupaia Tree shrew 15.10 0.104 15.10 8.9 <NA>
## 51 Panthera Tiger 8.20 162.564 8.20 15.8 en
## 14 Chinchilla Chinchilla 11.50 0.420 11.50 12.5 domesticated
## 67 Scalopus Eastern american mole 15.60 0.075 15.60 8.4 lc
## 42 Mus House mouse 11.50 0.022 11.50 12.5 nt
## col 8 col 9 col 10 col 11 col 12 col 13 col 14 col 15 col 16
## 31 21.35 NA Cetacea 2.7 800.000 NA 2.7 2.7 0.1
## 79 15.10 0.0025 Scandentia 8.9 0.104 0.2333333 8.9 8.9 2.6
## 51 8.20 NA Carnivora 15.8 162.564 NA 15.8 15.8 NA
## 14 11.50 0.0064 Rodentia 12.5 0.420 0.1166667 12.5 12.5 1.5
## 67 15.60 0.0012 Soricomorpha 8.4 0.075 0.1666667 8.4 8.4 2.1
## 42 11.50 0.0004 Rodentia 12.5 0.022 0.1833333 12.5 12.5 1.4
## col 17 col 18 col 19 col 20
## 31 Pilot whale 2.7 Globicephalus Pilot whale
## 79 Tree shrew 8.9 Tupaia Tree shrew
## 51 Tiger 15.8 Panthera Tiger
## 14 Chinchilla 12.5 Chinchilla Chinchilla
## 67 Eastern american mole 8.4 Scalopus Eastern american mole
## 42 House mouse 12.5 Mus House mouse
## col 21 col 22 col 23 col 24 col 25 col 26 col 27
## 31 Globicephalus Cetacea cd 2.7 carni 21.35 Cetacea
## 79 Tupaia Scandentia <NA> 8.9 omni 15.10 Scandentia
## 51 Panthera Carnivora en 15.8 carni 8.20 Carnivora
## 14 Chinchilla Rodentia domesticated 12.5 herbi 11.50 Rodentia
## 67 Scalopus Soricomorpha lc 8.4 insecti 15.60 Soricomorpha
## 42 Mus Rodentia nt 12.5 herbi 11.50 Rodentia
## col 28 col 29 col 30 col 31 col 32 col 33 col 34 col 35 col 36 col 37
## 31 2.7 21.35 21.35 0.1 carni NA 21.35 carni 0.1 carni
## 79 8.9 15.10 15.10 2.6 omni 0.2333333 15.10 omni 2.6 omni
## 51 15.8 8.20 8.20 NA carni NA 8.20 carni NA carni
## 14 12.5 11.50 11.50 1.5 herbi 0.1166667 11.50 herbi 1.5 herbi
## 67 8.4 15.60 15.60 2.1 insecti 0.1666667 15.60 insecti 2.1 insecti
## 42 12.5 11.50 11.50 1.4 herbi 0.1833333 11.50 herbi 1.4 herbi
## col 38 col 39 col 40 col 41 col 42
## 31 0.1 2.7 NA cd cd
## 79 2.6 8.9 0.0025 <NA> <NA>
## 51 NA 15.8 NA en en
## 14 1.5 12.5 0.0064 domesticated domesticated
## 67 2.1 8.4 0.0012 lc lc
## 42 1.4 12.5 0.0004 nt nt
##
## Subsample 2 :
## col 1 col 2 col 3 col 4 col 5 col 6
## 8 Vesper mouse NA Rodentia 17.0 NA 7.0
## 51 Tiger NA Carnivora 8.2 NA 15.8
## 74 Pig 0.500000 Artiodactyla 14.9 0.500000 9.1
## 50 Chimpanzee 1.416667 Primates 14.3 1.416667 9.7
## 74.1 Pig 0.500000 Artiodactyla 14.9 0.500000 9.1
## 76 Eastern american chipmunk NA Rodentia 8.2 NA 15.8
## col 7 col 8 col 9 col 10 col 11 col 12 col 13
## 8 0.045 Rodentia NA <NA> Rodentia Rodentia 7.0
## 51 162.564 Carnivora NA carni Carnivora Carnivora 15.8
## 74 86.250 Artiodactyla 0.500000 omni Artiodactyla Artiodactyla 9.1
## 50 52.200 Primates 1.416667 omni Primates Primates 9.7
## 74.1 86.250 Artiodactyla 0.500000 omni Artiodactyla Artiodactyla 9.1
## 76 0.112 Rodentia NA herbi Rodentia Rodentia 15.8
## col 14 col 15 col 16 col 17 col 18 col 19
## 8 Vesper mouse NA 0.045 Rodentia 0.045 17.0
## 51 Tiger NA 162.564 Carnivora 162.564 8.2
## 74 Pig 0.18 86.250 Artiodactyla 86.250 14.9
## 50 Chimpanzee 0.44 52.200 Primates 52.200 14.3
## 74.1 Pig 0.18 86.250 Artiodactyla 86.250 14.9
## 76 Eastern american chipmunk NA 0.112 Rodentia 0.112 8.2
## col 20 col 21 col 22 col 23 col 24 col 25 col 26 col 27
## 8 NA NA <NA> Calomys 0.045 7.0 17.0 NA
## 51 NA NA en Panthera 162.564 15.8 8.2 NA
## 74 2.4 0.500000 domesticated Sus 86.250 9.1 14.9 0.500000
## 50 1.4 1.416667 <NA> Pan 52.200 9.7 14.3 1.416667
## 74.1 2.4 0.500000 domesticated Sus 86.250 9.1 14.9 0.500000
## 76 NA NA <NA> Tamias 0.112 15.8 8.2 NA
## col 28 col 29 col 30 col 31 col 32 col 33
## 8 NA Rodentia <NA> NA Vesper mouse NA
## 51 NA Carnivora en NA Tiger NA
## 74 0.18 Artiodactyla domesticated 2.4 Pig 0.500000
## 50 0.44 Primates <NA> 1.4 Chimpanzee 1.416667
## 74.1 0.18 Artiodactyla domesticated 2.4 Pig 0.500000
## 76 NA Rodentia <NA> NA Eastern american chipmunk NA
## col 34 col 35 col 36 col 37 col 38 col 39 col 40 col 41 col 42
## 8 NA NA 17.0 NA Calomys <NA> 17.0 NA NA
## 51 NA NA 8.2 NA Panthera en 8.2 NA NA
## 74 0.500000 0.18 14.9 0.500000 Sus domesticated 14.9 2.4 2.4
## 50 1.416667 0.44 14.3 1.416667 Pan <NA> 14.3 1.4 1.4
## 74.1 0.500000 0.18 14.9 0.500000 Sus domesticated 14.9 2.4 2.4
## 76 NA NA 8.2 NA Tamias <NA> 8.2 NA NA
##
## Subsample 3 :
## col 1 col 2 col 3 col 4 col 5 col 6 col 7
## 58 <NA> 10.3 Diprotodontia <NA> 10.3 Phalanger Phalanger
## 61 <NA> 12.9 Diprotodontia herbi 12.9 Potoroo Potorous
## 74 domesticated 14.9 Artiodactyla omni 14.9 Pig Sus
## 24 domesticated 20.9 Perissodactyla herbi 20.9 Donkey Equus
## 63 lc 18.6 Hyracoidea <NA> 18.6 Rock hyrax Procavia
## 54 <NA> 14.6 Primates omni 14.6 Baboon Papio
## col 8 col 9 col 10 col 11 col 12 col 13 col 14
## 58 Diprotodontia 0.0114 Phalanger <NA> <NA> 10.3 NA
## 61 Diprotodontia NA Potoroo <NA> <NA> 12.9 NA
## 74 Artiodactyla 0.1800 Pig domesticated domesticated 14.9 0.5000000
## 24 Perissodactyla 0.4190 Donkey domesticated domesticated 20.9 NA
## 63 Hyracoidea 0.0210 Rock hyrax lc lc 18.6 NA
## 54 Primates 0.1800 Baboon <NA> <NA> 14.6 0.6666667
## col 15 col 16 col 17 col 18 col 19 col 20 col 21 col 22 col 23
## 58 1.8 10.3 <NA> Phalanger 0.0114 13.7 1.8 10.3 Phalanger
## 61 1.5 12.9 <NA> Potorous NA 11.1 1.5 12.9 Potoroo
## 74 2.4 14.9 domesticated Sus 0.1800 9.1 2.4 14.9 Pig
## 24 0.4 20.9 domesticated Equus 0.4190 3.1 0.4 20.9 Donkey
## 63 0.5 18.6 lc Procavia 0.0210 5.4 0.5 18.6 Rock hyrax
## 54 1.0 14.6 <NA> Papio 0.1800 9.4 1.0 14.6 Baboon
## col 24 col 25 col 26 col 27 col 28 col 29
## 58 <NA> <NA> NA <NA> 1.8 Diprotodontia
## 61 <NA> <NA> NA <NA> 1.5 Diprotodontia
## 74 domesticated domesticated 0.5000000 domesticated 2.4 Artiodactyla
## 24 domesticated domesticated NA domesticated 0.4 Perissodactyla
## 63 lc lc NA lc 0.5 Hyracoidea
## 54 <NA> <NA> 0.6666667 <NA> 1.0 Primates
## col 30 col 31 col 32 col 33 col 34 col 35 col 36
## 58 Phalanger Phalanger Phalanger Phalanger Phalanger Phalanger NA
## 61 Potorous Potoroo Potoroo Potorous Potoroo Potorous NA
## 74 Sus Pig Pig Sus Pig Sus 0.5000000
## 24 Equus Donkey Donkey Equus Donkey Equus NA
## 63 Procavia Rock hyrax Rock hyrax Procavia Rock hyrax Procavia NA
## 54 Papio Baboon Baboon Papio Baboon Papio 0.6666667
## col 37 col 38 col 39 col 40 col 41 col 42
## 58 Phalanger <NA> Phalanger Phalanger <NA> 10.3
## 61 Potoroo herbi Potorous Potorous <NA> 12.9
## 74 Pig omni Sus Sus domesticated 14.9
## 24 Donkey herbi Equus Equus domesticated 20.9
## 63 Rock hyrax <NA> Procavia Procavia lc 18.6
## 54 Baboon omni Papio Papio <NA> 14.6
##
## Subsample 4 :
## col 1 col 2 col 3 col 4 col 5 col 6 col 7 col 8
## 13 NA Primates 10.0 Primates Primates 14.0 NA 0.7
## 42 0.0004 Rodentia 12.5 Rodentia Rodentia 11.5 0.1833333 1.4
## 61 NA Diprotodontia 11.1 Diprotodontia Diprotodontia 12.9 NA 1.5
## 70 0.0057 Rodentia 16.6 Rodentia Rodentia 7.4 NA NA
## 38 0.1790 Primates 10.1 Primates Primates 13.9 0.7500000 1.2
## 64 0.0019 Rodentia 13.0 Rodentia Rodentia 11.0 0.1833333 2.4
## col 9 col 10 col 11 col 12 col 13 col 14 col 15
## 13 NA Grivet Cercopithecus NA omni NA 0.7
## 42 0.0004 House mouse Mus 0.1833333 herbi 0.0004 1.4
## 61 NA Potoroo Potorous NA herbi NA 1.5
## 70 0.0057 Arctic ground squirrel Spermophilus NA herbi 0.0057 NA
## 38 0.1790 Macaque Macaca 0.7500000 omni 0.1790 1.2
## 64 0.0019 Laboratory rat Rattus 0.1833333 herbi 0.0019 2.4
## col 16 col 17 col 18 col 19 col 20 col 21 col 22
## 13 4.750 Grivet Cercopithecus omni omni 10.0 NA
## 42 0.022 House mouse Mus herbi herbi 12.5 0.0004
## 61 1.100 Potoroo Potorous herbi herbi 11.1 NA
## 70 0.920 Arctic ground squirrel Spermophilus herbi herbi 16.6 0.0057
## 38 6.800 Macaque Macaca omni omni 10.1 0.1790
## 64 0.320 Laboratory rat Rattus herbi herbi 13.0 0.0019
## col 23 col 24 col 25 col 26
## 13 lc Primates Cercopithecus Grivet
## 42 nt Rodentia Mus House mouse
## 61 <NA> Diprotodontia Potorous Potoroo
## 70 lc Rodentia Spermophilus Arctic ground squirrel
## 38 <NA> Primates Macaca Macaque
## 64 lc Rodentia Rattus Laboratory rat
## col 27 col 28 col 29 col 30 col 31 col 32
## 13 Grivet lc Cercopithecus Cercopithecus omni lc
## 42 House mouse nt Mus Mus herbi nt
## 61 Potoroo <NA> Potorous Potorous herbi <NA>
## 70 Arctic ground squirrel lc Spermophilus Spermophilus herbi lc
## 38 Macaque <NA> Macaca Macaca omni <NA>
## 64 Laboratory rat lc Rattus Rattus herbi lc
## col 33 col 34 col 35 col 36 col 37 col 38 col 39
## 13 10.0 NA omni 4.750 omni Grivet NA
## 42 12.5 0.0004 herbi 0.022 herbi House mouse 0.1833333
## 61 11.1 NA herbi 1.100 herbi Potoroo NA
## 70 16.6 0.0057 herbi 0.920 herbi Arctic ground squirrel NA
## 38 10.1 0.1790 omni 6.800 omni Macaque 0.7500000
## 64 13.0 0.0019 herbi 0.320 herbi Laboratory rat 0.1833333
## col 40 col 41 col 42
## 13 Grivet 4.750 Primates
## 42 House mouse 0.022 Rodentia
## 61 Potoroo 1.100 Diprotodontia
## 70 Arctic ground squirrel 0.920 Rodentia
## 38 Macaque 6.800 Primates
## 64 Laboratory rat 0.320 Rodentia
##
## Subsample 5 :
## col 1 col 2 col 3 col 4 col 5 col 6 col 7 col 8
## 45 <NA> 11.0 carni NA 0.0125 <NA> NA Primates
## 10 lc 3.0 herbi NA 0.0982 lc NA Artiodactyla
## 42 nt 12.5 herbi 0.1833333 0.0004 nt 1.4 Rodentia
## 24 domesticated 3.1 herbi NA 0.4190 domesticated 0.4 Perissodactyla
## 18 lc 17.4 carni 0.3833333 0.0108 lc 3.1 Cingulata
## 81 <NA> 6.3 carni NA 0.0175 <NA> 1.3 Carnivora
## col 9 col 10 col 11 col 12 col 13 col 14 col 15 col 16 col 17
## 45 13.0 1.400 1.400 carni 0.0125 NA Nyctibeus 13.0 11.0
## 10 21.0 14.800 14.800 herbi 0.0982 NA Capreolus 21.0 3.0
## 42 11.5 0.022 0.022 herbi 0.0004 1.4 Mus 11.5 12.5
## 24 20.9 187.000 187.000 herbi 0.4190 0.4 Equus 20.9 3.1
## 18 6.6 3.500 3.500 carni 0.0108 3.1 Dasypus 6.6 17.4
## 81 17.7 2.000 2.000 carni 0.0175 1.3 Genetta 17.7 6.3
## col 18 col 19 col 20 col 21 col 22 col 23 col 24
## 45 Primates 11.0 Nyctibeus Nyctibeus NA NA 1.400
## 10 Artiodactyla 3.0 Capreolus Capreolus NA NA 14.800
## 42 Rodentia 12.5 Mus Mus 0.1833333 0.1833333 0.022
## 24 Perissodactyla 3.1 Equus Equus NA NA 187.000
## 18 Cingulata 17.4 Dasypus Dasypus 0.3833333 0.3833333 3.500
## 81 Carnivora 6.3 Genetta Genetta NA NA 2.000
## col 25 col 26 col 27 col 28 col 29 col 30 col 31 col 32
## 45 <NA> 13.0 NA carni NA NA 1.400 0.0125
## 10 lc 21.0 NA herbi NA NA 14.800 0.0982
## 42 nt 11.5 1.4 herbi 1.4 0.1833333 0.022 0.0004
## 24 domesticated 20.9 0.4 herbi 0.4 NA 187.000 0.4190
## 18 lc 6.6 3.1 carni 3.1 0.3833333 3.500 0.0108
## 81 <NA> 17.7 1.3 carni 1.3 NA 2.000 0.0175
## col 33 col 34 col 35 col 36 col 37 col 38
## 45 <NA> Slow loris NA 11.0 NA Primates
## 10 lc Roe deer NA 3.0 NA Artiodactyla
## 42 nt House mouse 1.4 12.5 0.1833333 Rodentia
## 24 domesticated Donkey 0.4 3.1 NA Perissodactyla
## 18 lc Long-nosed armadillo 3.1 17.4 0.3833333 Cingulata
## 81 <NA> Genet 1.3 6.3 NA Carnivora
## col 39 col 40 col 41 col 42
## 45 11.0 Slow loris carni Primates
## 10 3.0 Roe deer herbi Artiodactyla
## 42 12.5 House mouse herbi Rodentia
## 24 3.1 Donkey herbi Perissodactyla
## 18 17.4 Long-nosed armadillo carni Cingulata
## 81 6.3 Genet carni Carnivora
Subsample 1:
It includes various species such as Globicephalus, Tupaia, Panthera, Chinchilla, Scalopus, and Mus. Different columns contain information about these species, including their names, characteristics, and classifications. Notable aspects: This subsample includes different species with a variety of characteristics, classifications, and numerical values for columns like col 3, col 4, col 6, etc.
Subsample 2:
Species in this subsample include Vesper mouse, Tiger, Pig, Chimpanzee, and Eastern American chipmunk. Information about these species is available in columns like col 2, col 3, col 4, col 5, etc. Notable aspects: This subsample includes species from different categories like rodents, carnivores, and primates, with varying values for attributes like body weight and diet.
Subsample 3:
It includes species such as Diprotodontia, Potoroo, Pig, Donkey, Rock hyrax, and Baboon. Information about these species is present in columns like col 2, col 3, col 4, col 5, etc. Notable aspects: This subsample features species from diverse taxonomic groups, such as marsupials, artiodactyls, and primates, with a range of body weights and dietary preferences.
Subsample 4:
Species in this subsample include Primates, Rodentia, Diprotodontia, Arctic ground squirrel, Macaque, and Laboratory rat. Information about these species is available in columns like col 1, col 2, col 3, col 4, etc. Notable aspects: This subsample contains species from various taxonomic groups and includes rodents, primates, and other mammals, with varying body weights and dietary preferences.
Subsample 5:
It consists of species like Primates, Artiodactyla, Rodentia, Perissodactyla, Cingulata, and Carnivora. Information about these species is present in columns like col 1, col 2, col 3, col 4, etc. Notable aspects: This subsample includes species from different categories, such as mammals, with a wide range of body weights and dietary preferences. Differences:
The subsamples differ in terms of the species included, their characteristics, and classifications. They also differ in terms of the distribution of numerical values across various columns, such as body weight and diet.
Anomalies:
Anomalies in one subsample may include extreme values or outliers in columns like body weight (col 4) or diet type (col 10). For example, a very high body weight compared to the other species in the subsample might be considered an anomaly. Anomalies may also involve missing values (NA) in critical columns that provide taxonomic information (e.g., col 2 and col 3). Consistencies:
Some consistencies among all subsamples include the presence of numerical columns (e.g., body weight), categorical columns (e.g., diet type), and columns with species names or classifications. Taxonomic information is often consistent, with the first few columns providing details about the species’ taxonomy (col 2 and col 3).
Data Interpretation: The investigation highlighted instances where the same information was presented in multiple columns, making it challenging to interpret the data accurately. Future analysis should involve careful consideration of how to aggregate or consolidate redundant information to obtain meaningful insights.
Column Labels and Meanings: Understanding the true meanings and context of column labels is essential. In some cases, the investigation found that column labels were ambiguous or inconsistent. Going forward, clear documentation or a data dictionary should be created to ensure that all stakeholders understand the content and purpose of each column.
Data Transformation: The investigation identified the need for data transformation, such as converting data types and scaling numerical values. Proper data transformation can facilitate more meaningful analysis and modeling in the future.