library(rmarkdown); library(knitr); library(readxl)
set.seed(69)
library(readxl)
# Load the "1. population level data" sheet of the moose/wolves workbook.
# NOTE(review): this absolute path only resolves on the original author's
# machine; consider a project-relative path so the document can be re-knit
# elsewhere.
MooseWolvesData <- read_excel("C:/Users/Sarah Chock/OneDrive - University of St. Thomas/Senior Year/STAT 360 Comp Stat and Data Analysis/Exploratory Data Analysis/MooseWolvesData.xlsx", sheet = "1. population level data")
There are 61 cases and 33 dimensions
# Number of rows (cases) and columns (variables) in the loaded sheet.
dim(MooseWolvesData)
## [1] 61 33
# Preview the first six rows together with each column's type.
head(MooseWolvesData)
## # A tibble: 6 x 33
## year wolves moose `kill rate` `predation rate` `f (wolves)` `ancestry (immi~`
## <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr>
## 1 1959 20 538. N/A N/A N/A N/A
## 2 1960 22 564. N/A N/A N/A N/A
## 3 1961 22 572. N/A N/A N/A N/A
## 4 1962 23 579. N/A N/A N/A N/A
## 5 1963 20 596. N/A N/A N/A N/A
## 6 1964 26 620. N/A N/A N/A N/A
## # ... with 26 more variables: `Juvenile Survival` <chr>,
## # `Adult Survival` <chr>, `overall survival (genetic-CR)` <chr>,
## # `overall survival (field-based estimate)` <chr>,
## # `% mortality, obsolete` <chr>, `%recruitment, obsolete` <chr>,
## # `recruitment rate, moose (aerial surveys)` <chr>,
## # `mean age (excluding calves)` <chr>,
## # `proportion of moose population that are senescent` <chr>, ...
There are 28 wolves in the 7th year
# Convert the tibble to a matrix so cells can be read by [row, column] position.
# Several columns are character (see the <chr> types in the preview output),
# so as.matrix() produces a character matrix and every value, including the
# wolf and moose counts, prints as a quoted string.
mwd <- as.matrix(MooseWolvesData)
# Row 7, column 2 (wolves).
mwd[7,2]
## wolves
## "28"
There are 1182.5755 moose in the 13th year - why are there partial moose?
# Row 13, column 3 (moose).
mwd[13,3]
## moose
## "1182.5755"
There are 12 wolves and 1116.0117 moose in the 30th year
# Row 30, columns 2 and 3 (wolves and moose).
mwd[30,2:3]
## wolves moose
## "12" "1116.0117"
# Assemble a 3 x 3 matrix out of four smaller pieces.
# matrix() fills column by column and infers the missing dimension from the
# length of the data, so only the row count needs to be given.
A <- matrix(c(4, 3, 8), nrow = 1)     # 1 x 3, becomes the bottom row
B <- matrix(9)                        # 1 x 1
C <- matrix(c(7, 5, 6, 1), nrow = 2)  # 2 x 2: columns are (7, 5) and (6, 1)
D <- matrix(2)                        # 1 x 1

# Stack D on top of B to form the first column of the upper two rows.
twoby1 <- rbind(D, B)

# Attach C to the right of that column, giving the upper 2 x 3 part.
twoby3 <- cbind(twoby1, C)

# Append A underneath to complete (and print) the 3 x 3 matrix.
rbind(twoby3, A)
## [,1] [,2] [,3]
## [1,] 2 7 6
## [2,] 9 5 1
## [3,] 4 3 8
# Load the distress data workbook (no sheet is specified in the call).
# NOTE(review): this absolute path only resolves on the original author's
# machine; consider a project-relative path so the document can be re-knit
# elsewhere.
DistressData <- read_excel("C:/Users/Sarah Chock/OneDrive - University of St. Thomas/Senior Year/STAT 360 Comp Stat and Data Analysis/Exploratory Data Analysis/DistressData.xlsx")
# Convert to a matrix so values can be recoded across all columns at once.
dd <- as.matrix(DistressData)
# Swap the codes 3 and 5 everywhere in the matrix.
# Both sets of positions are located before anything is overwritten, so the
# swap needs no temporary placeholder value. Parking the 3s at a placeholder
# such as 6 would also turn any genuine 6 already in the data into a 5.
# which() drops comparisons that evaluate to NA, so missing values are left
# untouched.
idx_three <- which(dd == 3)
idx_five <- which(dd == 5)
dd[idx_three] <- 5
dd[idx_five] <- 3
# Preview the first six rows of the recoded matrix.
head(dd)
## Hopelessness Overwhelmed Exhausted VeryLonely VerySad Depressed Anxiety
## [1,] 3 5 5 3 3 3 4
## [2,] 4 4 4 3 3 2 5
## [3,] 1 4 5 4 2 2 2
## [4,] 3 5 4 5 4 2 5
## [5,] 3 3 3 3 3 1 1
## [6,] 5 4 4 5 5 4 5
## SelfHarm SuicidalThoughts SuicidalAttempts
## [1,] 2 2 1
## [2,] 1 1 1
## [3,] 1 1 1
## [4,] 1 1 1
## [5,] 1 1 1
## [6,] 1 1 1
# Build a small data frame by hand: one row per country, with one name
# column and four numeric indicator columns (ten values each).
PovertyData <- data.frame(
  Name = c(
    "Estonia", "Luxembourg", "Chile", "Belgium", "Greece",
    "Spain", "Djibouti", "Cyprus", "Lithuania", "Kosovo"
  ),
  Water       = c(6.6, 0.2, 0.1, 0.3, 0.5, 0.2,  7.1, 0.5,  9.9, 0.7),
  Electricity = c(0.0, 0.0, 0.3, 0.0, 0.0, 0.0, 39.8, 0.0,  0.0, 0.2),
  Sanitation  = c(5.3, 0.0, 0.6, 0.9, 0.3, 0.2, 45.4, 0.5, 10.6, 1.4),
  Education   = c(0.0, 0.8, 4.0, 1.9, 1.7, 3.4, 30.1, 1.4,  0.2, 0.5)
)
# Print the full table.
PovertyData
## Name Water Electricity Sanitation Education
## 1 Estonia 6.6 0.0 5.3 0.0
## 2 Luxembourg 0.2 0.0 0.0 0.8
## 3 Chile 0.1 0.3 0.6 4.0
## 4 Belgium 0.3 0.0 0.9 1.9
## 5 Greece 0.5 0.0 0.3 1.7
## 6 Spain 0.2 0.0 0.2 3.4
## 7 Djibouti 7.1 39.8 45.4 30.1
## 8 Cyprus 0.5 0.0 0.5 1.4
## 9 Lithuania 9.9 0.0 10.6 0.2
## 10 Kosovo 0.7 0.2 1.4 0.5
There are 10 cases and 5 dimensions
# Number of rows (cases) and columns (variables) in PovertyData.
dim(PovertyData)
## [1] 10 5
# Draw one bootstrap resample of PovertyData: pick as many row indices as the
# data has rows, with replacement, then index the data frame by them.
# The seed is fixed so the same resample is drawn on every run.
set.seed(420)
# sample.int(n, size = n, ...) defaults `size` to `n`, so passing the row
# count once gives a resample the same size as the original and keeps the
# code correct if rows are added or removed.
x <- sample.int(nrow(PovertyData), replace = TRUE)
x
## [1] 5 5 10 8 9 5 5 9 8 9
# Rows that are drawn more than once appear repeatedly, with suffixed row
# names (e.g. 5, 5.1, 5.2) in the printed result.
bootstrap <- PovertyData[x, ]
bootstrap
## Name Water Electricity Sanitation Education
## 5 Greece 0.5 0.0 0.3 1.7
## 5.1 Greece 0.5 0.0 0.3 1.7
## 10 Kosovo 0.7 0.2 1.4 0.5
## 8 Cyprus 0.5 0.0 0.5 1.4
## 9 Lithuania 9.9 0.0 10.6 0.2
## 5.2 Greece 0.5 0.0 0.3 1.7
## 5.3 Greece 0.5 0.0 0.3 1.7
## 9.1 Lithuania 9.9 0.0 10.6 0.2
## 8.1 Cyprus 0.5 0.0 0.5 1.4
## 9.2 Lithuania 9.9 0.0 10.6 0.2
The new data set has the same dimensions as the original (10 cases and 5 variables), but differs in the cases. It contains a subset of the cases from the original dataset, with repetitions. In my specific scenario, it contains only 4 distinct cases: Greece (4 times), Lithuania (3 times), Cyprus (twice), and Kosovo (once).