STAT 360: Computational Statistics and Data Analysis

Load R Libraries, Import and Attach Relevant Data, and Specify Seed

library(rmarkdown); library(knitr); library(readxl)
# Fix the random-number seed so that random draws made after this point are
# reproducible from run to run.
set.seed(69)

EXERCISE 01

Part (a)

# Read the "1. population level data" sheet of the moose/wolf workbook into a
# tibble named MooseWolvesData.
library(readxl)
MooseWolvesData <- read_excel(
  path = "C:/Users/Sarah Chock/OneDrive - University of St. Thomas/Senior Year/STAT 360 Comp Stat and Data Analysis/Exploratory Data Analysis/MooseWolvesData.xlsx",
  sheet = "1. population level data"
)

Part (b)

There are 61 cases (rows) and 33 variables (columns).

# dim() reports the number of rows followed by the number of columns.
dim(MooseWolvesData)
## [1] 61 33

Part (c)

# Preview the first six rows of the data set.
head(MooseWolvesData, n = 6)
## # A tibble: 6 x 33
##    year wolves moose `kill rate` `predation rate` `f (wolves)` `ancestry (immi~`
##   <dbl>  <dbl> <dbl> <chr>       <chr>            <chr>        <chr>            
## 1  1959     20  538. N/A         N/A              N/A          N/A              
## 2  1960     22  564. N/A         N/A              N/A          N/A              
## 3  1961     22  572. N/A         N/A              N/A          N/A              
## 4  1962     23  579. N/A         N/A              N/A          N/A              
## 5  1963     20  596. N/A         N/A              N/A          N/A              
## 6  1964     26  620. N/A         N/A              N/A          N/A              
## # ... with 26 more variables: `Juvenile Survival` <chr>,
## #   `Adult Survival` <chr>, `overall survival (genetic-CR)` <chr>,
## #   `overall survival (field-based estimate)` <chr>,
## #   `% mortality, obsolete` <chr>, `%recruitment, obsolete` <chr>,
## #   `recruitment rate, moose (aerial surveys)` <chr>,
## #   `mean age (excluding calves)` <chr>,
## #   `proportion of moose population that are senescent` <chr>, ...

Part (d)

There are 28 wolves in the 7th year

# Convert the tibble to a matrix so single cells can be pulled out by
# [row, column]. Most columns are <chr>, so the matrix is character-typed and
# the looked-up value prints as a quoted string.
mwd <- as.matrix(MooseWolvesData)
# Row 7, "wolves" column (the 2nd column).
mwd[7, "wolves"]
## wolves 
##   "28"

Part (e)

There are 1182.5755 moose in the 13th year - why are there partial moose?

# Row 13, "moose" column (the 3rd column).
mwd[13, "moose"]
##       moose 
## "1182.5755"

Part (f)

There are 12 wolves and 1116.0117 moose in the 30th year

# Row 30, "wolves" and "moose" columns (the 2nd and 3rd columns).
mwd[30, c("wolves", "moose")]
##      wolves       moose 
##        "12" "1116.0117"

EXERCISE 02

# Assemble a 3x3 matrix from four smaller pieces:
#   D (1x1) stacked on B (1x1) forms the left column of the top two rows,
#   C (2x2) is attached to its right, and A (1x3) becomes the bottom row.
A <- array(c(4, 3, 8), dim = c(1, 3))
B <- matrix(9)
C <- cbind(c(7, 5), c(6, 1))  # columns are (7, 5) and (6, 1)
D <- matrix(2)
twoby1 <- rbind(D, B)         # 2x1: D over B
twoby3 <- cbind(twoby1, C)    # 2x3: left column followed by C
rbind(twoby3, A)              # 3x3: append A as the last row
##      [,1] [,2] [,3]
## [1,]    2    7    6
## [2,]    9    5    1
## [3,]    4    3    8

EXERCISE 03

Part (a)

# Import the distress survey workbook (first sheet by default) and keep a
# matrix copy, dd, for element-wise recoding.
DistressData <- read_excel(
  path = "C:/Users/Sarah Chock/OneDrive - University of St. Thomas/Senior Year/STAT 360 Comp Stat and Data Analysis/Exploratory Data Analysis/DistressData.xlsx"
)
dd <- as.matrix(DistressData)

Part (b)

# Swap the codes 3 and 5 everywhere in dd.
# Both sets of positions are located BEFORE anything is overwritten, so no
# temporary placeholder value is needed. The earlier version parked the 3s as
# 6 and then turned every 6 into 5, which would also have rewritten any
# genuine 6 already present in the data.
# which() drops NA comparisons, so missing entries are left untouched.
threes <- which(dd == 3)
fives <- which(dd == 5)
dd[threes] <- 5
dd[fives] <- 3

Part (c)

# Preview the first six rows of the recoded matrix.
head(dd, n = 6)
##      Hopelessness Overwhelmed Exhausted VeryLonely VerySad Depressed Anxiety
## [1,]            3           5         5          3       3         3       4
## [2,]            4           4         4          3       3         2       5
## [3,]            1           4         5          4       2         2       2
## [4,]            3           5         4          5       4         2       5
## [5,]            3           3         3          3       3         1       1
## [6,]            5           4         4          5       5         4       5
##      SelfHarm SuicidalThoughts SuicidalAttempts
## [1,]        2                2                1
## [2,]        1                1                1
## [3,]        1                1                1
## [4,]        1                1                1
## [5,]        1                1                1
## [6,]        1                1                1

EXERCISE 04

Part (a)

# Build the poverty data set: one row per country, with four numeric
# indicator columns. The indicator values are laid out row by row (one country
# per line) so the code reads like the printed table.
PovertyData <- data.frame(
  Name = c(
    "Estonia", "Luxembourg", "Chile", "Belgium", "Greece",
    "Spain", "Djibouti", "Cyprus", "Lithuania", "Kosovo"
  ),
  matrix(
    c(
      6.6,  0.0,  5.3,  0.0,  # Estonia
      0.2,  0.0,  0.0,  0.8,  # Luxembourg
      0.1,  0.3,  0.6,  4.0,  # Chile
      0.3,  0.0,  0.9,  1.9,  # Belgium
      0.5,  0.0,  0.3,  1.7,  # Greece
      0.2,  0.0,  0.2,  3.4,  # Spain
      7.1, 39.8, 45.4, 30.1,  # Djibouti
      0.5,  0.0,  0.5,  1.4,  # Cyprus
      9.9,  0.0, 10.6,  0.2,  # Lithuania
      0.7,  0.2,  1.4,  0.5   # Kosovo
    ),
    ncol = 4,
    byrow = TRUE,
    dimnames = list(NULL, c("Water", "Electricity", "Sanitation", "Education"))
  )
)
PovertyData
##          Name Water Electricity Sanitation Education
## 1     Estonia   6.6         0.0        5.3       0.0
## 2  Luxembourg   0.2         0.0        0.0       0.8
## 3       Chile   0.1         0.3        0.6       4.0
## 4     Belgium   0.3         0.0        0.9       1.9
## 5      Greece   0.5         0.0        0.3       1.7
## 6       Spain   0.2         0.0        0.2       3.4
## 7    Djibouti   7.1        39.8       45.4      30.1
## 8      Cyprus   0.5         0.0        0.5       1.4
## 9   Lithuania   9.9         0.0       10.6       0.2
## 10     Kosovo   0.7         0.2        1.4       0.5

Part (b)

There are 10 cases (rows) and 5 variables (columns).

# dim() reports the number of rows followed by the number of columns.
dim(PovertyData)
## [1] 10  5

Part (c)

# Draw 10 row indices from 1..10 WITH replacement, reseeding first so the
# draw is reproducible.
# Both arguments are named: the earlier call, sample.int(10, n = 10, ...),
# only worked because the named `n` took the first formal and the bare 10 then
# fell through to `size`. The values drawn are unchanged.
set.seed(420)
x <- sample.int(n = 10, size = 10, replace = TRUE)
x
##  [1]  5  5 10  8  9  5  5  9  8  9

Part (d)

# Form the bootstrap sample by picking the rows of PovertyData listed in x.
# Repeated indices repeat the row, and R disambiguates the duplicated row names
# with suffixes such as "5.1".
bootstrap <- PovertyData[x, , drop = FALSE]
bootstrap
##          Name Water Electricity Sanitation Education
## 5      Greece   0.5         0.0        0.3       1.7
## 5.1    Greece   0.5         0.0        0.3       1.7
## 10     Kosovo   0.7         0.2        1.4       0.5
## 8      Cyprus   0.5         0.0        0.5       1.4
## 9   Lithuania   9.9         0.0       10.6       0.2
## 5.2    Greece   0.5         0.0        0.3       1.7
## 5.3    Greece   0.5         0.0        0.3       1.7
## 9.1 Lithuania   9.9         0.0       10.6       0.2
## 8.1    Cyprus   0.5         0.0        0.5       1.4
## 9.2 Lithuania   9.9         0.0       10.6       0.2

Part (e)

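The new data set has the same dimensions as the original (10 rows and 5 columns), but differs in its cases. It is made up of cases drawn from the original data set with replacement, so some cases are repeated and others are left out. In my specific scenario, it contains only 4 distinct cases: Greece (4 times), Lithuania (3 times), Cyprus (twice), and Kosovo (once).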