Question 1
library(readr)
## readr (already installed) supplies read_csv(), which is used below to import the data.
horse_colic_url <- "https://www4.stat.ncsu.edu/~online/ST308/Data/cmrojas_horse-colic.csv"
## Stores the data set's URL as "horse_colic_url".
horse_colic_data <- read_csv(horse_colic_url)
## Reads the comma-separated horse-colic file straight from the URL and stores the result as "horse_colic_data".
abdomen_variable <- horse_colic_data[["Abdomen"]]
## Pulls the Abdomen column out of the tibble as a plain vector and stores it as "abdomen_variable".
mean_abdomen <- mean(abdomen_variable, na.rm = TRUE)
## Computes the mean of "abdomen_variable"; na.rm = TRUE drops NA values before averaging. The result is stored as "mean_abdomen".
print(mean_abdomen)
## [1] 3.573034
Question 2
library(readr)
## readr provides read_tsv(), which is needed further down.
hepatitis_url <- "https://www4.stat.ncsu.edu/~online/ST308/Data/cmrojas_hepatitis.tsv"
## Keeps the source URL in "hepatitis_url".
hepatitis_file <- "hepatitis_data.tsv"
## Name of the local copy, kept in one variable so the download and the read always refer to the same file.
download.file(url = hepatitis_url, destfile = hepatitis_file)
## Saves a local copy of the data instead of reading it directly from the URL.
hepatitis_data <- read_tsv(file = hepatitis_file, col_names = FALSE)
## Rows: 77 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## dbl (7): X1, X2, X3, X4, X5, X6, X7
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
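## Reads the downloaded file into R with read_tsv() (tab-separated values) and stores it as "hepatitis_data". col_names = FALSE tells readr that the first line is data rather than a header, so the columns receive the default names X1 through X7.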
names(hepatitis_data) <- c(
  "MALAISE", "SGOT", "VARICES", "ANOREXIA",
  "ALK_PHOSPHATE", "SPLEEN_PALPABLE", "HISTOLOGY"
)
## Assigns descriptive column names, given in column order as a character vector built with c().
print(hepatitis_data, n = 24)
## # A tibble: 77 × 7
## MALAISE SGOT VARICES ANOREXIA ALK_PHOSPHATE SPLEEN_PALPABLE HISTOLOGY
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2 25 2 2 256 1 2
## 2 2 31 2 2 85 2 1
## 3 2 58 2 2 138 1 2
## 4 1 20 1 2 86 2 2
## 5 2 48 2 2 NA 2 1
## 6 2 24 2 2 NA 2 2
## 7 1 45 2 1 NA 2 1
## 8 2 24 2 2 30 2 1
## 9 2 NA 2 2 NA 2 1
## 10 2 34 2 2 NA 2 2
## 11 2 23 1 2 84 2 2
## 12 1 60 2 2 NA 2 1
## 13 2 64 2 2 102 2 1
## 14 1 18 2 1 76 2 2
## 15 1 75 2 2 85 1 1
## 16 1 20 2 2 75 2 2
## 17 2 20 2 2 85 2 1
## 18 1 242 1 1 NA 2 2
## 19 2 92 2 2 58 2 1
## 20 1 28 2 2 120 1 2
## 21 1 14 2 1 NA 2 1
## 22 1 420 2 2 191 2 1
## 23 1 NA 2 2 NA 2 2
## 24 2 58 2 2 26 2 1
## # ℹ 53 more rows
## This displays the first 24 rows (observations) of hepatitis_data by passing n = 24 to print().
Question 3
library(readxl)
## readxl provides read_excel(), which is used below.
yeast_url <- "https://www4.stat.ncsu.edu/~online/ST308/Data/cmrojas_yeast.xlsx"
## Keeps the source URL in "yeast_url".
yeast_file <- "yeast_data.xlsx"
## Name of the local copy, kept in one variable so the download and the read always refer to the same file.
download.file(url = yeast_url, destfile = yeast_file, mode = "wb")
## Downloads the workbook locally. mode = "wb" writes the file in binary mode so the .xlsx content is saved unaltered.
yeast_data <- read_excel(path = yeast_file)
## Reads the downloaded workbook into R and stores it as "yeast_data".
subset_data <- yeast_data[seq_len(5), 2]
## Uses bracket indexing [rows, columns] to keep only the first 5 rows of the second column; the result is stored as "subset_data".
print(subset_data)
## # A tibble: 5 × 1
## seq_name
## <chr>
## 1 TBA3_YEAST
## 2 PRC6_YEAST
## 3 CHS3_YEAST
## 4 YCD8_YEAST
## 5 STL1_YEAST