Content produced here is for R data wrangling/analysis purposes only. Views expressed are my own. The opinions expressed here belong solely to me and do not reflect the views of my employer.
.csv
is always preferred over .xlsx
.xlsx
data, remember to clean up the
headers & footers & irregular column names.|>
pipes are useful for a series of operations.raw_xlsx <- read_excel(path = table_path,
sheet = 1,
skip = 3,
n_max = 51)
raw_xlsx <- raw_xlsx |>
rename(s = 1,
p = 2,
a = 3,
c = 4)
total_ <- tail(raw_xlsx, n = 1) # last row as total
raw_xlsx <- raw_xlsx |>
filter(row_number() <= n() - 1) # exclude the last row
# confirm whether two values match
sum(raw_xlsx$p) == total_$p
## [1] TRUE
options(scipen = 999)
options(digits=20)
numerator <- raw_xlsx$p
n <- seq(1, 30, by = 1) # a reasonable range of numbers to show how iterations proceed in a nested loop
value_l <- vector(mode = "list", length = dim(raw_xlsx)[1])
# loop
for (i in 1:dim(raw_xlsx)[1]) {
value_l[[i]] <- numerator[i] / sqrt(n * (n - 1))
}
names(value_l) <- raw_xlsx$s
value_unlist <- unlist(value_l)
df_value <- data.frame(s = names(value_unlist),
p= unlist(value_l),
row.names = NULL) |>
arrange(desc(p))
"[0-9]+"
# parse number
df_value$s <- readr::parse_number(df_value$s) + 1
# remove 0-9 from labels
df_value$s <- gsub(pattern = "[0-9]+",replacement = "", df_value$s)