This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk, or by placing your cursor inside it and pressing Ctrl+Shift+Enter (Cmd+Shift+Enter on macOS).
library(ggplot2)
library(tidyr)
library(dplyr)
# Set the path to the folder containing CSV files
folder_path <- "/cloud/project/demo_tables - Copy/"
#folder_path <- "/Users/jennafrey/Library/CloudStorage/OneDrive-UniversityofNorthCarolinaatChapelHill/h2l2c_R/demo_tables/"

# Get a list of all CSV files in the folder using the list.files() function.
# The `pattern` argument selects every file ending in .csv, and
# `full.names = TRUE` returns paths that include the folder.
csv_files <- list.files(folder_path, pattern = "\\.csv$", full.names = TRUE)

# Create an empty data frame (0 rows, 7 columns) with the expected column names.
df <- setNames(
  data.frame(matrix(ncol = 7, nrow = 0)),
  c("name", "age", "eye_color", "driver", "height", "units", "favorite_pet")
)

# Read each CSV file and store its values as row `i` of `df`.
# seq_along() is used instead of 1:length() so that an empty file list simply
# skips the loop; 1:0 would iterate over c(1, 0) and try to read a missing file.
for (i in seq_along(csv_files)) {
  # Skip the first line of the file and read at most two rows, with no header.
  # NOTE(review): the assignment below needs exactly 7 values, so each file is
  # presumably expected to hold a single data row after the skipped line.
  row_data <- read.csv(csv_files[i], header = FALSE, nrows = 2, skip = 1)

  # Flatten the values read into one vector and write it into row `i`.
  # Mixed column types are flattened to character here, which is why age and
  # height are converted back to numeric later in the notebook.
  df[i, ] <- unlist(row_data)
}
# Preview the first six rows of the assembled data frame.
head(df, n = 6L)
# Convert the age and height columns from character to numeric.
# Entries that cannot be parsed as numbers become NA (with a warning).
df <- mutate(
  df,
  age = as.numeric(age),
  height = as.numeric(height)
)
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `height = as.numeric(height)`.
Caused by warning:
! NAs introduced by coercion
# Re-examining the data shows that Bhavyaa's height was lost (set to NA) during
# the numeric conversion because of a special character, so it is re-entered
# by hand.
# The row is located by name and the column by label rather than by position
# (previously df[3, 5]), so the fix still targets the right cell if the file
# listing order or the column order changes. If no row matches, nothing is
# modified.
# NOTE(review): 5.25 is far smaller than the other heights in the data, so it
# is presumably in a different unit (see the `units` column) - confirm.
bhavyaa_rows <- which(df$name == "Bhavyaa")
df$height[bhavyaa_rows] <- 5.25
# Count how many rows carry each name.
table(df[["name"]])
Bhavyaa Cindy Devin Gabi Garima Jenna Johanna Kalynn Matthew Meghan Nikolas Nyssa
1 1 1 1 1 1 1 1 1 1 1 1
Yijun
1
# General histogram of student ages.
# hist() builds its default title and x-axis label from the expression passed
# in, so the plot is labelled "df$age".
hist(df$age)
# Per-column summary: quantiles and mean for numeric columns, length/class/mode
# for character columns.
summary(object = df)
name age eye_color driver height
Length:13 Min. :21.00 Length:13 Length:13 Min. : 5.25
Class :character 1st Qu.:25.00 Class :character Class :character 1st Qu.: 68.00
Mode :character Median :27.00 Mode :character Mode :character Median :168.00
Mean :26.77 Mean :129.60
3rd Qu.:29.00 3rd Qu.:175.00
Max. :32.00 Max. :182.00
units favorite_pet
Length:13 Length:13
Class :character Class :character
Mode :character Mode :character
# Bar chart of age by name: one x position and one fill colour per name.
# geom_col() draws bars whose heights are the y values themselves.
ggplot(data = df, mapping = aes(x = name, y = age, fill = name)) +
  geom_col() +
  # The fill legend would only repeat the x-axis labels, so it is hidden.
  guides(fill = "none") +
  # Tilt the name labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Bar chart of the driver value by name: one x position and one fill colour
# per name.
# NOTE(review): `driver` is a character column, so the y axis is discrete here;
# confirm that bars against a categorical y axis are the intended display.
ggplot(data = df, mapping = aes(x = name, y = driver, fill = name)) +
  geom_col() +
  # The fill legend would only repeat the x-axis labels, so it is hidden.
  guides(fill = "none") +
  # Tilt the name labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))