This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl(or command)+Shift+Enter.


1. Load the libraries used in this script

library(ggplot2)
library(tidyr)
library(dplyr)

2. Set the folder path and get the list of file names in the folder using list.files() function.

# Folder that holds the CSV files to be combined
folder_path <- "/cloud/project/demo_tables - Copy/"
#folder_path <- "/Users/jennafrey/Library/CloudStorage/OneDrive-UniversityofNorthCarolinaatChapelHill/h2l2c_R/demo_tables/"

# Collect every file in that folder whose name ends in ".csv".
# `pattern` is a regular expression, and `full.names = TRUE` keeps the folder
# prefix on each result so the paths can be passed straight to read.csv().
csv_files <- list.files(
  path = folder_path,
  pattern = "\\.csv$",
  full.names = TRUE
)

3. Read and combine each file into a single dataframe. First, initialize an empty dataframe with the new column names.

# Create an empty data frame (0 rows, 7 columns) carrying the final column names
df <- setNames(
  data.frame(matrix(ncol = 7, nrow = 0)),
  c("name", "age", "eye_color", "driver", "height", "units", "favorite_pet")
)

# Read each CSV file and store its contents as one row of `df`.
# seq_along() is used instead of 1:length() so that an empty `csv_files`
# gives zero iterations; 1:0 would loop over c(1, 0) and fail on a missing path.
for (i in seq_along(csv_files)) {
  # Skip the first line of the file and read at most two lines after it,
  # treating none of them as a header
  row_data <- read.csv(csv_files[i], header = FALSE, nrows = 2, skip = 1)

  # Flatten what was read into a single vector and store it as row i.
  # NOTE(review): assumes each file yields exactly seven values after the
  # skipped line -- confirm against the CSV layout.
  df[i, ] <- unlist(row_data)
}

# View a sample of the combined df
head(df)

4. Simple tidying and review of df

# Age and height are held as character values at this point; turn both
# columns into numbers. Entries that cannot be parsed become NA with a warning.
df <- mutate(
  df,
  age = as.numeric(age),
  height = as.numeric(height)
)
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `height = as.numeric(height)`.
Caused by warning:
! NAs introduced by coercion
# Re-examining the data shows that Bhavyaa's height became NA during the
# numeric conversion because the original entry contained a special character.
# Restore it manually. The row is located by name (and the column by name)
# instead of by position, so the fix does not depend on the order in which
# the files were listed and read. which() drops any NA names from the match.
# NOTE(review): 5.25 is on a very different scale from the other heights --
# confirm it agrees with this row's `units` value.
df$height[which(df$name == "Bhavyaa")] <- 5.25

# Table of counts of each name
table(df$name)

Bhavyaa   Cindy   Devin    Gabi  Garima   Jenna Johanna  Kalynn Matthew  Meghan Nikolas   Nyssa 
      1       1       1       1       1       1       1       1       1       1       1       1 
  Yijun 
      1 
# General histogram of student ages, drawn with base graphics.
# The default title and x-axis label are taken from the expression `df$age`.
hist(df$age)


# General summary of every column in df: quartiles and mean for the numeric
# columns, length/class/mode for the character columns
summary(df)
     name                age         eye_color            driver              height      
 Length:13          Min.   :21.00   Length:13          Length:13          Min.   :  5.25  
 Class :character   1st Qu.:25.00   Class :character   Class :character   1st Qu.: 68.00  
 Mode  :character   Median :27.00   Mode  :character   Mode  :character   Median :168.00  
                    Mean   :26.77                                         Mean   :129.60  
                    3rd Qu.:29.00                                         3rd Qu.:175.00  
                    Max.   :32.00                                         Max.   :182.00  
    units           favorite_pet      
 Length:13          Length:13         
 Class :character   Class :character  
 Mode  :character   Mode  :character  
                                      
                                      
                                      

5. Using ggplot2

# Bar chart of age by student: one bar per name, with bar height taken
# directly from the `age` column and each bar filled by name.
# The fill legend is hidden because it would only repeat the x-axis labels,
# and the x-axis labels are rotated 45 degrees so the names do not overlap.
ggplot(data = df, mapping = aes(x = name, y = age, fill = name)) +
  geom_col() +
  guides(fill = "none") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))


# Same chart layout with `driver` on the y-axis: one bar per name, filled by
# name. `driver` is a character column, so the y-axis is discrete.
# The fill legend is hidden and the x-axis labels are rotated 45 degrees.
ggplot(data = df, mapping = aes(x = name, y = driver, fill = name)) +
  geom_col() +
  guides(fill = "none") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKVGhpcyBpcyBhbiBbUiBNYXJrZG93bl0oaHR0cDovL3JtYXJrZG93bi5yc3R1ZGlvLmNvbSkgTm90ZWJvb2suIFdoZW4geW91IGV4ZWN1dGUgY29kZSB3aXRoaW4gdGhlIG5vdGVib29rLCB0aGUgcmVzdWx0cyBhcHBlYXIgYmVuZWF0aCB0aGUgY29kZS4gCgpUcnkgZXhlY3V0aW5nIHRoaXMgY2h1bmsgYnkgY2xpY2tpbmcgdGhlICpSdW4qIGJ1dHRvbiB3aXRoaW4gdGhlIGNodW5rIG9yIGJ5IHBsYWNpbmcgeW91ciBjdXJzb3IgaW5zaWRlIGl0IGFuZCBwcmVzc2luZyAqQ3RybChvciBjb21tYW5kKStTaGlmdCtFbnRlciouIAoKCi0tLQoKIyMjIyAxLiBDYWxsIGxpYnJhcmllcyB1c2VkIGluIHRoaXMgc2NyaXB0CmBgYHtyfQpsaWJyYXJ5KGdncGxvdDIpCmxpYnJhcnkodGlkeXIpCmxpYnJhcnkoZHBseXIpCmBgYAoKCi0tLQotLS0KCiMjIyMgMi4gU2V0IHRoZSBmb2xkZXIgcGF0aCBhbmQgZ2V0IHRoZSBsaXN0IG9mIGZpbGUgbmFtZXMgaW4gdGhlIGZvbGRlciB1c2luZyBgbGlzdC5maWxlcygpYCBmdW5jdGlvbi4KCmBgYHtyfQojIFNldCB0aGUgcGF0aCB0byB0aGUgZm9sZGVyIGNvbnRhaW5pbmcgQ1NWIGZpbGVzCmZvbGRlcl9wYXRoIDwtICIvY2xvdWQvcHJvamVjdC9kZW1vX3RhYmxlcyAtIENvcHkvIgojZm9sZGVyX3BhdGggPC0gIi9Vc2Vycy9qZW5uYWZyZXkvTGlicmFyeS9DbG91ZFN0b3JhZ2UvT25lRHJpdmUtVW5pdmVyc2l0eW9mTm9ydGhDYXJvbGluYWF0Q2hhcGVsSGlsbC9oMmwyY19SL2RlbW9fdGFibGVzLyIKCiMgR2V0IGEgbGlzdCBvZiBhbGwgQ1NWIGZpbGVzIGluIHRoZSBmb2xkZXIgdXNpbmcgdGhlIGxpc3QuZmlsZXMoKSBmdW5jdGlvbi4gVGhlIGBwYXR0ZXJuYCBhcmd1bWVudCBhbGxvd3MgdXMgdG8gc2VsZWN0IGV2ZXJ5IGZpbGUgZW5kaW5nIGluIC5jc3YgZnJvbSB0aGUgZm9sZGVyLiAKY3N2X2ZpbGVzIDwtIGxpc3QuZmlsZXMoZm9sZGVyX3BhdGgsIHBhdHRlcm4gPSAiXFwuY3N2JCIsIGZ1bGwubmFtZXMgPSBUUlVFKQpgYGAKCgotLS0KLS0tCgoKIyMjIyAzLiBSZWFkIGFuZCBjb21iaW5lIGVhY2ggZmlsZSBpbnRvIGEgc2luZ2xlIGRhdGFmcmFtZS4gRmlyc3QgaW5pdGlhdGUgeW91ciBlbXB0eSBkYXRhZnJhbWUgd2l0aCB0aGUgbmV3IGNvbHVtbiBuYW1lcy4gCmBgYHtyfQojIENyZWF0ZSBhbiBlbXB0eSBkYXRhIGZyYW1lIHdpdGggY29sdW1uIG5hbWVzCmRmIDwtIHNldE5hbWVzKGRhdGEuZnJhbWUobWF0cml4KG5jb2wgPSA3LCBucm93ID0gMCkpLCBjKCJuYW1lIiwgImFnZSIsICJleWVfY29sb3IiLCJkcml2ZXIiLCJoZWlnaHQiLCJ1bml0cyIsImZhdm9yaXRlX3BldCIpKQoKCiMgVXNlIGEgZm9yIGxvb3AgdG8gcmVhZCBlYWNoIENTViBmaWxlIGFuZCBhZGQgdGhlIGRhdGEgYXMgYSByb3cgdG8gdGhlIGVtcHR5IGRhdGFmcmFtZQpmb3IgKGkgaW4gMTpsZW5ndGgoY3N2X2ZpbGVzKSkgewogICMgUmVh
ZCB0aGUgQ1NWIGZpbGUgYXMgYSBzaW5nbGUgbGluZSB2ZWN0b3Igd2l0aG91dCBoZWFkZXJzCiAgcm93X2RhdGEgPC0gcmVhZC5jc3YoY3N2X2ZpbGVzW2ldLCBoZWFkZXIgPSBGQUxTRSwgbnJvd3MgPSAyLHNraXA9MSkKICAKICAjIENvbnZlcnQgdGhlIHJvd19kYXRhIHRvIGEgcm93IGluIHRoZSBkYXRhZnJhbWUKICBkZltpLCBdIDwtIHVubGlzdChyb3dfZGF0YSkKfQoKI3ZpZXcgYSBzYW1wbGUgb2YgeW91ciBuZXcgZGYKaGVhZChkZikKYGBgCgoKLS0tCgojIyMjIDQuIFNpbXBsZSB0aWR5aW5nIGFuZCByZXZpZXcgb2YgZGYgCmBgYHtyfQojY29udmVydCBhZ2UgYW5kIGhlaWdodCB0byBudW1lcmljIGluc3RlYWQgb2YgY2hhcmFjdGVyIHZhbHVlcwpkZiA8LSBkZiAlPiUKICBtdXRhdGUoYWdlID0gYXMubnVtZXJpYyhhZ2UpLAogICAgICAgICBoZWlnaHQgPSBhcy5udW1lcmljKGhlaWdodCkpCgojd2UgcmUtZXhhbWluZSBvdXIgZGF0YSBhbmQgc2VlIHRoYXQgQmhhdnlhYSdzIGhlaWdodCB3YXMgcmVtb3ZlZCBkdWUgdG8gYSBzcGVjaWFsIGNoYXJhY3Rlci4gV2UnbGwgbWFudWFsbHkgcmUtYWRkIGl0LgpkZlszLDVdIDwtIDUuMjUKYGBgCgoKYGBge3J9CiNUYWJsZSBvZiBjb3VudHMgb2YgZWFjaCBuYW1lCnRhYmxlKGRmJG5hbWUpCgojZ2VuZXJhbCBoaXN0b2dyYW0gb2Ygc3R1ZGVudCBhZ2VzIApoaXN0KGRmJGFnZSkKCiNnZW5lcmFsIHN1bW1hcnkKc3VtbWFyeShkZikKYGBgCgoKLS0tCgojIyMjIDUuIFVzaW5nIGdncGxvdDIKYGBge3J9CmRmICU+JSAKICBnZ3Bsb3QoYWVzKHggPSBuYW1lLCB5ID0gYWdlLCBmaWxsID0gbmFtZSkpICsKICBnZW9tX2JhcihzdGF0ID0gImlkZW50aXR5IikgKwogIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KGFuZ2xlID0gNDUsIGhqdXN0ID0gMSkpICsKICBndWlkZXMoZmlsbCA9ICJub25lIikKCmRmICU+JSAKICBnZ3Bsb3QoYWVzKHggPSBuYW1lLCB5ID0gZHJpdmVyLCBmaWxsID0gbmFtZSkpICsKICBnZW9tX2JhcihzdGF0ID0gImlkZW50aXR5IikgKwogIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KGFuZ2xlID0gNDUsIGhqdXN0ID0gMSkpICsKICBndWlkZXMoZmlsbCA9ICJub25lIikKYGBgCgo=