Load undergraduate survey data

library(readr)
# Read the undergraduate survey responses from the CSV file into a tibble.
undergraduate <- read_csv(file = "~/Desktop/mydata/undergrad.csv")
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   Timestamp = col_character(),
##   `The following tools are important to my future career. [Excel]` = col_character(),
##   `The following tools are important to my future career. [Access]` = col_character(),
##   `The following tools are important to my future career. [Statistics]` = col_character(),
##   `The following tools are important to my future career. [A programming language]` = col_character(),
##   `How likely are you to take another information systems course at Stern?` = col_double(),
##   `How likely are you to take a computer science course outside of Stern?` = col_double(),
##   `Which topics would you like to learn more about? (select all that apply)` = col_character(),
##   `What new information systems courses and/or topics would you like to see offered at Stern?` = col_character(),
##   `If available, how likely would you be to take an online course at Stern?` = col_double(),
##   `What are your areas of concentration?` = col_character()
## )
# Print the tibble to inspect the first rows and the parsed column types.
undergraduate
## # A tibble: 39 x 11
##    Timestamp `The following … `The following … `The following … `The following …
##    <chr>     <chr>            <chr>            <chr>            <chr>           
##  1 12/13/20… Agree            Neither agree o… Neither agree o… Neither agree o…
##  2 12/13/20… Strongly Agree   Disagree         Somewhat agree   Strongly Agree  
##  3 12/13/20… Strongly Agree   Somewhat agree   Strongly Agree   Strongly Agree  
##  4 12/13/20… Strongly Agree   Strongly Agree   Strongly Agree   Agree           
##  5 12/13/20… Agree            Somewhat agree   Strongly Agree   Agree           
##  6 12/13/20… Agree            Neither agree o… Somewhat agree   Somewhat agree  
##  7 12/13/20… Somewhat agree   Disagree         Agree            Strongly Agree  
##  8 12/13/20… Strongly Agree   Somewhat agree   Agree            Strongly Agree  
##  9 12/13/20… Strongly Agree   Neither agree o… Strongly Agree   Somewhat agree  
## 10 12/13/20… Strongly Agree   Neither agree o… Somewhat agree   Somewhat disagr…
## # … with 29 more rows, and 6 more variables:
## #   How likely are you to take another information systems course at Stern? <dbl>,
## #   How likely are you to take a computer science course outside of Stern? <dbl>,
## #   Which topics would you like to learn more about? (select all that apply) <chr>,
## #   What new information systems courses and/or topics would you like to see offered at Stern? <chr>,
## #   If available, how likely would you be to take an online course at Stern? <dbl>,
## #   What are your areas of concentration? <chr>
# List the column names as read from the file (the full survey question text).
colnames(undergraduate)
##  [1] "Timestamp"                                                                                 
##  [2] "The following tools are important to my future career. [Excel]"                            
##  [3] "The following tools are important to my future career. [Access]"                           
##  [4] "The following tools are important to my future career. [Statistics]"                       
##  [5] "The following tools are important to my future career. [A programming language]"           
##  [6] "How likely are you to take another information systems course at Stern?"                   
##  [7] "How likely are you to take a computer science course outside of Stern?"                    
##  [8] "Which topics would you like to learn more about? (select all that apply)"                  
##  [9] "What new information systems courses and/or topics would you like to see offered at Stern?"
## [10] "If available, how likely would you be to take an online course at Stern?"                  
## [11] "What are your areas of concentration?"
# Report the number of rows (responses) and columns (questions).
dim(undergraduate)
## [1] 39 11

Rename the columns to a readable, concise format

# Replace the long question-text headers with short snake_case names.
# The assignment is positional: entry k below becomes the name of column k.
names(undergraduate) <- c(
  "date",
  "excel",
  "access",
  "statistics",
  "programming",
  "info_system",
  "comp_science",
  "learning_topics",
  "new_info_system",
  "online",
  "concentration_area"
)

# Print the names again to confirm the rename took effect.
names(undergraduate)
##  [1] "date"               "excel"              "access"            
##  [4] "statistics"         "programming"        "info_system"       
##  [7] "comp_science"       "learning_topics"    "new_info_system"   
## [10] "online"             "concentration_area"

Create ordered factor variables for the excel, statistics, and programming columns of the undergraduate dataset

# Check how the three Likert columns are stored before converting them.
class(undergraduate[["excel"]])
## [1] "character"
class(undergraduate[["statistics"]])
## [1] "character"
class(undergraduate[["programming"]])
## [1] "character"
# Response scale shared by the three Likert questions, ordered from least to
# most agreement. Spelling and capitalisation must match the labels stored in
# the data exactly: a value that is not listed in `levels` becomes NA.
# NOTE(review): "Strongly disagree" does not occur in the responses shown
# below, so its exact capitalisation is unconfirmed.
likert_levels <- c(
  "Strongly disagree", "Disagree", "Somewhat disagree",
  "Neither agree or disagree", "Somewhat agree", "Agree", "Strongly Agree"
)

# Convert a vector of Likert responses into an ordered factor.
#
# x:      character vector or factor of response labels (NA is allowed).
# levels: response labels ordered from lowest to highest.
#
# Returns an ordered factor whose levels are exactly `levels`. A warning is
# raised when `x` contains labels that are missing from `levels`, because
# those responses would otherwise be converted to NA without any notice.
as_likert <- function(x, levels = likert_levels) {
  observed <- unique(as.character(x[!is.na(x)]))
  unmatched <- setdiff(observed, levels)
  if (length(unmatched) > 0) {
    warning(
      "Responses not found in `levels` will become NA: ",
      paste(unmatched, collapse = ", "),
      call. = FALSE
    )
  }
  ordered(x, levels = levels)
}

# excel: store the column as a factor (levels are sorted alphabetically here)
undergraduate$excel <- as.factor(undergraduate$excel)
undergraduate$excel
##  [1] Agree          Strongly Agree Strongly Agree Strongly Agree Agree         
##  [6] Agree          Somewhat agree Strongly Agree Strongly Agree Strongly Agree
## [11] Strongly Agree Strongly Agree Strongly Agree Somewhat agree Strongly Agree
## [16] Strongly Agree Somewhat agree Agree          Strongly Agree Strongly Agree
## [21] Agree          Strongly Agree Strongly Agree Strongly Agree Agree         
## [26] Agree          Strongly Agree Strongly Agree Strongly Agree Strongly Agree
## [31] Agree          Strongly Agree Strongly Agree Strongly Agree Agree         
## [36] Strongly Agree Strongly Agree Strongly Agree Strongly Agree
## Levels: Agree Somewhat agree Strongly Agree
table(undergraduate$excel)
## 
##          Agree Somewhat agree Strongly Agree 
##              9              3             27
# excel: build the ordered version on the full seven-point scale
excel_ordered <- as_likert(undergraduate$excel)

# view the class of the new ordered excel variable
class(excel_ordered)
## [1] "ordered" "factor"
# view the frequency of responses in the new ordered excel variable
table(excel_ordered)
## excel_ordered
##         Strongly disagree                  Disagree         Somewhat disagree 
##                         0                         0                         0 
## Neither agree or disagree            Somewhat agree                     Agree 
##                         0                         3                         9 
##            Strongly Agree 
##                        27
# statistics: store the column as a factor (levels are sorted alphabetically)
undergraduate$statistics <- as.factor(undergraduate$statistics)
undergraduate$statistics
##  [1] Neither agree or disagree Somewhat agree           
##  [3] Strongly Agree            Strongly Agree           
##  [5] Strongly Agree            Somewhat agree           
##  [7] Agree                     Agree                    
##  [9] Strongly Agree            Somewhat agree           
## [11] Strongly Agree            Strongly Agree           
## [13] Somewhat agree            Somewhat agree           
## [15] Agree                     Agree                    
## [17] Agree                     Neither agree or disagree
## [19] Strongly Agree            Strongly Agree           
## [21] Agree                     Strongly Agree           
## [23] Agree                     Somewhat agree           
## [25] Agree                     <NA>                     
## [27] Somewhat agree            Strongly Agree           
## [29] Agree                     Strongly Agree           
## [31] Disagree                  Strongly Agree           
## [33] Strongly Agree            Agree                    
## [35] Agree                     Agree                    
## [37] Agree                     Strongly Agree           
## [39] Strongly Agree           
## 5 Levels: Agree Disagree Neither agree or disagree ... Strongly Agree
table(undergraduate$statistics)
## 
##                     Agree                  Disagree Neither agree or disagree 
##                        13                         1                         2 
##            Somewhat agree            Strongly Agree 
##                         7                        15
# statistics: build the ordered version on the full seven-point scale
statistics_ordered <- as_likert(undergraduate$statistics)

# view the class of the new ordered statistics variable
class(statistics_ordered)
## [1] "ordered" "factor"
# view the frequency of responses in the new ordered statistics variable
table(statistics_ordered)
## statistics_ordered
##         Strongly disagree                  Disagree         Somewhat disagree 
##                         0                         1                         0 
## Neither agree or disagree            Somewhat agree                     Agree 
##                         2                         7                        13 
##            Strongly Agree 
##                        15
# programming: store the column as a factor (levels are sorted alphabetically)
undergraduate$programming <- as.factor(undergraduate$programming)
undergraduate$programming
##  [1] Neither agree or disagree Strongly Agree           
##  [3] Strongly Agree            Agree                    
##  [5] Agree                     Somewhat agree           
##  [7] Strongly Agree            Strongly Agree           
##  [9] Somewhat agree            Somewhat disagree        
## [11] Agree                     Strongly Agree           
## [13] Strongly Agree            Somewhat disagree        
## [15] Agree                     Somewhat agree           
## [17] Strongly Agree            Neither agree or disagree
## [19] Strongly Agree            Agree                    
## [21] Agree                     Strongly Agree           
## [23] Agree                     Somewhat agree           
## [25] Neither agree or disagree Neither agree or disagree
## [27] Somewhat agree            Strongly Agree           
## [29] Agree                     Agree                    
## [31] Disagree                  Strongly Agree           
## [33] Agree                     Agree                    
## [35] Agree                     Somewhat agree           
## [37] Neither agree or disagree Strongly Agree           
## [39] Strongly Agree           
## 6 Levels: Agree Disagree Neither agree or disagree ... Strongly Agree
table(undergraduate$programming)
## 
##                     Agree                  Disagree Neither agree or disagree 
##                        12                         1                         5 
##            Somewhat agree         Somewhat disagree            Strongly Agree 
##                         6                         2                        13
# programming: build the ordered version on the full seven-point scale
programming_ordered <- as_likert(undergraduate$programming)

# view the class of the new ordered programming variable
class(programming_ordered)
## [1] "ordered" "factor"
# view the frequency of responses in the new ordered programming variable
table(programming_ordered)
## programming_ordered
##         Strongly disagree                  Disagree         Somewhat disagree 
##                         0                         1                         2 
## Neither agree or disagree            Somewhat agree                     Agree 
##                         5                         6                        12 
##            Strongly Agree 
##                        13

Histograms of ordered excel, statistics and programming variables

# Draw a histogram with exactly one bar per level of an ordered Likert factor.
#
# x:    ordered factor of responses (NA values are not counted by hist()).
# col:  fill colour of the bars.
# main: plot title.
# ymax: upper limit of the count axis.
#
# The break points are set explicitly at k - 0.5 and k + 0.5 around every
# level code k. A single number passed as `breaks` is only a suggestion: hist()
# then picks pretty() cut points over the observed range and closes the first
# bin on both sides, so two neighbouring categories can be counted in one bar
# and the bars differ from variable to variable.
# Tick k on the x axis corresponds to levels(x)[k].
plot_likert_hist <- function(x, col, main, ymax) {
  n_levels <- nlevels(x)
  hist(
    as.numeric(x),
    breaks = seq(0.5, n_levels + 0.5, by = 1),
    col = col,
    border = "#FFFFFF",
    labels = TRUE,
    xlab = "Bins by response category",
    main = main,
    ylim = c(0, ymax),
    xaxt = "n"
  )
  # one tick per response level instead of the default pretty() positions
  axis(side = 1, at = seq_len(n_levels))
  invisible(x)
}

# histogram for ordered excel variable
plot_likert_hist(
  excel_ordered,
  col = "#A17BD0",
  main = "Responses to the level of importance of learning Microsoft Excel",
  ymax = 30
)

# histogram for ordered statistics variable
plot_likert_hist(
  statistics_ordered,
  col = "#D379A6",
  main = "Responses to the level of importance of learning Statistics",
  ymax = 20
)

# histogram for ordered programming variable
plot_likert_hist(
  programming_ordered,
  col = "#F9BE73",
  main = "Responses to the level of importance of learning Programming",
  ymax = 15
)

# bonus: show all three histograms in one row

# par() returns the previous settings, which are restored afterwards so that
# later plots are not squeezed into the 1 x 3 layout.
old_par <- par(mfrow = c(1, 3))
plot_likert_hist(
  excel_ordered,
  col = "#A17BD0", main = "Learning Microsoft Excel", ymax = 30
)
plot_likert_hist(
  statistics_ordered,
  col = "#D379A6", main = "Learning Statistics", ymax = 20
)
plot_likert_hist(
  programming_ordered,
  col = "#F9BE73", main = "Learning Programming", ymax = 15
)
par(old_par)