Set the working directory:

  1. Download “StudentSurvey.csv” to your computer.
  2. Set the working directory to the folder where you saved the file.
  3. Read the file using the read.csv command.
# If your assignment does not render, you might need to run install.packages("htmltools")

Instructions:

Read the StudentSurvey data set into this markdown document and answer the following questions.

# Read StudentSurvey.csv in here
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
# Load the survey from the working directory; read_csv() returns a tibble.
StudentSurvey <- read_csv(file = "StudentSurvey.csv")
## Rows: 79 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (5): Year, Sex, Smoke, Award, HigherSAT
## dbl (12): Exercise, TV, Height, Weight, Siblings, BirthOrder, VerbalSAT, Mat...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Check the data structure:

# Preview the first six rows of the data set
head(StudentSurvey, n = 6)
## # A tibble: 6 × 17
##   Year      Sex   Smoke Award   HigherSAT Exercise    TV Height Weight Siblings
##   <chr>     <chr> <chr> <chr>   <chr>        <dbl> <dbl>  <dbl>  <dbl>    <dbl>
## 1 Senior    M     No    Olympic Math            10     1     71    180        4
## 2 Sophomore F     Yes   Academy Math             4     7     66    120        2
## 3 FirstYear M     No    Nobel   Math            14     5     72    208        2
## 4 Junior    M     No    Nobel   Math             3     1     63    110        1
## 5 Sophomore F     No    Nobel   Verbal           3     3     65    150        1
## 6 Sophomore F     No    Nobel   Verbal           5     4     65    114        2
## # ℹ 7 more variables: BirthOrder <dbl>, VerbalSAT <dbl>, MathSAT <dbl>,
## #   SAT <dbl>, GPA <dbl>, Pulse <dbl>, Piercings <dbl>
# Inspect the structure: str() reports the dimensions (rows x columns)
# together with each column's type and its first few values
str(object = StudentSurvey)
## spc_tbl_ [79 × 17] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Year      : chr [1:79] "Senior" "Sophomore" "FirstYear" "Junior" ...
##  $ Sex       : chr [1:79] "M" "F" "M" "M" ...
##  $ Smoke     : chr [1:79] "No" "Yes" "No" "No" ...
##  $ Award     : chr [1:79] "Olympic" "Academy" "Nobel" "Nobel" ...
##  $ HigherSAT : chr [1:79] "Math" "Math" "Math" "Math" ...
##  $ Exercise  : num [1:79] 10 4 14 3 3 5 10 13 12 12 ...
##  $ TV        : num [1:79] 1 7 5 1 3 4 10 8 1 6 ...
##  $ Height    : num [1:79] 71 66 72 63 65 65 66 74 60 65 ...
##  $ Weight    : num [1:79] 180 120 208 110 150 114 128 235 115 140 ...
##  $ Siblings  : num [1:79] 4 2 2 1 1 2 1 1 7 1 ...
##  $ BirthOrder: num [1:79] 4 2 1 1 1 2 1 1 8 2 ...
##  $ VerbalSAT : num [1:79] 540 520 550 490 720 600 640 660 670 500 ...
##  $ MathSAT   : num [1:79] 670 630 560 630 450 550 680 710 700 670 ...
##  $ SAT       : num [1:79] 1210 1150 1110 1120 1170 1150 1320 1370 1370 1170 ...
##  $ GPA       : num [1:79] 3.13 2.5 2.55 3.1 2.7 3.2 2.77 3.3 3.7 2.09 ...
##  $ Pulse     : num [1:79] 54 66 130 78 40 80 94 77 94 63 ...
##  $ Piercings : num [1:79] 0 3 0 0 6 4 8 0 2 2 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Year = col_character(),
##   ..   Sex = col_character(),
##   ..   Smoke = col_character(),
##   ..   Award = col_character(),
##   ..   HigherSAT = col_character(),
##   ..   Exercise = col_double(),
##   ..   TV = col_double(),
##   ..   Height = col_double(),
##   ..   Weight = col_double(),
##   ..   Siblings = col_double(),
##   ..   BirthOrder = col_double(),
##   ..   VerbalSAT = col_double(),
##   ..   MathSAT = col_double(),
##   ..   SAT = col_double(),
##   ..   GPA = col_double(),
##   ..   Pulse = col_double(),
##   ..   Piercings = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
# Cross-tabulate students' sex (rows) against "HigherSAT" (columns)
table(StudentSurvey[["Sex"]], StudentSurvey[["HigherSAT"]])
##    
##     Math Verbal
##   F   25     15
##   M   24     15
# Five-number summary plus the mean for the VerbalSAT column
summary(StudentSurvey[["VerbalSAT"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   420.0   550.0   580.0   583.2   630.0   720.0
# Compute the students' average GPA
with(StudentSurvey, mean(GPA))
## [1] 3.169114
# Build "column_df": a two-column data frame holding each student's weight
# and the number of hours they exercise.
column_df <- StudentSurvey[, c("Weight", "Exercise")]

# Preview the first rows of the new data frame.
head(column_df)
## # A tibble: 6 × 2
##   Weight Exercise
##    <dbl>    <dbl>
## 1    180       10
## 2    120        4
## 3    208       14
## 4    110        3
## 5    150        3
## 6    114        5
# Access the fourth element in the first column of the StudentSurvey data set.
# `[[row, col]]` extracts the value itself; `[row, col]` on a tibble would
# return a 1 x 1 tibble wrapping it.
StudentSurvey[[4, 1]]
## [1] "Junior"