Set the working directory:

  1. Download “StudentSurvey.csv” to your computer.
  2. Set the working directory to the folder where you saved the file.
  3. Read the file using the read.csv() command.
# If your assignment does not render, you might need to run install.packages("htmltools")

Instructions:

Read StudentSurvey.csv into this markdown document and answer the following questions.

# Load StudentSurvey.csv into a data frame; the relative path is resolved
# against the current working directory.
student_survey <- read.csv(file = "StudentSurvey.csv")

Check the data structure:

# Inspect the structure of the data set: one line per column showing its
# name, type, and first few values.
str(object = student_survey)
## 'data.frame':    79 obs. of  17 variables:
##  $ Year      : chr  "Senior" "Sophomore" "FirstYear" "Junior" ...
##  $ Sex       : chr  "M" "F" "M" "M" ...
##  $ Smoke     : chr  "No" "Yes" "No" "No" ...
##  $ Award     : chr  "Olympic" "Academy" "Nobel" "Nobel" ...
##  $ HigherSAT : chr  "Math" "Math" "Math" "Math" ...
##  $ Exercise  : int  10 4 14 3 3 5 10 13 12 12 ...
##  $ TV        : int  1 7 5 1 3 4 10 8 1 6 ...
##  $ Height    : int  71 66 72 63 65 65 66 74 60 65 ...
##  $ Weight    : int  180 120 208 110 150 114 128 235 115 140 ...
##  $ Siblings  : int  4 2 2 1 1 2 1 1 7 1 ...
##  $ BirthOrder: int  4 2 1 1 1 2 1 1 8 2 ...
##  $ VerbalSAT : int  540 520 550 490 720 600 640 660 670 500 ...
##  $ MathSAT   : int  670 630 560 630 450 550 680 710 700 670 ...
##  $ SAT       : int  1210 1150 1110 1120 1170 1150 1320 1370 1370 1170 ...
##  $ GPA       : num  3.13 2.5 2.55 3.1 2.7 3.2 2.77 3.3 3.7 2.09 ...
##  $ Pulse     : int  54 66 130 78 40 80 94 77 94 63 ...
##  $ Piercings : int  0 3 0 0 6 4 8 0 2 2 ...
# Check the dimensions of the data set; dim() on a data frame returns the
# number of rows followed by the number of columns.
dim(student_survey)
## [1] 79 17
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Build a two-column tibble pairing each student's sex with the SAT section
# ("HigherSAT") on which that student scored higher, then print it.
tab_x <- tibble(
  sex = student_survey[["Sex"]],
  SAT_score = student_survey[["HigherSAT"]]
)
tab_x
## # A tibble: 79 × 2
##    sex   SAT_score
##    <chr> <chr>    
##  1 M     Math     
##  2 F     Math     
##  3 M     Math     
##  4 M     Math     
##  5 F     Verbal   
##  6 F     Verbal   
##  7 F     Math     
##  8 M     Math     
##  9 F     Math     
## 10 F     Math     
## # ℹ 69 more rows
# Five-number summary plus the mean for the VerbalSAT column.
summary(student_survey[["VerbalSAT"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   420.0   550.0   580.0   583.2   630.0   720.0
# Compute the mean of the GPA column across all students and print it.
avg_gpa <- mean(student_survey[["GPA"]])
avg_gpa
## [1] 3.169114
# Create a new data frame, "column_df", holding each student's weight and
# the number of hours they exercise.
weight <- student_survey[["Weight"]]
exercise_hour <- student_survey[["Exercise"]]
column_df <- data.frame(weight = weight, exercise_hour = exercise_hour)
column_df
##    weight exercise_hour
## 1     180            10
## 2     120             4
## 3     208            14
## 4     110             3
## 5     150             3
## 6     114             5
## 7     128            10
## 8     235            13
## 9     115            12
## 10    140            12
## 11    135             6
## 12    110            10
## 13     99             3
## 14    165             7
## 15    120             2
## 16    154            14
## 17    110            10
## 18    145            14
## 19    195            20
## 20    200             7
## 21    167            12
## 22    175            10
## 23    155             6
## 24    185            14
## 25    190            12
## 26    165            10
## 27    175             8
## 28    126             0
## 29    187            10
## 30    170             6
## 31    158             5
## 32    119            24
## 33    205             2
## 34    129            10
## 35    145             6
## 36    130             5
## 37    215             5
## 38    135            12
## 39    145             2
## 40     98             7
## 41    150            15
## 42    159             5
## 43    174             7
## 44    160            15
## 45    165             8
## 46    161            14
## 47    130             4
## 48    175            15
## 49    255             4
## 50    160            15
## 51    160             3
## 52     95             3
## 53    115            15
## 54    120            20
## 55    135             3
## 56    180             6
## 57    155            12
## 58    110             4
## 59    215            20
## 60    140            10
## 61    195            10
## 62    185             4
## 63    185             9
## 64    209            12
## 65    145             2
## 66    180             2
## 67    170             5
## 68    135             5
## 69    165             6
## 70    137            10
## 71    147             4
## 72    150             5
## 73    155            17
## 74    160             7
## 75    130             2
## 76    180             8
## 77    150             1
## 78    205            14
## 79    115            12
# Access the fourth element of the first column of the StudentSurvey data:
# extract column 1 as a vector, then take its 4th entry.
student_survey[[1L]][4L]
## [1] "Junior"