# Problem-Set.knit

#' ---
#' author: "Chelsea McElwee"
#' title: "Problem Set 1 Assignment"
#' Due: "10/7/2022"
#' ---

# Packages ----
# readxl is needed because the data live in an Excel file. Install it only
# when it is missing, so re-running the script does not reinstall the package
# on every run, then attach it.
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
library(readxl)

# Bring up the libraries for the other packages that will be used.
library(tidyverse)

# NOTE(review): "reader" is probably a typo for "readr", which
# library(tidyverse) already attaches, and nothing in this script calls either
# package directly. Confirm, and drop these lines if so.
if (!requireNamespace("reader", quietly = TRUE)) {
  install.packages("reader")
}
library(reader)

# Working directory ----
# No setwd() call here on purpose: run the script from the folder (or RStudio
# project) that contains "2014-2022 ONLY.xlsx".

# Data import ----
# Read the Excel workbook once and keep it in the environment as WSDf.
WSDf <- read_excel("2014-2022 ONLY.xlsx")

# Column selection ----
# Trim the dataset down to the variables of interest: gender, race, grade and
# the post self-esteem score (ROSPST).
WSself <- dplyr::select(WSDf, GENDER, RACE, GRADE, ROSPST)
summary(WSself)

## The mean for post self esteem was 22.47 across all demographic variables.
## NOTE(review): that summary runs before the 99 placeholders are recoded
## below, so the reported mean may be inflated by them. Re-check it against
## the summary of the clean data.

# library(stats)

# Range of the raw scores. Because 99s have been entered for missing values,
# they have to be removed before the range is meaningful.
range(WSself$ROSPST)

# Missing-value handling ----
# Recode the 99 placeholder to NA column by column. Passing the whole data
# frame to na_if() errors in dplyr >= 1.1.0, so apply it inside across().
# Only numeric columns are recoded, since 99 cannot be compared with
# non-numeric columns under the stricter type rules.
WSself <- dplyr::mutate(
  WSself,
  dplyr::across(where(is.numeric), function(column) dplyr::na_if(column, 99))
)

# library(visdat)

# Remove every row that still has missing data.
WSself <- na.omit(WSself)

# Range, summary and frequency table of the clean data.
range(WSself$ROSPST)
summary(WSself$ROSPST)
table(WSself$ROSPST)

# Plot subset ----
# Keep only the two variables needed for the plots: grade and post score.
WSselfGs <- dplyr::select(WSself, GRADE, ROSPST)

# Quick look at the score distribution. Call the hist() generic rather than
# the hist.default() method directly.
hist(WSselfGs$ROSPST)

# Give both columns descriptive names.
names(WSselfGs)[names(WSselfGs) == "GRADE"] <- "StudentGrades" # the student grades
names(WSselfGs)[names(WSselfGs) == "ROSPST"] <- "RosenbergSelfEsteemPOST"

# install.packages("naniar")
# library(naniar)

# Save the cleaned two-column dataset for later use.
save(WSselfGs, file = "NoNADataSetWSself.Rda")

# Descriptive statistics ----
mean(WSselfGs$RosenbergSelfEsteemPOST)
median(WSselfGs$RosenbergSelfEsteemPOST)

# Grades are categories; scores are numbers.
WSselfGs$StudentGrades <- as.factor(WSselfGs$StudentGrades)
WSselfGs$RosenbergSelfEsteemPOST <- as.numeric(WSselfGs$RosenbergSelfEsteemPOST)

# Frequency table of the scores.
score_counts <- table(WSselfGs$RosenbergSelfEsteemPOST)
score_counts

# Statistical mode: the most frequent score(s). Base R's mode() returns the
# storage mode of the vector ("numeric"), not the most common value, so the
# mode is taken from the frequency table instead. Ties return every modal
# score.
as.numeric(names(score_counts)[score_counts == max(score_counts)])

var(WSselfGs$RosenbergSelfEsteemPOST)
range(WSselfGs$RosenbergSelfEsteemPOST)
sd(WSselfGs$RosenbergSelfEsteemPOST)

# install.packages("dplyr")
# library(dplyr)

# Histogram of the post self-esteem scores ----
# The plot title is passed as `main` (`Data` is not a hist() argument). The
# x-axis of a histogram carries the measured values (scores) and the y-axis
# the counts, so the labels say so. The previous xlim = c(4, 8) matched the
# grade range rather than the score range and clipped the data, so the axis
# limits are left to hist() to choose.
hist(
  WSselfGs$RosenbergSelfEsteemPOST,
  main = "Histogram for Post Self Esteem Scores", # Title
  xlab = "Scores",                                # x-axis name
  ylab = "Frequency",                             # y-axis name
  border = "black",                               # Bar border color
  col = "blue"                                    # Bar color
)

# Relabel the grade levels ----
# as.factor() is a no-op if the column is already a factor. The relabeling is
# positional, so check the number of levels first: a different count would
# otherwise attach the labels to the wrong grades.
# NOTE(review): assumes the sorted grade codes correspond to 4th-8th grade;
# confirm against the codebook.
grade_labels <- c("4th", "5th", "6th", "7th", "8th")
grade_factor <- as.factor(WSselfGs$StudentGrades)
stopifnot(nlevels(grade_factor) == length(grade_labels))
levels(grade_factor) <- grade_labels
WSselfGs$StudentGrades <- grade_factor

# Boxplot for both variables of interest: post scores by grade level.
boxplot(RosenbergSelfEsteemPOST ~ StudentGrades, data = WSselfGs)

# Render the report ----
# Skip the render while a knit is already in progress.
# NOTE(review): if this script is itself "analysis.R", an unguarded call would
# re-render the script from inside its own render; confirm the file name.
if (!isTRUE(getOption("knitr.in.progress"))) {
  rmarkdown::render("analysis.R", output_format = "pdf_document")
}