#' ---
#' author: "Chelsea McElwee"
#' title: "Problem Set 1 Assignment"
#' Due: "10/7/2022"
#' ---
# Setup ----
# The data live in an Excel workbook, so the readxl package is needed to
# import them. Packages are installed only when they are missing: an
# unconditional install.packages() re-downloads the package on every run and
# can fail in non-interactive sessions that have no CRAN mirror configured.
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
library(readxl)

# Set the working directory to the folder that contains the workbook before
# running this script; the file is referenced by a relative path below.

# Load the other packages used in this analysis.
library(tidyverse)

# NOTE(review): "reader" is a CRAN package, but nothing in the visible script
# calls it. "readr" (normally attached by tidyverse) may have been intended --
# confirm before removing these lines.
if (!requireNamespace("reader", quietly = TRUE)) {
  install.packages("reader")
}
library(reader)

# Import ----
# Read the workbook once, keep it in the environment as WSDf, and print a
# preview (the original read the same file twice: once to print, once to
# assign).
WSDf <- read_excel("2014-2022 ONLY.xlsx")
WSDf
# Column selection ----
# Trim the dataset down to the variables of interest: gender, race, grade,
# and the post self-esteem score (ROSPST).
WSself <- dplyr::select(WSDf, GENDER, RACE, GRADE, ROSPST)
summary(WSself)
## The mean for post self esteem was 22.47 across all demographic variables.
## NOTE(review): presumably taken from the summary above, i.e. while the 99
## placeholder codes are still in the data -- confirm.

# library(stats)

# Range of the raw scores. Missing values were entered as 99, so this range is
# not meaningful until those codes have been removed.
range(WSself$ROSPST)

# Missing-value handling ----
# Recode the 99 placeholder to NA in every selected column.
# dplyr::na_if() can no longer be applied to a whole data frame (dplyr >= 1.1.0
# casts `y` to the type of `x` and errors), so each column is recoded
# separately. `%in%` never returns NA, and replace() keeps the column's type.
WSself <- dplyr::mutate(
  WSself,
  dplyr::across(
    dplyr::everything(),
    function(column) replace(column, column %in% 99, NA)
  )
)

# library(visdat)

# Remove every row that has a missing value in any of the four columns.
WSself <- na.omit(WSself)

# Range and distribution of the cleaned scores.
range(WSself$ROSPST)
summary(WSself$ROSPST)
table(WSself$ROSPST)
# Plot subset ----
# Keep only the two variables used in the plots: grade and post score.
WSselfGs <- dplyr::select(WSself, GRADE, ROSPST)

# Quick look at the score distribution. hist() dispatches to hist.default();
# calling the generic is the idiomatic form.
hist(WSselfGs$ROSPST)

# Give both columns descriptive names.
names(WSselfGs)[names(WSselfGs) == "GRADE"] <- "StudentGrades" # the student grades
names(WSselfGs)[names(WSselfGs) == "ROSPST"] <- "RosenbergSelfEsteemPOST"

# install.packages("naniar")
# library(naniar)

# Save the cleaned two-column dataset to disk.
save(WSselfGs, file = "NoNADataSetWSself.Rda")

# Descriptive statistics ----
mean(WSselfGs$RosenbergSelfEsteemPOST)
median(WSselfGs$RosenbergSelfEsteemPOST)

# Grade is treated as a categorical variable, the score as numeric.
WSselfGs$StudentGrades <- as.factor(WSselfGs$StudentGrades)
WSselfGs$RosenbergSelfEsteemPOST <- as.numeric(WSselfGs$RosenbergSelfEsteemPOST)

# Frequency of each score.
score_counts <- table(WSselfGs$RosenbergSelfEsteemPOST)
score_counts

# Statistical mode: the most frequent score(s). base::mode() is not used here
# because it reports how the vector is stored ("numeric"), not the most common
# value. Ties are all returned.
as.numeric(names(score_counts)[score_counts == max(score_counts)])

var(WSselfGs$RosenbergSelfEsteemPOST)
range(WSselfGs$RosenbergSelfEsteemPOST)
sd(WSselfGs$RosenbergSelfEsteemPOST)
# install.packages("dplyr")
# library(dplyr)

# Histogram of post self-esteem scores ----
# The title is passed as `main`; `Data` is not a hist() argument and would only
# produce "not a graphical parameter" warnings. A histogram puts the plotted
# variable (the score) on the x-axis and counts on the y-axis, so the labels
# say that. The original limits (xlim = c(4, 8), ylim = c(1, 30)) look like
# grade values rather than score values and would clip the plot, so the limits
# are left for R to choose.
# NOTE(review): confirm the intended axis wording.
hist(
  WSselfGs$RosenbergSelfEsteemPOST,
  main = "Histogram for Post Self Esteem Scores", # Title
  xlab = "Post Self Esteem Scores",                # x-axis name
  ylab = "Frequency",                              # y-axis name
  border = "black",                                # Bar border color
  col = "Blue"                                     # Bar color
)

# Change how the grade levels are labeled.
# NOTE(review): this assumes the factor has exactly five levels, sorted in the
# order 4th to 8th grade -- confirm against the raw GRADE coding. The check
# stops the script instead of silently mislabeling grades when the number of
# levels differs.
stopifnot(nlevels(WSselfGs$StudentGrades) == 5L)
levels(WSselfGs$StudentGrades) <- c("4th", "5th", "6th", "7th", "8th")

# Boxplot of post self-esteem score by grade. With `data =` supplied, the
# formula uses bare column names; this also gives cleaner default axis labels
# than the `WSselfGs$...` form.
boxplot(RosenbergSelfEsteemPOST ~ StudentGrades, data = WSselfGs)
# Render the analysis script to a PDF report.
# knitr sets the "knitr.in.progress" option to TRUE while a document is being
# knitted; skipping the call in that case prevents render() from invoking
# itself again.
# NOTE(review): this matters if this file is itself "analysis.R" -- confirm
# the file name.
if (!isTRUE(getOption("knitr.in.progress"))) {
  rmarkdown::render("analysis.R", "pdf_document")
}