#' ---
#' author: "Chelsea McElwee"
#' title: "Problem Set 1 Assignment"
#' Due: "10/7/2022"
#' ---
# Installing the readxl package because I will be using an Excel file;
# then I have to load the library for the package I installed.
#install.packages("readxl")
library(readxl)
# Reading and importing data from Excel: read_excel("2014-2022 ONLY.xlsx")
#set working directory
#bring up libraries for packages that i will be using
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
#install.packages("reader")
library(reader)
## Loading required package: NCmisc
##
## Attaching package: 'reader'
##
## The following objects are masked from 'package:NCmisc':
##
## cat.path, get.ext, rmv.ext
# Import, clean and plot the post self-esteem scores ----
#
# This section reads the Excel workbook, keeps four columns, recodes the
# 99 missing-value code to NA, drops incomplete rows, and then draws a
# histogram of the post self-esteem score and a boxplot of score by grade.
#
# Objects created (names unchanged from the original script):
#   X2014_2022_ONLY  the workbook as imported
#   WSself           GENDER, RACE, GRADE and ROSPST only
#   WSselfGrades     WSself with 99 recoded to NA and incomplete rows dropped
#   WSselfGs         grade and score only, renamed for plotting

# readxl supplies read_excel(); dplyr supplies select(), mutate(), across().
# stats is attached by default; the call is kept to mirror the original.
library(readxl)
library(stats)
library(dplyr)

# Changing dataset name and adding it to my environment.
# The path is relative, so the script has to be run from the folder that
# contains the workbook.
X2014_2022_ONLY <- read_excel("2014-2022 ONLY.xlsx")
# View(X2014_2022_ONLY)  # interactive viewer; left disabled for batch runs

# Select the specific columns I will be working with: gender, race, grade
# and the post self-esteem score.
WSself <- dplyr::select(X2014_2022_ONLY, GENDER, RACE, GRADE, ROSPST)
summary(WSself)
# Author's note: the mean for post self esteem was 22.47 across all
# demographic variables. NOTE(review): this summary runs before the 99
# codes are removed, so that mean presumably still includes them - confirm.

# Given that 99s have been entered for NAs, they have to be recoded before
# the range is meaningful. Each column is handled on its own so the match
# works whether the column was read as numeric or as text; the previous
# na_if(WSself, "99") call on the whole data frame errors in dplyr >= 1.1.
WSselfGrades <- mutate(
  WSself,
  across(everything(), function(col) replace(col, col %in% 99, NA))
)

# Removing missing data: drop every row with an NA in any column.
WSselfGrades <- na.omit(WSselfGrades)

# Range and summary of the clean data.
range(WSselfGrades$ROSPST)
summary(WSselfGrades$ROSPST)
table(WSselfGrades$ROSPST)

# Selecting only two variables for the plot aspect.
WSselfGs <- dplyr::select(WSselfGrades, GRADE, ROSPST)

# Changing variable names: the student grades and the score.
names(WSselfGs)[names(WSselfGs) == "ROSPST"] <- "RosenbergSelfEsteemPOST"
names(WSselfGs)[names(WSselfGs) == "GRADE"] <- "StudentGrades"

# Telling R what is a number versus a factor. The numeric conversion is
# written back to WSselfGs (the original assigned it to WSselfGrades, which
# left the plotted column unconverted).
WSselfGs$StudentGrades <- as.factor(WSselfGs$StudentGrades)
WSselfGs$RosenbergSelfEsteemPOST <-
  as.numeric(WSselfGs$RosenbergSelfEsteemPOST)

# Histogram of the post self-esteem scores. In a histogram the x-axis is the
# score and the y-axis is the count, so the labels are set accordingly. The
# earlier xlim = c(4, 8) was the grade range, not the score range (the mean
# score noted above is about 22), so the axis limits are left to hist().
hist(
  WSselfGs$RosenbergSelfEsteemPOST,
  main = "Histogram for Post Self Esteem Scores", # title
  xlab = "Scores",                                # x-axis name
  ylab = "Frequency",                             # y-axis name
  border = "black",                               # bar border color
  col = "Blue"                                    # bar color
)

# Change how the grade levels are labeled. The labels are applied by
# position to the sorted factor levels.
# NOTE(review): assumes the data contain exactly grades 4 through 8 - confirm.
levels(WSselfGs$StudentGrades) <- c("4th", "5th", "6th", "7th", "8th")

# Boxplot for both variables of interest: score distribution per grade.
boxplot(RosenbergSelfEsteemPOST ~ StudentGrades, data = WSselfGs)