PROBLEM SET 1

#' ---
#' author: "Chelsea McElwee"
#' title: "Problem Set 1 Assignment"
#' Due: "10/7/2022"
#' ---

#Installing the readxl package because I will be using an Excel file;
#then I have to load the library for the package I installed

#install.packages("readxl") 
library(readxl)

#Reading and importing data from Excel with read_excel("2014-2022 ONLY.xlsx")

#set working directory

#bring up libraries for packages that i will be using 

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

#install.packages("reader")
library(reader)

## Loading required package: NCmisc
## 
## Attaching package: 'reader'
## 
## The following objects are masked from 'package:NCmisc':
## 
##     cat.path, get.ext, rmv.ext

#Changing dataset name and adding it to my environment

library(readxl)
#X2014_2022_ONLY <- read_excel("2014-2022 ONLY.xlsx")
#View(X2014_2022_ONLY)

#I will create a vector of the columns I am interested in so I can trim down the dataset
##The variables I chose to isolate are gender, race, grade, and post self-esteem scores
##Now I want to select the specific columns that I will be working with

#WSself <- dplyr::select(X2014_2022_ONLY, GENDER, RACE, GRADE, ROSPST)
#summary(WSself)

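##The mean post self-esteem score was 22.47 across all demographic variables
##(note: this summary was run before the 99 missing-value codes were recoded, so the mean may include them)#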

library(stats)

#range(WSself$ROSPST)

Given that 99s have been entered for missing values (NAs), I have to recode and remove them before computing the range.

library(dplyr)
#WSselfGrades <-na_if (WSself, "99")

removing missing data

#WSselfGrades <- na.omit(WSselfGrades)

# Range, five-number summary, and frequency table of the post self-esteem
# scores in the cleaned data (WSselfGrades, after the 99 codes were recoded
# to NA and incomplete rows dropped).
#{r}
range(WSselfGrades$ROSPST)
summary(WSselfGrades$ROSPST)
table(WSselfGrades$ROSPST)

# Select only the two variables needed for the plots: student grade and post
# self-esteem score. The result, WSselfGs, is the data frame that the
# renaming, type-conversion, and plotting steps below operate on.
#```{r}
WSselfGs <- dplyr::select(WSselfGrades, GRADE, ROSPST)


Changing variable names: the student grades
#```{r}
# Give the two retained columns descriptive names. Only a column whose
# current name matches exactly is renamed; every other name is kept as is.
names(WSselfGs) <- replace(
  names(WSselfGs),
  names(WSselfGs) == "ROSPST",
  "RosenbergSelfEsteemPOST"
)
names(WSselfGs) <- replace(
  names(WSselfGs),
  names(WSselfGs) == "GRADE",
  "StudentGrades"
)

# Tell R which variable is categorical (factor) and which is numeric.
#{r}
# Grade is a grouping variable, so store it as a factor.
WSselfGs$StudentGrades <- as.factor(WSselfGs$StudentGrades)
# Convert the scores inside WSselfGs itself, the data frame that gets plotted.
# Assigning the result to WSselfGrades would only add a new column there and
# leave the plotted column unconverted.
WSselfGs$RosenbergSelfEsteemPOST <- as.numeric(WSselfGs$RosenbergSelfEsteemPOST)

#```{r}
#install.packages("dplyr")
library(dplyr)

# Histogram of the post self-esteem scores.
# The x-axis of a histogram is the measured variable (the scores) and the
# y-axis is the count of students in each bin, so the axes are labelled that
# way. The axis limits are left to hist(): the earlier fixed limits (4 to 8)
# matched the grade range, not the score range, and would hide the data.
hist(
  WSselfGs$RosenbergSelfEsteemPOST,
  main = "Histogram for Post Self Esteem Scores", # Title
  xlab = "Post Self Esteem Scores",               # X-axis name
  ylab = "Number of Students",                    # Y-axis name
  border = "black",                               # Bar border color
  col = "Blue"                                    # Bar color
)

# Change how the grade levels are labeled.
# NOTE(review): this relabels positionally, so it assumes StudentGrades is a
# factor with exactly five levels ordered 4, 5, 6, 7, 8 -- confirm with
# levels(WSselfGs$StudentGrades) before relying on the labels.
levels(WSselfGs$StudentGrades) <- c("4th", "5th", "6th", "7th", "8th")

# Boxplot for both variables of interest: post self-esteem score by grade.
# Column names are resolved through `data`, so they are not prefixed with
# WSselfGs$ inside the formula.
boxplot(
  RosenbergSelfEsteemPOST ~ StudentGrades,
  data = WSselfGs,
  xlab = "Grade Levels",
  ylab = "Post Self Esteem Scores"
)
#```

PROBLEM SET 1

2022-10-14