# Load the wage data from the working directory and print a per-column summary.
dataset <- read.csv(file = "wage2.csv")
summary(object = dataset)
## X wage hours IQ
## Min. : 1.0 Min. : 115.0 Min. :20.00 Min. : 50.0
## 1st Qu.:234.5 1st Qu.: 669.0 1st Qu.:40.00 1st Qu.: 92.0
## Median :468.0 Median : 905.0 Median :40.00 Median :102.0
## Mean :468.0 Mean : 957.9 Mean :43.93 Mean :101.3
## 3rd Qu.:701.5 3rd Qu.:1160.0 3rd Qu.:48.00 3rd Qu.:112.0
## Max. :935.0 Max. :3078.0 Max. :80.00 Max. :145.0
##
## KWW educ exper tenure
## Min. :12.00 Min. : 9.00 Min. : 1.00 Min. : 0.000
## 1st Qu.:31.00 1st Qu.:12.00 1st Qu.: 8.00 1st Qu.: 3.000
## Median :37.00 Median :12.00 Median :11.00 Median : 7.000
## Mean :35.74 Mean :13.47 Mean :11.56 Mean : 7.234
## 3rd Qu.:41.00 3rd Qu.:16.00 3rd Qu.:15.00 3rd Qu.:11.000
## Max. :56.00 Max. :18.00 Max. :23.00 Max. :22.000
##
## age married black south
## Min. :28.00 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:30.00 1st Qu.:1.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :33.00 Median :1.000 Median :0.0000 Median :0.0000
## Mean :33.08 Mean :0.893 Mean :0.1283 Mean :0.3412
## 3rd Qu.:36.00 3rd Qu.:1.000 3rd Qu.:0.0000 3rd Qu.:1.0000
## Max. :38.00 Max. :1.000 Max. :1.0000 Max. :1.0000
##
## urban sibs brthord meduc
## Min. :0.0000 Min. : 0.000 Min. : 1.000 Min. : 0.00
## 1st Qu.:0.0000 1st Qu.: 1.000 1st Qu.: 1.000 1st Qu.: 8.00
## Median :1.0000 Median : 2.000 Median : 2.000 Median :12.00
## Mean :0.7176 Mean : 2.941 Mean : 2.277 Mean :10.68
## 3rd Qu.:1.0000 3rd Qu.: 4.000 3rd Qu.: 3.000 3rd Qu.:12.00
## Max. :1.0000 Max. :14.000 Max. :10.000 Max. :18.00
## NA's :83 NA's :78
## feduc lwage
## Min. : 0.00 Min. :4.745
## 1st Qu.: 8.00 1st Qu.:6.506
## Median :10.00 Median :6.808
## Mean :10.22 Mean :6.779
## 3rd Qu.:12.00 3rd Qu.:7.056
## Max. :18.00 Max. :8.032
## NA's :194
# Central tendency of wage and age over the full dataset, reported with
# two decimal places.
mean_wage <- mean(dataset[["wage"]])
sprintf("The mean wage is %.2f", mean_wage)
## [1] "The mean wage is 957.95"
median_wage <- median(dataset[["wage"]])
sprintf("The median wage is %.2f", median_wage)
## [1] "The median wage is 905.00"
mean_age <- mean(dataset[["age"]])
sprintf("The mean age is %.2f", mean_age)
## [1] "The mean age is 33.08"
median_age <- median(dataset[["age"]])
sprintf("The median age is %.2f", median_age)
## [1] "The median age is 33.00"
# Keep the first 100 rows and the first 10 columns, then preview the result.
df <- dataset[seq_len(100), seq_len(10)]
head(x = df)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Narrow the dataset to the columns used in the rest of the analysis.
df2 <- dataset %>%
  select(wage, hours, educ, exper, tenure, age, married)
head(x = df2)
# Keep only rows where married equals 1 and age is greater than 30.
df3 <- df2 %>%
  filter(married == 1 & age > 30)
head(x = df3)
# Give every column a capitalised, descriptive name (new_name = old_name).
df3 <- df3 %>%
  rename(
    Salary = wage,
    Hours = hours,
    Education = educ,
    Experience = exper,
    Tenure = tenure,
    Age = age,
    Married = married
  )
head(x = df3)
## Question 4: Summary of the newly created dataframe df3
# The heading above must be a comment: as bare text it is not valid R and
# prevents the script from being parsed.
summary(df3)
## Salary Hours Education Experience Tenure
## Min. : 200 Min. :20.00 Min. : 9.00 Min. : 1.0 Min. : 0.000
## 1st Qu.: 732 1st Qu.:40.00 1st Qu.:12.00 1st Qu.: 9.0 1st Qu.: 3.000
## Median : 962 Median :40.00 Median :12.00 Median :13.0 Median : 8.000
## Mean :1013 Mean :44.03 Mean :13.49 Mean :12.6 Mean : 7.956
## 3rd Qu.:1202 3rd Qu.:48.00 3rd Qu.:16.00 3rd Qu.:16.0 3rd Qu.:12.000
## Max. :3078 Max. :80.00 Max. :18.00 Max. :23.0 Max. :22.000
## Age Married
## Min. :31.00 Min. :1
## 1st Qu.:32.00 1st Qu.:1
## Median :35.00 Median :1
## Mean :34.56 Mean :1
## 3rd Qu.:37.00 3rd Qu.:1
## Max. :38.00 Max. :1
# Mean and median salary of the filtered data frame. The wage column was
# renamed to `Salary` by the rename() call above, so it has to be referenced
# by its new name; `df3$Wage` does not exist and evaluates to NULL, which
# made mean() return NA with a warning and sprintf() return character(0).
mean_wage <- mean(df3$Salary)
sprintf("The mean wage is %.2f", mean_wage)
median_wage <- median(df3$Salary)
sprintf("The median wage is %.2f", median_wage)
# Expected values, per summary(df3): mean of about 1013, median of about 962.
# Mean and median age within the filtered data frame, to two decimal places.
mean_age <- mean(df3[["Age"]])
sprintf("The mean age is %.2f", mean_age)
## [1] "The mean age is 34.56"
median_age <- median(df3[["Age"]])
sprintf("The median age is %.2f", median_age)
## [1] "The median age is 35.00"
# Work on a copy of the full dataset and swap the 0/1 codes in `urban` and
# `married` for text labels. The first replacement in each pair turns the
# column into character, so the second comparison is made against "0".
df4 <- dataset
df4$urban <- replace(df4$urban, df4$urban == 1, "Urban")
df4$urban <- replace(df4$urban, df4$urban == 0, "not-Urban")
df4$married <- replace(df4$married, df4$married == 1, "Married")
df4$married <- replace(df4$married, df4$married == 0, "UnMarried")
head(df4, n = 10)
## Question 6: all outputs contain more than 5 rows.
##Question 7 Bonus Question
library(readr)
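## Note: use the raw-file URL; the github.com/.../blob/... address serves an HTML page, not the CSV.
##dfremote <- read_csv("https://raw.githubusercontent.com/jewelercart/R/main/wage2.csv")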
##head(dfremote)