Load Data (w/ Bonus)

# Location of the CPS1988 CSV file on GitHub.
urldata <- "https://raw.githubusercontent.com/kglan/MS-Data-Science-Bridge-/main/R/CPS1988.csv"
# Open the URL as a connection and parse the CSV into `wagedatarough`.
wagedatarough <- urldata %>%
  url() %>%
  read_csv()
## New names:
## * `` -> ...1
## Rows: 28155 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (4): ethnicity, smsa, region, parttime
## dbl (4): ...1, wage, education, experience
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Show the first rows of the raw data as loaded.
print(wagedatarough)
## # A tibble: 28,155 x 8
##     ...1  wage education experience ethnicity smsa  region    parttime
##    <dbl> <dbl>     <dbl>      <dbl> <chr>     <chr> <chr>     <chr>   
##  1     1  355.         7         45 cauc      yes   northeast no      
##  2     2  123.        12          1 cauc      yes   northeast yes     
##  3     3  370.         9          9 cauc      yes   northeast no      
##  4     4  755.        11         46 cauc      yes   northeast no      
##  5     5  594.        12         36 cauc      yes   northeast no      
##  6     6  377.        16         22 cauc      yes   northeast no      
##  7     7  285.         8         51 cauc      yes   northeast no      
##  8     8  561.        12         34 cauc      yes   northeast no      
##  9     9  264.        12          0 cauc      yes   northeast no      
## 10    10 1644.        14         18 cauc      yes   northeast no      
## # ... with 28,145 more rows

Question 1

# Per-column summary of the raw data, before any cleaning.
wagedatarough %>% summary()
##       ...1            wage            education       experience  
##  Min.   :    1   Min.   :   50.05   Min.   : 0.00   Min.   :-4.0  
##  1st Qu.: 7040   1st Qu.:  308.64   1st Qu.:12.00   1st Qu.: 8.0  
##  Median :14078   Median :  522.32   Median :12.00   Median :16.0  
##  Mean   :14078   Mean   :  603.73   Mean   :13.07   Mean   :18.2  
##  3rd Qu.:21117   3rd Qu.:  783.48   3rd Qu.:15.00   3rd Qu.:27.0  
##  Max.   :28155   Max.   :18777.20   Max.   :18.00   Max.   :63.0  
##   ethnicity             smsa              region            parttime        
##  Length:28155       Length:28155       Length:28155       Length:28155      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
## 
# Mean and median of education and experience in the raw data.
# The *1 names are reused later when comparing against the cleaned data.
meanedu1 <- with(wagedatarough, mean(education))
mededu1 <- with(wagedatarough, median(education))
meanexp1 <- with(wagedatarough, mean(experience))
medexp1 <- with(wagedatarough, median(experience))

meanedu1
## [1] 13.06787
mededu1
## [1] 12
meanexp1
## [1] 18.19993
medexp1
## [1] 16

Question 2,3,5

# List the raw column names before renaming them.
names(wagedatarough)
## [1] "...1"       "wage"       "education"  "experience" "ethnicity" 
## [6] "smsa"       "region"     "parttime"
# Build the cleaned table `wagedata` from the raw data:
#   1. drop the row-index column that read_csv named "...1"
#   2. give the remaining columns descriptive names
#   3. add Salary (weekly wage times 52) right before Education_yrs
#   4. keep only rows whose part-time flag is "no", then drop that flag
#   5. replace the ethnicity codes "cauc" / "afam" with "White" / "Black"
wagedata <- wagedatarough %>%
  select(-"...1") %>%
  rename(
    Wage_week = "wage",
    Education_yrs = "education",
    Experience_yrs = "experience",
    Ethnicity = "ethnicity",
    Urban_Area = "smsa",
    Region = "region",
    PartTime = "parttime"
  ) %>%
  # Wage_week is already numeric, so no as.numeric() conversion is needed.
  mutate(Salary = Wage_week * 52, .before = Education_yrs) %>%
  # Filter on the piped table's own column rather than wagedatarough$parttime:
  # the external vector only lines up while no earlier step changes the number
  # or order of rows, and would silently pick wrong rows otherwise.
  filter(PartTime == "no") %>%
  select(-PartTime) %>%
  # Within one mutate() the second expression sees the result of the first.
  mutate(
    Ethnicity = str_replace(Ethnicity, "cauc", "White"),
    Ethnicity = str_replace(Ethnicity, "afam", "Black")
  )

wagedata
## # A tibble: 25,631 x 7
##    Wage_week Salary Education_yrs Experience_yrs Ethnicity Urban_Area Region   
##        <dbl>  <dbl>         <dbl>          <dbl> <chr>     <chr>      <chr>    
##  1      355. 18457.             7             45 White     yes        northeast
##  2      370. 19259.             9              9 White     yes        northeast
##  3      755. 39257.            11             46 White     yes        northeast
##  4      594. 30864.            12             36 White     yes        northeast
##  5      377. 19616.            16             22 White     yes        northeast
##  6      285. 14815.             8             51 White     yes        northeast
##  7      561. 29179.            12             34 White     yes        northeast
##  8      264. 13731.            12              0 White     yes        northeast
##  9     1644. 85479.            14             18 White     yes        northeast
## 10      475. 24691.            12             17 White     yes        northeast
## # ... with 25,621 more rows

Question 4

# Per-column summary of the cleaned data.
wagedata %>% summary()
##    Wage_week            Salary       Education_yrs   Experience_yrs 
##  Min.   :   50.39   Min.   :  2620   Min.   : 0.00   Min.   :-4.00  
##  1st Qu.:  356.13   1st Qu.: 18519   1st Qu.:12.00   1st Qu.: 9.00  
##  Median :  567.23   Median : 29496   Median :12.00   Median :16.00  
##  Mean   :  640.16   Mean   : 33288   Mean   :13.08   Mean   :18.59  
##  3rd Qu.:  826.21   3rd Qu.: 42963   3rd Qu.:16.00   3rd Qu.:27.00  
##  Max.   :18777.20   Max.   :976414   Max.   :18.00   Max.   :63.00  
##   Ethnicity          Urban_Area           Region         
##  Length:25631       Length:25631       Length:25631      
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
## 
# Mean and median of education and experience in the cleaned data.
# The *2 names pair with the *1 values computed on the raw data.
meanedu2 <- with(wagedata, mean(Education_yrs))
mededu2 <- with(wagedata, median(Education_yrs))
meanexp2 <- with(wagedata, mean(Experience_yrs))
medexp2 <- with(wagedata, median(Experience_yrs))

meanedu2
## [1] 13.07627
mededu2
## [1] 12
meanexp2
## [1] 18.58656
medexp2
## [1] 16
# Check whether each statistic changed between the raw data (*1) and the
# cleaned data (*2). Per the printed results, the two means differ and the two
# medians are unchanged.
# NOTE(review): compare() is not defined in this excerpt; presumably it comes
# from the `compare` package -- confirm which package is attached.
compare(meanedu1,meanedu2)
## FALSE
compare(mededu1, mededu2)
## TRUE
compare(meanexp1, meanexp2)
## FALSE
compare(medexp1,medexp2)
## TRUE

Question 5,6

# Show the cleaned data sorted from most to least years of experience.
wagedata %>% arrange(desc(Experience_yrs))
## # A tibble: 25,631 x 7
##    Wage_week Salary Education_yrs Experience_yrs Ethnicity Urban_Area Region 
##        <dbl>  <dbl>         <dbl>          <dbl> <chr>     <chr>      <chr>  
##  1     370.  19259.             0             63 White     yes        south  
##  2     166.   8642.             2             61 Black     no         south  
##  3     356.  18519.             0             60 Black     yes        midwest
##  4     368   19136              0             58 White     yes        midwest
##  5     206.  10700.             3             58 Black     no         south  
##  6     286.  14894.             7             57 White     yes        south  
##  7     712.  37037              3             57 White     yes        south  
##  8      92.6  4815.             4             57 White     yes        south  
##  9     237.  12346.             2             57 White     yes        south  
## 10     353.  18376.             5             57 White     yes        west   
## # ... with 25,621 more rows