#1. Use the summary function to gain an overview of the data set.

#Then display the mean and median for at least two attributes.

#check your working directory

#import your Rdataset.csv into R studio

# Load the test-score data. read.csv() already defaults to header = TRUE and
# sep = ",", so only the file path has to be given.
homework <- read.csv("CATestscores.csv")

# Column-by-column overview: quartiles and mean for numeric columns,
# length/class/mode for character columns.
summary(object = homework)
##        X            district        school             county         
##  Min.   :  1.0   Min.   :61382   Length:420         Length:420        
##  1st Qu.:105.8   1st Qu.:64308   Class :character   Class :character  
##  Median :210.5   Median :67760   Mode  :character   Mode  :character  
##  Mean   :210.5   Mean   :67473                                        
##  3rd Qu.:315.2   3rd Qu.:70419                                        
##  Max.   :420.0   Max.   :75440                                        
##     grades             students          teachers          calworks     
##  Length:420         Min.   :   81.0   Min.   :   4.85   Min.   : 0.000  
##  Class :character   1st Qu.:  379.0   1st Qu.:  19.66   1st Qu.: 4.395  
##  Mode  :character   Median :  950.5   Median :  48.56   Median :10.520  
##                     Mean   : 2628.8   Mean   : 129.07   Mean   :13.246  
##                     3rd Qu.: 3008.0   3rd Qu.: 146.35   3rd Qu.:18.981  
##                     Max.   :27176.0   Max.   :1429.00   Max.   :78.994  
##      lunch           computer       expenditure       income      
##  Min.   :  0.00   Min.   :   0.0   Min.   :3926   Min.   : 5.335  
##  1st Qu.: 23.28   1st Qu.:  46.0   1st Qu.:4906   1st Qu.:10.639  
##  Median : 41.75   Median : 117.5   Median :5215   Median :13.728  
##  Mean   : 44.71   Mean   : 303.4   Mean   :5312   Mean   :15.317  
##  3rd Qu.: 66.86   3rd Qu.: 375.2   3rd Qu.:5601   3rd Qu.:17.629  
##  Max.   :100.00   Max.   :3324.0   Max.   :7712   Max.   :55.328  
##     english            read            math      
##  Min.   : 0.000   Min.   :604.5   Min.   :605.4  
##  1st Qu.: 1.941   1st Qu.:640.4   1st Qu.:639.4  
##  Median : 8.778   Median :655.8   Median :652.5  
##  Mean   :15.768   Mean   :655.0   Mean   :653.3  
##  3rd Qu.:22.970   3rd Qu.:668.7   3rd Qu.:665.9  
##  Max.   :85.540   Max.   :704.0   Max.   :709.5
# Mean and median of `english` across all rows of `homework`.
mean(homework[["english"]])
## [1] 15.76816
median(homework[["english"]])
## [1] 8.777634

# Same two statistics for `math`.
mean(homework[["math"]])
## [1] 653.3426
median(homework[["math"]])
## [1] 652.45

#2. Create a new data frame with a subset of the columns and rows. #Make sure to rename it.

# Draw a random subset of rows and keep only the expenditure and income
# columns.
# NOTE(review): no set.seed() call is visible before this point, so the rows
# drawn (and therefore the output recorded below) differ from run to run.
n_sample <- 10
sampled_rows <- sample(seq_len(nrow(homework)), n_sample)

# Select the columns by name rather than by position (11:12): positions shift
# if the CSV gains, loses, or reorders a column (e.g. the leading `X` index),
# while names do not. The selected columns keep their names, so no separate
# names<- step is needed.
df.CA <- homework[sampled_rows, c("expenditure", "income")]

# Replace the row labels carried over from `homework` with 1..n.
row.names(df.CA) <- seq_len(nrow(df.CA))
df.CA
##    expenditure   income
## 1     5220.370 15.96800
## 2     5447.345  7.38500
## 3     6180.149 49.93900
## 4     4842.608  9.97200
## 5     5482.677 13.24300
## 6     5179.645 15.29700
## 7     7593.406 35.81000
## 8     5179.411 14.06200
## 9     5741.463 41.73411
## 10    5501.955  8.97800

#3. Create new column names for the new data frame.

# Relabel the two columns. "Spending Money" contains a space, so it has to be
# backtick-quoted (or accessed with [[ ]]) wherever it is referenced later.
names(df.CA) <- c("Spending Money", "Income")
df.CA
##    Spending Money   Income
## 1        5220.370 15.96800
## 2        5447.345  7.38500
## 3        6180.149 49.93900
## 4        4842.608  9.97200
## 5        5482.677 13.24300
## 6        5179.645 15.29700
## 7        7593.406 35.81000
## 8        5179.411 14.06200
## 9        5741.463 41.73411
## 10       5501.955  8.97800

#4. Use the summary function to create an overview of your new data frame. #Then print the mean and median for the same two attributes. Please compare.

summary(df.CA)
##  Spending Money     Income      
##  Min.   :4843   Min.   : 7.385  
##  1st Qu.:5190   1st Qu.:10.790  
##  Median :5465   Median :14.680  
##  Mean   :5637   Mean   :21.239  
##  3rd Qu.:5682   3rd Qu.:30.849  
##  Max.   :7593   Max.   :49.939

# Mean and median of Spending Money (10-row sample)

mean(df.CA[["Spending Money"]])
## [1] 5636.903
median(df.CA[["Spending Money"]])
## [1] 5465.011

# Mean and median of Income (10-row sample)

mean(df.CA[["Income"]])
## [1] 21.23881
median(df.CA[["Income"]])
## [1] 14.6795

# Comparison against the full data set for the same two attributes.
# "Spending Money" and "Income" were taken from the `expenditure` and `income`
# columns of `homework`, so those are the matching full-data columns.
comparison <- data.frame(
  attribute = c("Spending Money", "Income"),
  sample_mean = c(
    mean(df.CA[["Spending Money"]]),
    mean(df.CA[["Income"]])
  ),
  full_mean = c(
    mean(homework[["expenditure"]]),
    mean(homework[["income"]])
  ),
  sample_median = c(
    median(df.CA[["Spending Money"]]),
    median(df.CA[["Income"]])
  ),
  full_median = c(
    median(homework[["expenditure"]]),
    median(homework[["income"]])
  )
)
comparison

# For the draw recorded above, the sample is higher than the full data on all
# four statistics (full-data summary: expenditure mean 5312 / median 5215,
# income mean 15.317 / median 13.728). That draw happened to include three
# high-income rows (49.939, 35.810, 41.734), which pulls a 10-row mean up
# sharply; a different draw will give different figures.

#5. For at least 3 values in a column, please rename them so that every value in that column is renamed. For example, suppose I have 20 values of the letter “e” in one column. Rename those values so that all 20 would show as “excellent”.

# Rename every occurrence of a value within one column, as step 5 asks.
# The recode is done in place on the existing `grades` column. Appending a
# column with cbind(homework, school = ...) instead would leave two columns
# named `school`, the new one entirely NA, and would rename nothing.
# NOTE(review): "KK-06" and "KK-08" are the codes visible in the rows shown
# below; any other code is left untouched. Confirm `grades` is the column
# intended for this step.
grade_labels <- c("KK-06" = "K-6", "KK-08" = "K-8")
to_recode <- homework$grades %in% names(grade_labels)
homework$grades[to_recode] <- unname(grade_labels[homework$grades[to_recode]])

#6. Display enough rows to see examples of steps 1-5 above; the renamed values are in the grades column.

head(homework, 20)
##     X district                          school      county grades students
## 1   1    75119              Sunol Glen Unified     Alameda    K-8      195
## 2   2    61499            Manzanita Elementary       Butte    K-8      240
## 3   3    61549     Thermalito Union Elementary       Butte    K-8     1550
## 4   4    61457 Golden Feather Union Elementary       Butte    K-8      243
## 5   5    61523        Palermo Union Elementary       Butte    K-8     1335
## 6   6    62042         Burrel Union Elementary      Fresno    K-8      137
## 7   7    68536           Holt Union Elementary San Joaquin    K-8      195
## 8   8    63834             Vineland Elementary        Kern    K-8      888
## 9   9    62331        Orange Center Elementary      Fresno    K-8      379
## 10 10    67306     Del Paso Heights Elementary  Sacramento    K-6     2247
## 11 11    65722       Le Grand Union Elementary      Merced    K-8      446
## 12 12    62174          West Fresno Elementary      Fresno    K-8      987
## 13 13    71795          Allensworth Elementary      Tulare    K-8      103
## 14 14    72181      Sunnyside Union Elementary      Tulare    K-8      487
## 15 15    72298            Woodville Elementary      Tulare    K-8      649
## 16 16    72041         Pixley Union Elementary      Tulare    K-8      852
## 17 17    63594     Lost Hills Union Elementary        Kern    K-8      491
## 18 18    63370   Buttonwillow Union Elementary        Kern    K-8      421
## 19 19    64709               Lennox Elementary Los Angeles    K-8     6880
## 20 20    63560               Lamont Elementary        Kern    K-8     2688
##    teachers calworks    lunch computer expenditure    income   english  read
## 1     10.90   0.5102   2.0408       67    6384.911 22.690001  0.000000 691.6
## 2     11.15  15.4167  47.9167      101    5099.381  9.824000  4.583333 660.5
## 3     82.90  55.0323  76.3226      169    5501.955  8.978000 30.000002 636.3
## 4     14.00  36.4754  77.0492       85    7101.831  8.978000  0.000000 651.9
## 5     71.50  33.1086  78.4270      171    5235.988  9.080333 13.857677 641.8
## 6      6.40  12.3188  86.9565       25    5580.147 10.415000 12.408759 605.7
## 7     10.00  12.9032  94.6237       28    5253.331  6.577000 68.717949 604.5
## 8     42.50  18.8063 100.0000       66    4565.746  8.174000 46.959461 605.5
## 9     19.00  32.1900  93.1398       35    5355.548  7.385000 30.079157 608.9
## 10   108.00  78.9942  87.3164        0    5036.211 11.613333 40.275921 611.9
## 11    21.00  18.6099  85.8744       86    4547.692  8.931000 52.914799 612.8
## 12    47.00  71.7131  98.6056       56    5447.345  7.385000 54.609932 616.6
## 13     5.00  22.4299  98.1308       25    6567.149  5.335000 42.718445 612.8
## 14    24.34  24.6094  77.1484        0    4818.613  8.279000 20.533880 610.0
## 15    36.00  14.6379  76.2712       31    5621.456  9.630000 80.123260 611.9
## 16    42.07  24.2142  94.2957       80    6026.360  7.454000 49.413143 614.8
## 17    28.92  11.2016  97.7597      100    6723.238  6.216000 85.539719 611.7
## 18    25.50   8.5511  77.9097       50    5589.885  7.764000 58.907364 614.9
## 19   303.03  21.2824  94.9712      960    5064.616  7.022000 77.005814 619.1
## 20   135.00  23.4375  93.2292      139    5433.593  5.699000 49.813988 621.3
##     math
## 1  690.0
## 2  661.9
## 3  650.9
## 4  643.5
## 5  639.9
## 6  605.4
## 7  609.0
## 8  612.5
## 9  616.1
## 10 613.4
## 11 618.7
## 12 616.0
## 13 619.8
## 14 622.6
## 15 621.0
## 16 619.9
## 17 624.4
## 18 621.7
## 19 620.5
## 20 619.3