Practice problems

# One row per college: employee count, top salary, and median salary.
# Values in each column are listed in the same order as the College names.
Colleges <- data.frame(
  College = c(
    "William and Mary",
    "Christopher Newport",
    "George Mason",
    "James Madison",
    "Longwood",
    "Norfolk State",
    "Old Dominion",
    "Radford",
    "Mary Washington",
    "Virginia",
    "Virginia Commonwealth",
    "Virginia Military Institute",
    "Virginia Tech",
    "Virginia State"
  ),
  Employees = c(
    2104, 922, 4043, 2833, 746, 919, 2369,
    1273, 721, 7431, 5825, 550, 7303, 761
  ),
  TopSalary = c(
    425000, 381486, 536714, 428400, 328268, 295000, 448272,
    312080, 449865, 561099, 503154, 364269, 500000, 356524
  ),
  MedianSalary = c(
    56496, 47895, 63029, 53080, 52000, 49605, 54416,
    51000, 53045, 60048, 55000, 44999, 51656, 55925
  )
)

# Show the full table.
print(Colleges)
##                        College Employees TopSalary MedianSalary
## 1             William and Mary      2104    425000        56496
## 2          Christopher Newport       922    381486        47895
## 3                 George Mason      4043    536714        63029
## 4                James Madison      2833    428400        53080
## 5                     Longwood       746    328268        52000
## 6                Norfolk State       919    295000        49605
## 7                 Old Dominion      2369    448272        54416
## 8                      Radford      1273    312080        51000
## 9              Mary Washington       721    449865        53045
## 10                    Virginia      7431    561099        60048
## 11       Virginia Commonwealth      5825    503154        55000
## 12 Virginia Military Institute       550    364269        44999
## 13               Virginia Tech      7303    500000        51656
## 14              Virginia State       761    356524        55925

Soal 1 (Memilih entri pertama, ketiga, kesepuluh, dan kedua belas dari vektor TopSalary)

# Pull rows 1, 3, 10 and 12 of the TopSalary column; a single column selected
# this way comes back as a plain vector.
Colleges[c(1, 3, 10, 12), "TopSalary"]
## [1] 425000 536714 561099 364269

Soal 2 (Memilih elemen MedianSalary dengan TopSalary > 400,000)

# Median salaries for the colleges whose top salary is above 400000.
with(Colleges, MedianSalary[TopSalary > 400000])
## [1] 56496 63029 53080 54416 53045 60048 55000 51656

Soal 3 (Memilih universitas dengan jumlah karyawan <= 1000)

# Keep every column for the colleges with at most 1000 employees.
Colleges[with(Colleges, Employees <= 1000), ]
##                        College Employees TopSalary MedianSalary
## 2          Christopher Newport       922    381486        47895
## 5                     Longwood       746    328268        52000
## 6                Norfolk State       919    295000        49605
## 9              Mary Washington       721    449865        53045
## 12 Virginia Military Institute       550    364269        44999
## 14              Virginia State       761    356524        55925

Soal 4 (Memilih 5 universitas secara acak)

# Draw 5 distinct row indices at random and show those colleges.
# sample(n, size) samples from 1..n directly, which avoids the 1:nrow()
# pattern (1:0 yields c(1, 0) on an empty table). The rows differ from run
# to run because no RNG seed is set here.
Colleges[sample(nrow(Colleges), size = 5), ]
##            College Employees TopSalary MedianSalary
## 7     Old Dominion      2369    448272        54416
## 10        Virginia      7431    561099        60048
## 9  Mary Washington       721    449865        53045
## 13   Virginia Tech      7303    500000        51656
## 6    Norfolk State       919    295000        49605
# One row per nation: region, population, percent increase, and GDP per capita.
# Values in each column are listed in the same order as the Nation names.
Countries <- data.frame(
  Nation = c(
    "China", "India", "United States", "Indonesia", "Brazil",
    "Pakistan", "Nigeria", "Bangladesh", "Russia", "Mexico"
  ),
  Region = c(
    "Asia", "Asia", "North America", "Asia", "South America",
    "Asia", "Africa", "Asia", "Europe", "North America"
  ),
  Population = c(
    1409517397, 1339180127, 324459463, 263991379, 209288278,
    197015955, 190886311, 164669751, 143989754, 129163276
  ),
  PctIncrease = c(0.4, 1.1, 0.7, 1.1, 0.8, 2.0, 2.6, 1.1, 0.0, 1.3),
  GDPcapita = c(8582, 1852, 57467, 3895, 10309, 1629, 2640, 1524, 10248, 8562)
)

# View the data frame.
print(Countries)
##           Nation        Region Population PctIncrease GDPcapita
## 1          China          Asia 1409517397         0.4      8582
## 2          India          Asia 1339180127         1.1      1852
## 3  United States North America  324459463         0.7     57467
## 4      Indonesia          Asia  263991379         1.1      3895
## 5         Brazil South America  209288278         0.8     10309
## 6       Pakistan          Asia  197015955         2.0      1629
## 7        Nigeria        Africa  190886311         2.6      2640
## 8     Bangladesh          Asia  164669751         1.1      1524
## 9         Russia        Europe  143989754         0.0     10248
## 10        Mexico North America  129163276         1.3      8562

Soal 5 (Memilih baris dengan GDP per capita kurang dari 10000 dan bukan dari region Asia)

# Rows with GDP per capita below 10000 whose region is not Asia.
# which() keeps only positions where the condition is TRUE.
Countries[which(Countries$GDPcapita < 10000 & Countries$Region != "Asia"), ]
##     Nation        Region Population PctIncrease GDPcapita
## 7  Nigeria        Africa  190886311         2.6      2640
## 10  Mexico North America  129163276         1.3      8562

Soal 6 (Memilih sampel acak dari 3 negara)

# Pick 3 distinct row positions at random and show those countries.
Countries[sample.int(nrow(Countries), size = 3), ]
##          Nation        Region Population PctIncrease GDPcapita
## 3 United States North America  324459463         0.7     57467
## 7       Nigeria        Africa  190886311         2.6      2640
## 2         India          Asia 1339180127         1.1      1852

Soal 7 (Memilih negara dengan persentase peningkatan populasi lebih dari 1.5%)

# Rows whose population percent increase exceeds 1.5.
# which() keeps only positions where the condition is TRUE.
Countries[which(Countries$PctIncrease > 1.5), ]
##     Nation Region Population PctIncrease GDPcapita
## 6 Pakistan   Asia  197015955         2.0      1629
## 7  Nigeria Africa  190886311         2.6      2640
# One row per Olympic Games: year, type, host, competitor/event/nation counts,
# and the leader. Values in each column follow the same row order.
Olympics <- data.frame(
  Year = c(
    1992, 1992, 1994, 1996, 1998, 2000, 2002, 2004,
    2006, 2008, 2010, 2012, 2014, 2016, 2018
  ),
  Type = c(
    "Summer", "Winter", "Winter", "Summer", "Winter",
    "Summer", "Winter", "Summer", "Winter", "Summer",
    "Winter", "Summer", "Winter", "Summer", "Winter"
  ),
  Host = c(
    "Spain", "France", "Norway", "United States", "Japan",
    "Australia", "United States", "Greece", "Italy", "China",
    "Canada", "United Kingdom", "Russia", "Brazil", "South Korea"
  ),
  Competitors = c(
    9356, 1801, 1737, 10318, 2176, 10651, 2399, 10625,
    2508, 10942, 2566, 10768, 2873, 11238, 2922
  ),
  Events = c(
    257, 57, 61, 271, 68, 300, 78, 301,
    84, 302, 86, 302, 98, 306, 102
  ),
  Nations = c(
    169, 64, 67, 197, 72, 199, 78, 201,
    80, 204, 82, 204, 88, 207, 92
  ),
  Leader = c(
    "Unified Team", "Germany", "Russia", "United States", "Germany",
    "United States", "Norway", "United States", "Germany", "China",
    "Canada", "United States", "Russia", "United States", "Norway"
  )
)

# Show the full table.
print(Olympics)
##    Year   Type           Host Competitors Events Nations        Leader
## 1  1992 Summer          Spain        9356    257     169  Unified Team
## 2  1992 Winter         France        1801     57      64       Germany
## 3  1994 Winter         Norway        1737     61      67        Russia
## 4  1996 Summer  United States       10318    271     197 United States
## 5  1998 Winter          Japan        2176     68      72       Germany
## 6  2000 Summer      Australia       10651    300     199 United States
## 7  2002 Winter  United States        2399     78      78        Norway
## 8  2004 Summer         Greece       10625    301     201 United States
## 9  2006 Winter          Italy        2508     84      80       Germany
## 10 2008 Summer          China       10942    302     204         China
## 11 2010 Winter         Canada        2566     86      82        Canada
## 12 2012 Summer United Kingdom       10768    302     204 United States
## 13 2014 Winter         Russia        2873     98      88        Russia
## 14 2016 Summer         Brazil       11238    306     207 United States
## 15 2018 Winter    South Korea        2922    102      92        Norway

Soal 8 (Baris di mana Host juga merupakan Leader)

# Games where the Host value equals the Leader value.
host_leader <- Olympics[with(Olympics, Host == Leader), ]
print(host_leader)
##    Year   Type          Host Competitors Events Nations        Leader
## 4  1996 Summer United States       10318    271     197 United States
## 10 2008 Summer         China       10942    302     204         China
## 11 2010 Winter        Canada        2566     86      82        Canada
## 13 2014 Winter        Russia        2873     98      88        Russia

Soal 9 (Baris di mana Competitors per Event lebih besar dari 35)

# Games with more than 35 competitors per event (Competitors / Events).
competitors_per_event <- Olympics[with(Olympics, Competitors / Events > 35), ]
print(competitors_per_event)
##    Year   Type           Host Competitors Events Nations        Leader
## 1  1992 Summer          Spain        9356    257     169  Unified Team
## 4  1996 Summer  United States       10318    271     197 United States
## 6  2000 Summer      Australia       10651    300     199 United States
## 8  2004 Summer         Greece       10625    301     201 United States
## 10 2008 Summer          China       10942    302     204         China
## 12 2012 Summer United Kingdom       10768    302     204 United States
## 14 2016 Summer         Brazil       11238    306     207 United States

Soal 10 (Baris di Olimpiade Musim Dingin dengan jumlah Nations minimal 80)

# Winter Games in which at least 80 nations took part.
winter_olympics_nations <- Olympics[
  with(Olympics, Type == "Winter" & Nations >= 80),
]
print(winter_olympics_nations)
##    Year   Type        Host Competitors Events Nations  Leader
## 9  2006 Winter       Italy        2508     84      80 Germany
## 11 2010 Winter      Canada        2566     86      82  Canada
## 13 2014 Winter      Russia        2873     98      88  Russia
## 15 2018 Winter South Korea        2922    102      92  Norway