Week 1 Exercises
1. Download and import the CSV file located at: https://bradleyboehmke.github.io/public/data/reddit.csv
library(printr)
# Read the downloaded copy by its full path instead of calling setwd(), so the
# session's working directory is not changed as a side effect.
reddit <- read.csv(file.path("C:/Users/Anitha/Downloads", "reddit.csv"))
# Preview the first rows of the imported data.
head(reddit)
| 1 |
0 |
25-34 |
NA |
Employed full time |
NA |
No |
Bachelor’s degree |
United States |
New York |
$150,000 or more |
getmotivated |
NA |
NA |
| 2 |
0 |
25-34 |
NA |
Employed full time |
NA |
No |
Bachelor’s degree |
United States |
New York |
$150,000 or more |
gaming |
NA |
NA |
| 3 |
1 |
18-24 |
NA |
Freelance |
NA |
No |
Some college |
United States |
Virginia |
Under $20,000 |
snackexchange |
NA |
NA |
| 4 |
0 |
25-34 |
NA |
Freelance |
NA |
No |
Bachelor’s degree |
United States |
New York |
$150,000 or more |
spacedicks |
NA |
NA |
| 5 |
1 |
25-34 |
NA |
Employed full time |
NA |
No |
Bachelor’s degree |
United States |
California |
$70,000 - $99,999 |
aww |
NA |
NA |
| 6 |
0 |
25-34 |
Married/civil union/domestic partnership |
Employed full time |
No |
No |
Bachelor’s degree |
United States |
New York |
$150,000 or more |
gaming |
I like dogs. |
Cheddar |
# Show the dimensions and per-column types of the locally imported data.
str(object = reddit)
## 'data.frame': 32754 obs. of 14 variables:
## $ id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ gender : int 0 0 1 0 1 0 0 0 0 0 ...
## $ age.range : Factor w/ 7 levels "18-24","25-34",..: 2 2 1 2 2 2 2 1 3 2 ...
## $ marital.status : Factor w/ 6 levels "Engaged","Forever Alone",..: NA NA NA NA NA 4 3 4 4 3 ...
## $ employment.status: Factor w/ 6 levels "Employed full time",..: 1 1 2 2 1 1 1 4 1 2 ...
## $ military.service : Factor w/ 2 levels "No","Yes": NA NA NA NA NA 1 1 1 1 1 ...
## $ children : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ education : Factor w/ 7 levels "Associate degree",..: 2 2 5 2 2 2 5 2 2 5 ...
## $ country : Factor w/ 439 levels " Canada"," Canada eh",..: 394 394 394 394 394 394 125 394 394 125 ...
## $ state : Factor w/ 53 levels "","Alabama","Alaska",..: 33 33 48 33 6 33 1 6 33 1 ...
## $ income.range : Factor w/ 8 levels "$100,000 - $149,999",..: 2 2 8 2 7 2 NA 7 2 7 ...
## $ fav.reddit : Factor w/ 1834 levels "","'home' page (or front page if you prefer)",..: 720 691 1511 1528 188 691 1318 571 1629 1 ...
## $ dog.cat : Factor w/ 3 levels "I like cats.",..: NA NA NA NA NA 2 2 2 1 1 ...
## $ cheese : Factor w/ 11 levels "American","Brie",..: NA NA NA NA NA 3 3 1 10 7 ...
2. Now import the above CSV file directly from the URL provided (without downloading it to your local hard drive).
library(printr)
# Address of the remote CSV; read.csv() fetches it directly, so no local copy
# is needed.
url <- "https://bradleyboehmke.github.io/public/data/reddit.csv"
# Keep text columns as character vectors rather than converting to factors.
reddit2 <- read.csv(file = url, stringsAsFactors = FALSE)
head(x = reddit2, n = 6L)
| 1 |
0 |
25-34 |
NA |
Employed full time |
NA |
No |
Bachelor’s degree |
United States |
New York |
$150,000 or more |
getmotivated |
NA |
NA |
| 2 |
0 |
25-34 |
NA |
Employed full time |
NA |
No |
Bachelor’s degree |
United States |
New York |
$150,000 or more |
gaming |
NA |
NA |
| 3 |
1 |
18-24 |
NA |
Freelance |
NA |
No |
Some college |
United States |
Virginia |
Under $20,000 |
snackexchange |
NA |
NA |
| 4 |
0 |
25-34 |
NA |
Freelance |
NA |
No |
Bachelor’s degree |
United States |
New York |
$150,000 or more |
spacedicks |
NA |
NA |
| 5 |
1 |
25-34 |
NA |
Employed full time |
NA |
No |
Bachelor’s degree |
United States |
California |
$70,000 - $99,999 |
aww |
NA |
NA |
| 6 |
0 |
25-34 |
Married/civil union/domestic partnership |
Employed full time |
No |
No |
Bachelor’s degree |
United States |
New York |
$150,000 or more |
gaming |
I like dogs. |
Cheddar |
# Show the dimensions and per-column types of the data imported from the URL.
str(object = reddit2)
## 'data.frame': 32754 obs. of 14 variables:
## $ id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ gender : int 0 0 1 0 1 0 0 0 0 0 ...
## $ age.range : chr "25-34" "25-34" "18-24" "25-34" ...
## $ marital.status : chr NA NA NA NA ...
## $ employment.status: chr "Employed full time" "Employed full time" "Freelance" "Freelance" ...
## $ military.service : chr NA NA NA NA ...
## $ children : chr "No" "No" "No" "No" ...
## $ education : chr "Bachelor's degree" "Bachelor's degree" "Some college" "Bachelor's degree" ...
## $ country : chr "United States" "United States" "United States" "United States" ...
## $ state : chr "New York" "New York" "Virginia" "New York" ...
## $ income.range : chr "$150,000 or more" "$150,000 or more" "Under $20,000" "$150,000 or more" ...
## $ fav.reddit : chr "getmotivated" "gaming" "snackexchange" "spacedicks" ...
## $ dog.cat : chr NA NA NA NA ...
## $ cheese : chr NA NA NA NA ...
3. Import the .xlsx file located at: http://www.huduser.gov/portal/datasets/fmr/fmr2017/FY2017_4050_FMR.xlsx
library(printr)
library(readxl)
# Read the downloaded workbook by its full path instead of calling setwd(), so
# the session's working directory is not changed as a side effect.
fy2017 <- read_excel(
  file.path("C:/Users/Anitha/Downloads", "FY2017_4050_FMR.xlsx"),
  sheet = "EXCEL_DATA"
)
# Preview the first rows of the imported sheet.
head(fy2017)
| 2300512300 |
NA |
1078 |
755 |
851 |
1454 |
1579 |
23 |
METRO38860MM6400 |
Portland, ME HUD Metro FMR Area |
NA |
12300 |
Cumberland County |
Chebeague Island town |
341 |
1109 |
ME |
40 |
1 |
0.9720469 |
-31 |
| 6099999999 |
NA |
677 |
502 |
506 |
987 |
1038 |
60 |
NCNTY60999N60999 |
American Samoa |
999 |
99999 |
American Samoa |
American Samoa |
55519 |
653 |
AS |
40 |
0 |
1.0367534 |
24 |
| 6999999999 |
NA |
666 |
411 |
498 |
961 |
1158 |
69 |
NCNTY69999N69999 |
Northern Mariana Islands |
999 |
99999 |
Northern Mariana Islands |
Northern Mariana Islands |
53883 |
642 |
MP |
40 |
0 |
1.0373832 |
24 |
| 0100199999 |
0100199999 |
822 |
587 |
682 |
1054 |
1425 |
1 |
METRO33860M33860 |
Montgomery, AL MSA |
1 |
99999 |
Autauga County |
Autauga County |
54571 |
788 |
AL |
40 |
1 |
1.0431472 |
34 |
| 0100399999 |
0100399999 |
977 |
807 |
847 |
1422 |
1634 |
1 |
METRO19300M19300 |
Daphne-Fairhope-Foley, AL MSA |
3 |
99999 |
Baldwin County |
Baldwin County |
182265 |
873 |
AL |
40 |
1 |
1.1191294 |
104 |
| 0100599999 |
0100599999 |
671 |
501 |
505 |
839 |
958 |
1 |
NCNTY01005N01005 |
Barbour County, AL |
5 |
99999 |
Barbour County |
Barbour County |
27457 |
636 |
AL |
40 |
0 |
1.0550314 |
35 |
# Show the dimensions and per-column types of the locally imported workbook.
str(object = fy2017)
## Classes 'tbl_df', 'tbl' and 'data.frame': 4769 obs. of 21 variables:
## $ fips2010 : chr "2300512300" "6099999999" "6999999999" "0100199999" ...
## $ fips2000 : chr NA NA NA "0100199999" ...
## $ fmr2 : num 1078 677 666 822 977 ...
## $ fmr0 : num 755 502 411 587 807 501 665 665 491 464 ...
## $ fmr1 : num 851 506 498 682 847 505 751 751 494 467 ...
## $ fmr3 : num 1454 987 961 1054 1422 ...
## $ fmr4 : num 1579 1038 1158 1425 1634 ...
## $ State : num 23 60 69 1 1 1 1 1 1 1 ...
## $ Metro_code : chr "METRO38860MM6400" "NCNTY60999N60999" "NCNTY69999N69999" "METRO33860M33860" ...
## $ areaname : chr "Portland, ME HUD Metro FMR Area" "American Samoa" "Northern Mariana Islands" "Montgomery, AL MSA" ...
## $ county : num NA 999 999 1 3 5 7 9 11 13 ...
## $ CouSub : chr "12300" "99999" "99999" "99999" ...
## $ countyname : chr "Cumberland County" "American Samoa" "Northern Mariana Islands" "Autauga County" ...
## $ county_town_name : chr "Chebeague Island town" "American Samoa" "Northern Mariana Islands" "Autauga County" ...
## $ pop2010 : num 341 55519 53883 54571 182265 ...
## $ acs_2016_2 : num 1109 653 642 788 873 ...
## $ state_alpha : chr "ME" "AS" "MP" "AL" ...
## $ fmr_type : num 40 40 40 40 40 40 40 40 40 40 ...
## $ metro : num 1 0 0 1 1 0 1 1 0 0 ...
## $ FMR_PCT_Change : num 0.972 1.037 1.037 1.043 1.119 ...
## $ FMR_Dollar_Change: num -31 24 24 34 104 35 26 26 52 52 ...
4. Now import the above .xlsx file directly from the URL provided (without downloading it to your local hard drive).
library(printr)
library(gdata)
# read.xls() accepts a URL and passes extra arguments on to read.csv().
# The FIPS identifiers are read as text: parsed as numbers they lose their
# leading zeros (e.g. "0100199999" becomes 100199999).
fy20172 <- read.xls(
  "http://www.huduser.gov/portal/datasets/fmr/fmr2017/FY2017_4050_FMR.xlsx",
  colClasses = c(fips2010 = "character", fips2000 = "character")
)
# Preview the first rows of the imported sheet.
head(fy20172)
| 2300512300 |
NA |
1078 |
755 |
851 |
1454 |
1579 |
23 |
METRO38860MM6400 |
Portland, ME HUD Metro FMR Area |
NA |
12300 |
Cumberland County |
Chebeague Island town |
341 |
1109 |
ME |
40 |
1 |
0.9720469 |
-31 |
| 6099999999 |
NA |
677 |
502 |
506 |
987 |
1038 |
60 |
NCNTY60999N60999 |
American Samoa |
999 |
99999 |
American Samoa |
American Samoa |
55519 |
653 |
AS |
40 |
0 |
1.0367534 |
24 |
| 6999999999 |
NA |
666 |
411 |
498 |
961 |
1158 |
69 |
NCNTY69999N69999 |
Northern Mariana Islands |
999 |
99999 |
Northern Mariana Islands |
Northern Mariana Islands |
53883 |
642 |
MP |
40 |
0 |
1.0373832 |
24 |
| 100199999 |
100199999 |
822 |
587 |
682 |
1054 |
1425 |
1 |
METRO33860M33860 |
Montgomery, AL MSA |
1 |
99999 |
Autauga County |
Autauga County |
54571 |
788 |
AL |
40 |
1 |
1.0431472 |
34 |
| 100399999 |
100399999 |
977 |
807 |
847 |
1422 |
1634 |
1 |
METRO19300M19300 |
Daphne-Fairhope-Foley, AL MSA |
3 |
99999 |
Baldwin County |
Baldwin County |
182265 |
873 |
AL |
40 |
1 |
1.1191294 |
104 |
| 100599999 |
100599999 |
671 |
501 |
505 |
839 |
958 |
1 |
NCNTY01005N01005 |
Barbour County, AL |
5 |
99999 |
Barbour County |
Barbour County |
27457 |
636 |
AL |
40 |
0 |
1.0550314 |
35 |
# Show the dimensions and per-column types of the workbook imported from the URL.
str(object = fy20172)
## 'data.frame': 4769 obs. of 21 variables:
## $ fips2010 : num 2.3e+09 6.1e+09 7.0e+09 1.0e+08 1.0e+08 ...
## $ fips2000 : num NA NA NA 1e+08 1e+08 ...
## $ fmr2 : int 1078 677 666 822 977 671 866 866 621 621 ...
## $ fmr0 : int 755 502 411 587 807 501 665 665 491 464 ...
## $ fmr1 : int 851 506 498 682 847 505 751 751 494 467 ...
## $ fmr3 : int 1454 987 961 1054 1422 839 1163 1163 853 849 ...
## $ fmr4 : int 1579 1038 1158 1425 1634 958 1298 1298 856 1094 ...
## $ State : int 23 60 69 1 1 1 1 1 1 1 ...
## $ Metro_code : Factor w/ 2598 levels "METRO10180M10180",..: 451 2592 2594 384 160 625 55 55 626 627 ...
## $ areaname : Factor w/ 2598 levels " Santa Ana-Anaheim-Irvine, CA HUD Metro FMR Area",..: 1903 52 1723 1633 571 122 186 186 263 271 ...
## $ county : int NA 999 999 1 3 5 7 9 11 13 ...
## $ CouSub : int 12300 99999 99999 99999 99999 99999 99999 99999 99999 99999 ...
## $ countyname : Factor w/ 1961 levels "Abbeville County",..: 462 41 1265 92 99 110 163 178 239 249 ...
## $ county_town_name : Factor w/ 3175 levels "Abbeville County",..: 533 60 2024 136 149 165 254 277 386 401 ...
## $ pop2010 : int 341 55519 53883 54571 182265 27457 22915 57322 10914 20947 ...
## $ acs_2016_2 : int 1109 653 642 788 873 636 840 840 569 569 ...
## $ state_alpha : Factor w/ 56 levels "AK","AL","AR",..: 24 4 28 2 2 2 2 2 2 2 ...
## $ fmr_type : int 40 40 40 40 40 40 40 40 40 40 ...
## $ metro : int 1 0 0 1 1 0 1 1 0 0 ...
## $ FMR_PCT_Change : num 0.972 1.037 1.037 1.043 1.119 ...
## $ FMR_Dollar_Change: int -31 24 24 34 104 35 26 26 52 52 ...
5. Go to this University of Dayton webpage (http://academic.udayton.edu/kissock/http/Weather/citylistUS.htm), scroll down to Ohio, and import the Cincinnati (OHCINCIN.txt) file.
library(printr)
# The text file has no header row, so the columns get the default names V1..V4.
cincinnati <- read.table(
  file = "http://academic.udayton.edu/kissock/http/Weather/gsod95-current/OHCINCIN.txt",
  header = FALSE
)
head(x = cincinnati, n = 6L)
| 1 |
1 |
1995 |
41.1 |
| 1 |
2 |
1995 |
22.2 |
| 1 |
3 |
1995 |
22.8 |
| 1 |
4 |
1995 |
14.9 |
| 1 |
5 |
1995 |
9.5 |
| 1 |
6 |
1995 |
23.8 |
# Show the dimensions and per-column types of the imported weather data.
str(object = cincinnati)
## 'data.frame': 7963 obs. of 4 variables:
## $ V1: int 1 1 1 1 1 1 1 1 1 1 ...
## $ V2: int 1 2 3 4 5 6 7 8 9 10 ...
## $ V3: int 1995 1995 1995 1995 1995 1995 1995 1995 1995 1995 ...
## $ V4: num 41.1 22.2 22.8 14.9 9.5 23.8 31.1 26.9 31.3 31.5 ...