link
# Where the employment-by-district CSV lives and what it is called.
path <- "C:/Users/nfabius/Desktop/data mining/"
file_name <- "Employment_by_Congressional_District.csv"

# `path` already ends with "/", so joining the two pieces without a
# separator gives the full file location.
Data <- read.csv(paste0(path, file_name))

# Construction rows only, ranked from the largest to the smallest employment
# count, so the first row is the district with the most construction jobs.
d <- local({
  construction <- Data[Data$NAICS_Decription == "Construction", ]
  construction[order(construction$Employment, decreasing = TRUE), ]
})
1)
LA 6 (Construction, 58288) is the district with the most construction jobs.
2)
# Every industry row belonging to the district labelled "NY 10".
mine <- Data[Data$Congressional_District == "NY 10", ]
In NY 10, the lowest employment is in Mining, quarrying, and oil and gas extraction (0), and the highest is in Finance and insurance (109148).
3)
# Give the first column of Data the short name "CD".
names(Data)[1L] <- "CD"
4)
# Split each district label (e.g. "AK 1") into its two parts and then glue
# them back together.

# First two characters of the label (the "AK" in "AK 1").
Data$state <- substr(Data$CD, 1, 2)

# Numeric part of the label. The call is namespace-qualified so it works even
# when readr has not been attached with library(), and the input is coerced to
# character because read.csv() may deliver the column as a factor.
Data$digit <- readr::parse_number(as.character(Data$CD))

# Rebuild the label from its parts, separated by a single space.
Data$CD_United <- paste(Data$state, Data$digit, sep = " ")

# Preview of the first rows; head() returns only the rows that exist instead
# of padding with NA rows when Data has fewer than 10.
data <- head(Data, 10L)
5)
# Render `data` as a captioned table.
knitr::kable(x = data, caption = "Table with kable")
Table with kable

| CD | NAICS_Decription | Employment | state | digit | CD_United |
|:---|:---|---:|:---|---:|:---|
| AK 1 | Agriculture, forestry, fishing and hunting | 819 | AK | 1 | AK 1 |
| AK 1 | Mining, quarrying, and oil and gas extraction | 13566 | AK | 1 | AK 1 |
| AK 1 | Utilities | 2081 | AK | 1 | AK 1 |
| AK 1 | Construction | 18581 | AK | 1 | AK 1 |
| AK 1 | Manufacturing | 12294 | AK | 1 | AK 1 |
| AK 1 | Wholesale trade | 9125 | AK | 1 | AK 1 |
| AK 1 | Retail trade | 35221 | AK | 1 | AK 1 |
| AK 1 | Transportation and warehousing | 18736 | AK | 1 | AK 1 |
| AK 1 | Information | 6711 | AK | 1 | AK 1 |
| AK 1 | Finance and insurance | 7340 | AK | 1 | AK 1 |
6)
# Start again from the raw file so Data holds only the columns in the CSV.
Data <- read.csv(paste0(path, file_name))

# Split the data into two pieces that share the first column:
# df1 keeps everything except the third column, df2 keeps columns 1 and 3.
df1 <- Data[, -3]
df2 <- Data[, c(1, 3)]

# Give both pieces the same running row id so they can be matched up again
# row for row. The id is derived from the actual number of rows rather than a
# hard-coded count, so it stays correct if the file grows or shrinks.
df1$mergeid <- seq_len(nrow(df1))
df2$mergeid <- seq_len(nrow(df2))
7)
# Put the two pieces back together, matching rows on the district label and
# the running row id that both pieces carry.
Data_final <- merge(
  x = df1,
  y = df2,
  by = c("Congressional_District", "mergeid")
)

# Show the first ten rows of the merged result as a captioned table.
data <- Data_final[seq_len(10), ]
knitr::kable(x = data, caption = "Table with kable")
Table with kable

| Congressional_District | mergeid | NAICS_Decription | Employment |
|:---|---:|:---|---:|
| AK 1 | 1 | Agriculture, forestry, fishing and hunting | 819 |
| AK 1 | 10 | Finance and insurance | 7340 |
| AK 1 | 11 | Real estate and rental and leasing | 4455 |
| AK 1 | 12 | Professional, scientific, and technical | 18662 |
| AK 1 | 13 | Management of companies and enterprises | 7821 |
| AK 1 | 14 | Administrative and support and waste | 17628 |
| AK 1 | 15 | Educational services | 3025 |
| AK 1 | 16 | Health care and social assistance | 49240 |
| AK 1 | 17 | Arts, entertainment, and recreation | 4639 |
| AK 1 | 18 | Accommodation and food services | 28112 |
8)
# Grouped view of the merged data. aggregate() below does its own grouping and
# does not need this object; it is kept so any later code that refers to
# `Group` still finds it. Namespace-qualified so it works without
# library(dplyr) being attached.
Group <- dplyr::group_by(Data_final, NAICS_Decription)

# Mean employment per industry. The Employment column is selected by name
# rather than by position, so the result does not depend on the column order
# that merge() happened to produce.
Means <- aggregate(
  Data_final["Employment"],
  list(Data_final$NAICS_Decription),
  mean
)

# Smallest mean first.
Means <- Means[order(Means$Employment, decreasing = FALSE), ]

# Show the ten industries with the lowest mean employment; head() returns only
# the rows that exist instead of padding with NA rows.
data <- head(Means, 10L)
knitr::kable(data, caption = "Table with kable")
Table with kable

|   | Group.1 | Employment |
|:---|:---|---:|
| 9 | Industries not classified | 57.12815 |
| 3 | Agriculture, forestry, fishing and hunting | 366.28837 |
| 19 | Utilities | 1182.36384 |
| 13 | Mining, quarrying, and oil and gas extraction | 1504.61033 |
| 16 | Real estate and rental and leasing | 4747.62243 |
| 4 | Arts, entertainment, and recreation | 5111.65446 |
| 11 | Management of companies and enterprises | 7548.55149 |
| 10 | Information | 7737.84211 |
| 6 | Educational services | 8361.52632 |
| 18 | Transportation and warehousing | 10539.05950 |
9)
# Start again from the raw file.
Data <- read.csv(paste0(path, file_name))

# One row per industry, one column per congressional district.
Wide <- reshape(
  Data,
  idvar = "NAICS_Decription",
  timevar = "Congressional_District",
  direction = "wide"
)

# Column labels: "industry" for the id column, then one label per district.
# reshape() lays the district columns out in the order the districts first
# appear in Data, so the labels are taken in that same order. Using
# unique(as.character(...)) instead of levels(...) matters twice over:
# levels() is NULL when the column is read as character, and factor levels are
# sorted alphabetically (e.g. "CA 10" before "CA 2"), which can attach labels
# to the wrong columns.
# NOTE(review): assumes Data has exactly three columns (district, industry,
# employment) so that the label count matches ncol(Wide) -- confirm.
names <- c("industry", unique(as.character(Data$Congressional_District)))
colnames(Wide) <- names

# Show the top-left corner of the wide table.
data <- Wide[1:10, 1:5]
knitr::kable(data, caption = "Table with kable")
Table with kable

| industry |   |   |   |   |
|:---|---:|---:|---:|---:|
| Agriculture, forestry, fishing and hunting | 819 | 131 | 1061 | 451 |
| Mining, quarrying, and oil and gas extraction | 13566 | 213 | 7141 | 25054 |
| Utilities | 2081 | 2374 | 3094 | 3598 |
| Construction | 18581 | 19468 | 24836 | 25725 |
| Manufacturing | 12294 | 26420 | 17937 | 25361 |
| Wholesale trade | 9125 | 15520 | 16366 | 23980 |
| Retail trade | 35221 | 55070 | 59901 | 52005 |
| Transportation and warehousing | 18736 | 14153 | 13838 | 19664 |
| Information | 6711 | 6684 | 8724 | 6798 |
| Finance and insurance | 7340 | 42783 | 16728 | 18128 |
10)
I could not do part 10; I would appreciate it if you could do it in class.