link

# Folder and file name of the CSV holding employment counts by congressional
# district and industry.
path <- "C:/Users/nfabius/Desktop/data mining/"
file_name <- "Employment_by_Congressional_District.csv"

# `path` already ends in "/", so plain concatenation yields the full file path.
Data <- read.csv(paste0(path, file_name))

# Keep only the construction rows, then rank them from most to fewest jobs.
d <- Data[Data[["NAICS_Decription"]] == "Construction", ]
d <- d[order(d[["Employment"]], decreasing = TRUE), ]

1)

LA 6 is the district with the most construction jobs (LA 6, Construction, 58288).

2)

# All industry rows for the NY 10 district.
mine <- Data[Data[["Congressional_District"]] == "NY 10", ]

In NY 10, the industry with the lowest employment is Mining, quarrying, and oil and gas extraction (0), and the industry with the highest employment is Finance and insurance (109148).

3)

# Rename the first column (the district label) to the shorter "CD".
names(Data)[1] <- "CD"

4)

# Split the district label (e.g. "AK 1") into a two-letter state code and a
# district number, then paste the two parts back together in CD_United.
Data$state <- substr(Data$CD, 1, 2)
# Namespace-qualified so the call works even when readr has not been attached.
Data$digit <- readr::parse_number(Data$CD)
Data$CD_United <- paste(Data$state, Data$digit, sep = " ")

# First ten rows for display; head() never pads with NA rows on short data.
data <- head(Data, 10)

5)

# Render the ten-row preview as a captioned table.
knitr::kable(
  x = data,
  caption = "Table with kable"
)
Table with kable
CD NAICS_Decription Employment state digit CD_United
AK 1 Agriculture, forestry, fishing and hunting 819 AK 1 AK 1
AK 1 Mining, quarrying, and oil and gas extraction 13566 AK 1 AK 1
AK 1 Utilities 2081 AK 1 AK 1
AK 1 Construction 18581 AK 1 AK 1
AK 1 Manufacturing 12294 AK 1 AK 1
AK 1 Wholesale trade 9125 AK 1 AK 1
AK 1 Retail trade 35221 AK 1 AK 1
AK 1 Transportation and warehousing 18736 AK 1 AK 1
AK 1 Information 6711 AK 1 AK 1
AK 1 Finance and insurance 7340 AK 1 AK 1
###6)
# Re-read the raw file to start again from the original columns.
Data <- read.csv(paste0(path, file_name))

# Split the table in two: df1 drops the third column, df2 keeps only the
# first and third columns.
df1 <- Data[, -3]
df2 <- Data[, c(1, 3)]

# Shared row key so the two halves can be re-joined one-to-one. The length is
# taken from the data rather than hard-coded, so this keeps working if the
# number of rows in the file changes.
df1$mergeid <- seq_len(nrow(df1))
df2$mergeid <- seq_len(nrow(df2))

7)

# Re-join the two halves on district + row key, then show the first ten rows.
Data_final <- merge(
  x = df1,
  y = df2,
  by = c("Congressional_District", "mergeid")
)
data <- Data_final[seq_len(10), ]
knitr::kable(x = data, caption = "Table with kable")
Table with kable
Congressional_District mergeid NAICS_Decription Employment
AK 1 1 Agriculture, forestry, fishing and hunting 819
AK 1 10 Finance and insurance 7340
AK 1 11 Real estate and rental and leasing 4455
AK 1 12 Professional, scientific, and technical 18662
AK 1 13 Management of companies and enterprises 7821
AK 1 14 Administrative and support and waste 17628
AK 1 15 Educational services 3025
AK 1 16 Health care and social assistance 49240
AK 1 17 Arts, entertainment, and recreation 4639
AK 1 18 Accommodation and food services 28112

8)

# Mean employment per industry. aggregate() performs the grouping itself, so
# no separate group_by() step (and no dplyr dependency) is needed, and the
# value column is selected by name rather than by position. The grouping list
# is left unnamed so the result keeps its "Group.1" column name.
Means <- aggregate(
  Data_final["Employment"],
  by = list(Data_final$NAICS_Decription),
  FUN = mean
)

# Smallest means first; display the ten lowest industries.
Means <- Means[order(Means$Employment, decreasing = FALSE), ]
data <- head(Means, 10)
knitr::kable(data, caption = "Table with kable")
Table with kable
Group.1 Employment
9 Industries not classified 57.12815
3 Agriculture, forestry, fishing and hunting 366.28837
19 Utilities 1182.36384
13 Mining, quarrying, and oil and gas extraction 1504.61033
16 Real estate and rental and leasing 4747.62243
4 Arts, entertainment, and recreation 5111.65446
11 Management of companies and enterprises 7548.55149
10 Information 7737.84211
6 Educational services 8361.52632
18 Transportation and warehousing 10539.05950

9)

# Start again from the raw long-format file.
Data <- read.csv(paste0(path, file_name))

# One row per industry, one Employment column per congressional district.
Wide <- reshape(
  Data,
  idvar = "NAICS_Decription",
  timevar = "Congressional_District",
  direction = "wide"
)

# reshape() names the new columns "Employment.<district>". Build the final
# headers from those names so every label is guaranteed to sit on its own
# column. Calling levels() on the district column is not reliable here: it is
# NULL when the column is read as character (the read.csv default since
# R 4.0), and on a factor it is alphabetical rather than in reshape()'s
# first-appearance order.
wide_names <- sub("^Employment\\.", "", colnames(Wide))
wide_names[wide_names == "NAICS_Decription"] <- "industry"
colnames(Wide) <- wide_names

# Preview: first ten industries across the first four districts.
data <- Wide[1:10, 1:5]
knitr::kable(data, caption = "Table with kable")
Table with kable
industry AK 1 AL 1 AL 2 AL 3
Agriculture, forestry, fishing and hunting 819 131 1061 451
Mining, quarrying, and oil and gas extraction 13566 213 7141 25054
Utilities 2081 2374 3094 3598
Construction 18581 19468 24836 25725
Manufacturing 12294 26420 17937 25361
Wholesale trade 9125 15520 16366 23980
Retail trade 35221 55070 59901 52005
Transportation and warehousing 18736 14153 13838 19664
Information 6711 6684 8724 6798
Finance and insurance 7340 42783 16728 18128

10)

I could not do part 10; I would appreciate it if you could go over it in class.