# Load the raw consumer-complaint export, keep only the seven columns used in
# the analysis, and save that trimmed extract as a CSV.
# Fixes: the path and file name were wrapped in typographic quotes, and the
# select() and write.csv() calls were fused onto one line; neither parses in R.
complaint.data <- read.csv('/Users/dawig/Documents/Consumer_Complaintsb.csv', header = TRUE)
complaint.data %<>%
  select(
    Product, Company, State, Submitted.via,
    Company.response.to.consumer, Timely.response., Consumer.disputed.
  )
# NOTE(review): this writes relative to the working directory, while the next
# step reads the file back from /Users/dawig/Documents/ — confirm the working
# directory is that folder.
write.csv(complaint.data, file = "Consumer_Complaints_Selected.csv")
# Reload the trimmed extract, keep only the seven analysis columns (dropping
# any extra column picked up on the round trip through CSV), then order the
# complaints by company and state and preview the first rows.
complaint.data <- read.csv(
  '/Users/dawig/Documents/Consumer_Complaints_Selected.csv',
  header = TRUE
)
complaint.data <- complaint.data %>%
  select(
    Product, Company, State, Submitted.via,
    Company.response.to.consumer, Timely.response., Consumer.disputed.
  )
complaint.data.2 <- arrange(complaint.data, Company, State)
head(complaint.data.2)
## Product Company State Submitted.via Company.response.to.consumer Timely.response. Consumer.disputed.
## 1 Credit reporting, credit repair services, or other personal consumer reports (Former)Shapiro, Swertfeger & Hasty, LLP AL Web Untimely response No N/A
## 2 Credit reporting, credit repair services, or other personal consumer reports (Former)Shapiro, Swertfeger & Hasty, LLP CA Web Untimely response No N/A
## 3 Mortgage (Former)Shapiro, Swertfeger & Hasty, LLP DE Referral Untimely response No No
## 4 Debt collection (Former)Shapiro, Swertfeger & Hasty, LLP GA Referral Untimely response No No
## 5 Mortgage (Former)Shapiro, Swertfeger & Hasty, LLP GA Referral Untimely response No No
## 6 Debt collection (Former)Shapiro, Swertfeger & Hasty, LLP GA Referral Untimely response No No
# Complaints answered on time ("Yes") per company, restricted to companies
# with more than 5000 such complaints, largest first.
complaint.data.yes <- complaint.data.2 %>%
  group_by(Company, Timely.response.) %>%
  summarise(number_complaints.y = n()) %>%
  filter(number_complaints.y > 5000 & Timely.response. == "Yes") %>%
  arrange(desc(number_complaints.y), Timely.response.)

# Complaints NOT answered on time ("No") per company, largest first.
complaint.data.no <- complaint.data.2 %>%
  group_by(Company, Timely.response.) %>%
  summarise(number_complaints.n = n()) %>%
  filter(Timely.response. == "No") %>%
  arrange(desc(number_complaints.n), Timely.response.)

# Total complaints per company, restricted to companies with more than 5100
# complaints overall, largest first.
complaint.data.all <- complaint.data.2 %>%
  group_by(Company) %>%
  summarise(number_complaints = n()) %>%
  filter(number_complaints > 5100) %>%
  arrange(desc(number_complaints))
# Build one row per high-volume company with its total, timely ("Yes") and
# untimely ("No") complaint counts, then rank companies by the share of
# untimely responses.
#
# Changes from the earlier version:
#  * join keys are explicit (by = "Company") instead of being guessed from the
#    shared column names;
#  * the Timely.response. column is dropped by name rather than by position;
#  * slow_rank no longer relies on the magic constant 26 (= 25 companies + 1).
#    rank(desc(x)) equals n + 1 - rank(x) under the default "average" tie
#    handling, so the result is the same for 25 rows and stays correct when
#    the number of companies changes.
complaint.data.2 <- complaint.data.all %>%
  left_join(select(complaint.data.yes, -Timely.response.), by = "Company") %>%
  left_join(select(complaint.data.no, -Timely.response.), by = "Company")

# Companies missing from either count table get NA from the join; treat that
# as zero complaints.
complaint.data.2[is.na(complaint.data.2)] <- 0

complaint.data.2 %<>%
  mutate(percent_slow = number_complaints.n / number_complaints) %>%
  # slow_rank 1 = largest share of untimely responses.
  mutate(slow_rank = rank(desc(percent_slow)))
# Reload the full complaint export and keep only the rows whose Company name
# contains "WELLS"; preview the subset and save it as its own CSV.
complaint.data <- read.csv(
  '/Users/dawig/Documents/Consumer_Complaintsb.csv',
  header = TRUE
)
pattern <- "WELLS"
is_wells <- grepl(pattern, complaint.data[["Company"]])
complaint.data.wells <- complaint.data[is_wells, , drop = FALSE]
head(complaint.data.wells)
# NOTE(review): written relative to the working directory, but read back from
# /Users/dawig/Documents/ in the next step — confirm they are the same folder.
write.csv(complaint.data.wells, file = "Consumer_Complaints_Wells.csv")
# Daily complaint counts for the Wells subset, with zero-padded month/day
# strings, a proper Date column, and a "Year.Month" label column.
complaint.data <- read.csv(
  '/Users/dawig/Documents/Consumer_Complaints_Wells.csv',
  header = TRUE
)

# Split "month/day/year" into its parts and count complaints per calendar day.
Yearly.chart <- complaint.data %>%
  separate(Date.received, c("Month", "Day", "Year"), sep = "/") %>%
  group_by(Year, Month, Day) %>%
  summarise(number_complaints.day = n())

# transform() returns a plain data.frame; Day and Month are then re-rendered
# as two-digit strings so the united date parses consistently.
Yearly.chart <- transform(Yearly.chart, Day = as.numeric(Day))
Yearly.chart[["Day"]] <- sprintf("%02.0f", Yearly.chart[["Day"]])
Yearly.chart <- transform(Yearly.chart, Month = as.numeric(Month))
Yearly.chart[["Month"]] <- sprintf("%02.0f", Yearly.chart[["Month"]])

# "Year-Month-Day" becomes the first column and is converted to Date.
# NOTE(review): as.Date() here assumes four-digit years in Date.received —
# confirm against the source file.
Yearly.chart <- unite(Yearly.chart, Date, Year, Month, Day, sep = "-", remove = FALSE)
Yearly.chart[["Date"]] <- as.Date(Yearly.chart[["Date"]])
Yearly.chart <- unite(Yearly.chart, Year_Month, Year, Month, sep = ".", remove = FALSE)
# Read the NAICS-sector employment file as a raw character matrix, because the
# real header sits on row 5 and is surrounded by preamble rows.
Business.size <- as.matrix(
  read.csv('/Users/dawig/Documents/cd_naicssector_2015.csv', header = FALSE)
)

# Drop the preamble rows (keeping row 5 as the header) and columns 14-16.
Business.size <- Business.size[-c(1:4, 6, 7), -(14:16)]
colnames(Business.size) <- Business.size[1, ]
Business.size <- tbl_df(Business.size[-1, ])

# Keep the four columns used below and give them workable names.
Business.size <- Business.size[, c(2, 5, 6, 9)]
colnames(Business.size) <- c(
  'State', 'Description', 'Business_size_category', 'Employment'
)
Business.size <- transform(Business.size, Employment = as.numeric(Employment))

# Total employment per state / sector / business-size category, excluding the
# all-sector 'Total' rows.
Business.size <- Business.size %>%
  filter(Description != 'Total') %>%
  arrange(State, Description, Business_size_category) %>%
  group_by(State, Description, Business_size_category) %>%
  summarise(Number_of_Employees = sum(Employment))

# One column per business-size category; missing combinations count as zero.
Business.size <- spread(Business.size, Business_size_category, Number_of_Employees)
Business.size[is.na(Business.size)] <- 0

# State-wide employment (sum of the '1: Total' column over sectors), joined
# back so each sector's share of its state's employment can be computed.
State.total.df <- Business.size %>%
  group_by(State) %>%
  summarise(Number_of_Employees_State = sum(`1: Total`))
Business.size <- left_join(Business.size, State.total.df)
Business.size <- Business.size %>%
  mutate(Sector_share = `1: Total` / Number_of_Employees_State)
# Build one State-vs-Sector_share scatter plot for a single sector.
#
# The six plots below were previously six copy-pasted ggplot chains that
# differed only in sector, cutoff, direction, y-range, background colour and
# (for the upper panel) the title. This helper applies the same layers in the
# same order, so the rendered plots are unchanged.
#
#   data         table with Description, State and Sector_share columns
#   sector_name  value of Description to plot
#   share_cutoff threshold on Sector_share
#   keep_above   TRUE keeps rows with Sector_share > share_cutoff,
#                FALSE keeps rows with Sector_share < share_cutoff
#   y_limits     length-2 numeric vector passed to ylim()
#   panel_fill   panel background colour
#   plot_title   title for the upper panel; NULL (default) adds no title or
#                subtitle, as used for the lower panel
#
# Returns the ggplot object.
sector_share_plot <- function(data, sector_name, share_cutoff, keep_above,
                              y_limits, panel_fill, plot_title = NULL) {
  sector_rows <- filter(data, Description == sector_name)
  sector_rows <- if (keep_above) {
    filter(sector_rows, Sector_share > share_cutoff)
  } else {
    filter(sector_rows, Sector_share < share_cutoff)
  }

  p <- ggplot(sector_rows, aes(x = State, y = Sector_share)) +
    # size is mapped inside aes(), as in the original chains; the legend this
    # creates is hidden below.
    geom_point(aes(x = State, y = Sector_share, size = 2)) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    theme(panel.background = element_rect(fill = panel_fill)) +
    theme(legend.position = "none")

  if (!is.null(plot_title)) {
    p <- p +
      labs(title = plot_title, subtitle = 'Top 10 and Bottom 10') +
      theme(plot.title = element_text(colour = '#af571c', size = 20))
  }

  p + xlab("") + ylim(y_limits[1], y_limits[2])
}

# Educational Services: highest-share states on top, lowest-share below.
plot.a <- sector_share_plot(
  Business.size, 'Educational Services', .0374, TRUE, c(.02, .12), '#52b781',
  plot_title = 'Education Sector as a Proportion of Employment'
)
plot.b <- sector_share_plot(
  Business.size, 'Educational Services', .0177, FALSE, c(.009, .018), '#52b781'
)
grid.arrange(plot.a, plot.b, nrow = 2)

# Mining, Quarrying, and Oil and Gas Extraction.
plot.a <- sector_share_plot(
  Business.size, 'Mining, Quarrying, and Oil and Gas Extraction', .011, TRUE,
  c(.008, .12), '#7fd893',
  plot_title = 'Mining, Quarrying, and Oil and Gas Extraction Sector as a Proportion of Employment'
)
plot.b <- sector_share_plot(
  Business.size, 'Mining, Quarrying, and Oil and Gas Extraction', .0005, FALSE,
  c(.000005, .0005), '#7fd893'
)
grid.arrange(plot.a, plot.b, nrow = 2)

# Information.
plot.a <- sector_share_plot(
  Business.size, 'Information', .028, TRUE, c(.025, .055), '#52b781',
  plot_title = 'Information Sector as a Proportion of Employment'
)
plot.b <- sector_share_plot(
  Business.size, 'Information', .0187, FALSE, c(.0142, .0188), '#52b781'
)
grid.arrange(plot.a, plot.b, nrow = 2)
# Save the Business.size table (including the computed Sector_share column)
# to a CSV file for inspection.
write.csv(Business.size, file = "/Users/dawig/Documents/cd_Naic_Test.csv")
# Download the Pokemon data set, which is stored transposed (one variable per
# row, variable names in the first column), and flip it so that each row is
# one observation and each column one variable.
Poke.data <- read.csv(
  text = getURL("https://raw.githubusercontent.com/gabartomeo/data607-cunysps/master/Project02/pokemon_proj02.csv"),
  header = FALSE,
  stringsAsFactors = FALSE
)
Poke.data <- t(Poke.data)
colnames(Poke.data) <- Poke.data[1, ]
Poke.data <- Poke.data[-1, ]

# Numeric copy of the columns used for modelling. The matrix is sized from the
# data instead of the previously hard-coded 801 x 41, so a refreshed download
# with a different number of rows no longer breaks the column assignments.
Poke.matrix <- matrix(ncol = ncol(Poke.data), nrow = nrow(Poke.data))
for (i in c(39, 2:26)) {
  Poke.matrix[, i] <- as.numeric(as.character(Poke.data[, i]))
}

# R-squared of column 39 regressed on each candidate predictor column, stored
# at the predictor's column index.
# NOTE(review): column 25 is skipped, presumably because it is not usable as a
# numeric predictor — confirm.
# Each model is now fitted once; previously lm() was run twice per iteration.
# lin.model.temp is left holding the last fit (column 26), as before.
lin.model <- matrix(nrow = ncol(Poke.matrix))
for (i in c(2:24, 26)) {
  lin.model.temp <- lm(Poke.matrix[, 39] ~ Poke.matrix[, i])
  lin.model[i] <- summary(lin.model.temp)$r.squared
}
lin.model[1:26]
## [1] NA 0.0010181653 0.0015109262 0.0158768474 0.0105937777 0.0097068355 0.0261029767 0.0019469150 0.0160640089 0.0022914830 0.0025128752 0.0119267546 0.0003005317 0.0184437381 0.0415693352 0.0077716885 0.0127109526 0.0002607293 0.0146553462 0.1460637040 0.1965023900 0.1647058290 0.2098654149 0.1071815038 NA 0.1798325444
# Refit the single-predictor models for the six columns with the highest
# R-squared in the scan above, and print each model's own summary.
#
# Bug fix: every fit used to be followed by summary(lin.model.temp), which
# re-printed the model left over from the last iteration of the scan loop
# (column 26) instead of the model just fitted. The console output pasted
# here was therefore six identical copies of the column-26 summary
# (R-squared 0.1798); it has been removed. Re-run to regenerate it.
#
# For reference, the R-squared values printed by the scan were:
#   column 20: 0.1461   column 21: 0.1965   column 22: 0.1647
#   column 23: 0.2099   column 24: 0.1072   column 26: 0.1798
lin.model.20 <- lm(Poke.matrix[, 39] ~ Poke.matrix[, 20])
summary(lin.model.20)

lin.model.21 <- lm(Poke.matrix[, 39] ~ Poke.matrix[, 21])
summary(lin.model.21)

lin.model.22 <- lm(Poke.matrix[, 39] ~ Poke.matrix[, 22])
summary(lin.model.22)

lin.model.23 <- lm(Poke.matrix[, 39] ~ Poke.matrix[, 23])
summary(lin.model.23)

lin.model.24 <- lm(Poke.matrix[, 39] ~ Poke.matrix[, 24])
summary(lin.model.24)

lin.model.26 <- lm(Poke.matrix[, 39] ~ Poke.matrix[, 26])
summary(lin.model.26)
# Wrap the Poke.matrix matrix as a tibble via tbl_df().
# NOTE(review): tbl_df() is deprecated in recent dplyr releases in favour of
# tibble::as_tibble() — confirm against the installed dplyr version.
Poke.matrix<-tbl_df(Poke.matrix)