In general, this project aims to understand the relationships between entrepreneurs' profiles, their experiences in a startup incubator, and their incubator outcomes.
Graphs will have this format: Y-Axis= Variable of interest, X-Axis=Survival, Right-Axis=Angel Investment
More work experience -> More Survival -> More Angel Investment
# Group the rows by survival and angel-investment status, then report the
# group size and the mean of each pre-entry management item.
gSmallData <- group_by(smallData, SurvivalFeb14, InvestmentAngel)
summarize(
  gSmallData,
  count = n(),
  PreEntryManagement1Mean = mean(PreEntryManagement1),
  PreEntryManagement2 = mean(PreEntryManagement2),
  PreEntryManagement3 = mean(PreEntryManagement3)
)
## Source: local data frame [4 x 6]
## Groups: SurvivalFeb14
##
## SurvivalFeb14 InvestmentAngel count PreEntryManagement1Mean
## 1 No No 32 2.594
## 2 No Yes 4 3.500
## 3 Yes No 25 2.840
## 4 Yes Yes 31 3.129
## Variables not shown: PreEntryManagement2 (dbl), PreEntryManagement3 (dbl)
# Boxplots of each pre-entry management item by survival, with one facet row
# per angel-investment status.
qplot(
  x = factor(SurvivalFeb14), y = PreEntryManagement1,
  data = smallData, geom = "boxplot"
) + facet_grid(InvestmentAngel ~ .)
qplot(
  x = factor(SurvivalFeb14), y = PreEntryManagement2,
  data = smallData, geom = "boxplot"
) + facet_grid(InvestmentAngel ~ .)
qplot(
  x = factor(SurvivalFeb14), y = PreEntryManagement3,
  data = smallData, geom = "boxplot"
) + facet_grid(InvestmentAngel ~ .)
More management experience -> More survival -> More Angel Investment
# Group size and mean of each pre-planning item per survival x
# angel-investment group. Only the competitive item drops NAs.
gSmallData <- group_by(smallData, SurvivalFeb14, InvestmentAngel)
summarize(
  gSmallData,
  count = n(),
  PrePlanningMarketMean = mean(PrePlanningMarket),
  PrePlanningCompetitiveMean = mean(PrePlanningCompetitive, na.rm = TRUE),
  PrePlanningBrandMean = mean(PrePlanningBrand),
  PrePlanningModelMean = mean(PrePlanningModel)
)
## Source: local data frame [4 x 7]
## Groups: SurvivalFeb14
##
## SurvivalFeb14 InvestmentAngel count PrePlanningMarketMean
## 1 No No 32 2.750
## 2 No Yes 4 2.750
## 3 Yes No 25 3.560
## 4 Yes Yes 31 3.516
## Variables not shown: PrePlanningCompetitiveMean (dbl),
## PrePlanningBrandMean (dbl), PrePlanningModelMean (dbl)
# Boxplots of each pre-planning item by survival, faceted by angel investment.
qplot(
  x = factor(SurvivalFeb14), y = PrePlanningMarket,
  data = gSmallData, geom = "boxplot"
) + facet_grid(InvestmentAngel ~ .)
qplot(
  x = factor(SurvivalFeb14), y = PrePlanningCompetitive,
  data = gSmallData, geom = "boxplot"
) + facet_grid(InvestmentAngel ~ .)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
qplot(
  x = factor(SurvivalFeb14), y = PrePlanningBrand,
  data = gSmallData, geom = "boxplot"
) + facet_grid(InvestmentAngel ~ .)
qplot(
  x = factor(SurvivalFeb14), y = PrePlanningModel,
  data = gSmallData, geom = "boxplot"
) + facet_grid(InvestmentAngel ~ .)
More pre-planning -> More Survival -> More Angel Investment
# Group size plus mean, median and standard deviation of StartupExperience
# (NAs dropped) per survival x angel-investment group.
gSmallData <- group_by(smallData, SurvivalFeb14, InvestmentAngel)
summarize(
  gSmallData,
  count = n(),
  StartupExperienceMean = mean(StartupExperience, na.rm = TRUE),
  StartupExperienceMedian = median(StartupExperience, na.rm = TRUE),
  StartupExperienceSD = sd(StartupExperience, na.rm = TRUE)
)
## Source: local data frame [4 x 6]
## Groups: SurvivalFeb14
##
## SurvivalFeb14 InvestmentAngel count StartupExperienceMean
## 1 No No 32 1.806
## 2 No Yes 4 1.750
## 3 Yes No 25 1.500
## 4 Yes Yes 31 1.871
## Variables not shown: StartupExperienceMedian (int), StartupExperienceSD
## (dbl)
# Boxplot of StartupExperience by survival, faceted by angel investment.
qplot(
  x = factor(SurvivalFeb14), y = StartupExperience,
  data = gSmallData, geom = "boxplot"
) + facet_grid(InvestmentAngel ~ .)
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).
More start-up experience ~-> More Survival and Angel Investment
# Human-readable survival label, stored on smallData itself so that the
# grouping below can see it. SurvivalFeb14 still holds "No"/"Yes" at this
# point (the integer recoding happens later in the file), so comparing it to
# 1 would label every row "Yes"; the existing labels are reused instead.
smallData$Survival <- as.character(smallData$SurvivalFeb14)
gSmallData <- group_by(smallData, Survival, InvestmentAngel, IncubatorMentor)
# NOTE(review): the earlier version added Survival to gSmallData only, so the
# group_by() above failed with "index out of bounds". The captured output
# that follows this chunk predates the fix and should be regenerated.
summarize(gSmallData, count = n())
## Source: local data frame [4 x 3]
## Groups: SurvivalFeb14
##
## SurvivalFeb14 InvestmentAngel count
## 1 No No 32
## 2 No Yes 4
## 3 Yes No 25
## 4 Yes Yes 31
# Count of rows per survival label, filled by incubator mentorship.
# Survival is a discrete label, so a bar geom is used: "histogram" bins a
# continuous x and is rejected for discrete variables by ggplot2 >= 2.0.
qplot(data = gSmallData, x = Survival, geom = "bar", fill = IncubatorMentor)
More mentorship -> More survival and Angel Investment
# pairs.panels() needs numeric columns, so both outcome variables are
# replaced by the integer codes of their factor levels.
smallData[["SurvivalFeb14"]] <- as.integer(as.factor(smallData[["SurvivalFeb14"]]))
smallData[["InvestmentAngel"]] <- as.integer(as.factor(smallData[["InvestmentAngel"]]))
# Mean strong-tie network support (spouse, relatives, friends) per group.
gSmallData <- group_by(smallData, SurvivalFeb14, InvestmentAngel)
summarize(
  gSmallData,
  count = n(),
  NSSpouse = mean(NetworkSupportStrongSpouse, na.rm = TRUE),
  NSRelatives = mean(NetworkSupportStrongRelatives),
  NSFriends = mean(NetworkSupportStrongFriends)
)
## Source: local data frame [4 x 6]
## Groups: SurvivalFeb14
##
## SurvivalFeb14 InvestmentAngel count NSSpouse NSRelatives NSFriends
## 1 1 1 32 3.870 3.656 3.688
## 2 1 2 4 3.333 3.750 4.000
## 3 2 1 25 3.818 3.800 4.000
## 4 2 2 31 3.840 3.968 4.000
# Boxplots of each strong-tie support item by survival, faceted by angel
# investment, followed by a pairs panel of the same variables.
qplot(
  x = factor(SurvivalFeb14), y = NetworkSupportStrongSpouse,
  data = gSmallData, geom = "boxplot"
) + facet_grid(InvestmentAngel ~ .)
## Warning: Removed 23 rows containing non-finite values (stat_boxplot).
## Warning: Removed 7 rows containing non-finite values (stat_boxplot).
qplot(
  x = factor(SurvivalFeb14), y = NetworkSupportStrongRelatives,
  data = gSmallData, geom = "boxplot"
) + facet_grid(InvestmentAngel ~ .)
qplot(
  x = factor(SurvivalFeb14), y = NetworkSupportStrongFriends,
  data = gSmallData, geom = "boxplot"
) + facet_grid(InvestmentAngel ~ .)
pairs.panels(
  x = dplyr::select(
    smallData, SurvivalFeb14, InvestmentAngel,
    NetworkSupportStrongSpouse:NetworkSupportStrongFriends
  ),
  pch = ".",
  main = "Survival and NetworkSupport (Strong ties)"
)
Only weak evidence that network support (strong ties) affects survival or investment
# Mean weak-tie network support (business contacts, acquaintances, employers,
# co-workers) per survival group, NAs dropped.
gSmallData <- group_by(smallData, SurvivalFeb14)
summarize(
  gSmallData,
  count = n(),
  NSBus = mean(NetworkSupportWeakBusiness, na.rm = TRUE),
  NSAcq = mean(NetworkSupportWeakAcquaintances, na.rm = TRUE),
  NSEmpl = mean(NetworkSupportWeakEmployers, na.rm = TRUE),
  NSCoW = mean(NetworkSupportWeakCoWorkers, na.rm = TRUE)
)
## Source: local data frame [2 x 6]
##
## SurvivalFeb14 count NSBus NSAcq NSEmpl NSCoW
## 1 1 36 3.571 3.286 2.719 3.061
## 2 2 56 3.852 3.455 3.056 3.707
# Boxplots of each weak-tie support item by survival, faceted by angel
# investment, followed by a pairs panel of the same variables.
qplot(
  x = factor(SurvivalFeb14), y = NetworkSupportWeakBusiness,
  data = gSmallData, geom = "boxplot"
) + facet_grid(InvestmentAngel ~ .)
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).
qplot(
  x = factor(SurvivalFeb14), y = NetworkSupportWeakAcquaintances,
  data = gSmallData, geom = "boxplot"
) + facet_grid(InvestmentAngel ~ .)
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).
qplot(
  x = factor(SurvivalFeb14), y = NetworkSupportWeakEmployers,
  data = gSmallData, geom = "boxplot"
) + facet_grid(InvestmentAngel ~ .)
## Warning: Removed 12 rows containing non-finite values (stat_boxplot).
## Warning: Removed 12 rows containing non-finite values (stat_boxplot).
qplot(
  x = factor(SurvivalFeb14), y = NetworkSupportWeakCoWorkers,
  data = gSmallData, geom = "boxplot"
) + facet_grid(InvestmentAngel ~ .)
## Warning: Removed 12 rows containing non-finite values (stat_boxplot).
## Warning: Removed 6 rows containing non-finite values (stat_boxplot).
pairs.panels(
  x = dplyr::select(
    smallData, SurvivalFeb14, InvestmentAngel,
    NetworkSupportWeakBusiness:NetworkSupportWeakCoWorkers
  ),
  pch = ".",
  main = "Survival and NetworkSupport (Weak ties)"
)
The relationship between network support (weak ties) and survival seems to be positive, but it is not very strong.
# Mean of each indirect investor-network item per survival x
# angel-investment group, NAs dropped.
gSmallData <- group_by(smallData, SurvivalFeb14, InvestmentAngel)
summarize(
  gSmallData,
  count = n(),
  IInv1 = mean(NetworkInvestorIndirect1, na.rm = TRUE),
  IInv2 = mean(NetworkInvestorIndirect2, na.rm = TRUE),
  IInv3 = mean(NetworkInvestorIndirect3, na.rm = TRUE)
)
## Source: local data frame [4 x 6]
## Groups: SurvivalFeb14
##
## SurvivalFeb14 InvestmentAngel count IInv1 IInv2 IInv3
## 1 1 1 32 2.750 2.438 2.656
## 2 1 2 4 3.500 2.750 2.500
## 3 2 1 25 3.708 3.500 3.250
## 4 2 2 31 3.800 3.933 3.900
# Text label for survival: code 1 becomes "No", anything else "Yes".
gSmallData$Survival <- ifelse(
  test = gSmallData$SurvivalFeb14 == 1,
  yes = "No",
  no = "Yes"
)
# Boxplots of each indirect investor-network item by survival, faceted by
# angel investment, followed by a pairs panel of the same variables.
qplot(
  x = Survival, y = NetworkInvestorIndirect1,
  data = gSmallData, geom = "boxplot"
) + facet_grid(InvestmentAngel ~ .)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
qplot(
  x = factor(SurvivalFeb14), y = NetworkInvestorIndirect2,
  data = gSmallData, geom = "boxplot"
) + facet_grid(InvestmentAngel ~ .)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
qplot(
  x = factor(SurvivalFeb14), y = NetworkInvestorIndirect3,
  data = gSmallData, geom = "boxplot"
) + facet_grid(InvestmentAngel ~ .)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
pairs.panels(
  x = dplyr::select(
    smallData, SurvivalFeb14, InvestmentAngel,
    NetworkInvestorIndirect1:NetworkInvestorIndirect3
  ),
  pch = ".",
  main = "Survival and NetworkSupport (Indirect Investor ties)"
)
Strong effect: it seems that an indirect investor network -> higher survival and angel investment
# Mean of each direct investor-network item per survival x angel-investment
# group, NAs dropped.
gSmallData <- group_by(smallData, SurvivalFeb14, InvestmentAngel)
summarize(
  gSmallData,
  count = n(),
  DInv1 = mean(NetworkInvestorDirect1, na.rm = TRUE),
  DInv2 = mean(NetworkInvestorDirect2, na.rm = TRUE),
  DInv3 = mean(NetworkInvestorDirect3, na.rm = TRUE)
)
## Source: local data frame [4 x 6]
## Groups: SurvivalFeb14
##
## SurvivalFeb14 InvestmentAngel count DInv1 DInv2 DInv3
## 1 1 1 32 2.219 2.531 1.844
## 2 1 2 4 3.000 2.750 3.000
## 3 2 1 25 2.708 2.875 2.333
## 4 2 2 31 3.933 3.655 3.207
# Boxplots of each direct investor-network item by survival, faceted by angel
# investment, followed by a pairs panel of the same variables.
qplot(
  x = factor(SurvivalFeb14), y = NetworkInvestorDirect1,
  data = gSmallData, geom = "boxplot"
) + facet_grid(InvestmentAngel ~ .)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
qplot(
  x = factor(SurvivalFeb14), y = NetworkInvestorDirect2,
  data = gSmallData, geom = "boxplot"
) + facet_grid(InvestmentAngel ~ .)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).
qplot(
  x = factor(SurvivalFeb14), y = NetworkInvestorDirect3,
  data = gSmallData, geom = "boxplot"
) + facet_grid(InvestmentAngel ~ .)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).
pairs.panels(
  x = dplyr::select(
    smallData, SurvivalFeb14, InvestmentAngel,
    NetworkInvestorDirect1:NetworkInvestorDirect3
  ),
  pch = ".",
  main = "Survival and NetworkSupport (Direct Investor ties)"
)
Strong effect: it seems that a direct investor network -> higher survival and angel investment
# Mean job and incubator satisfaction per survival group, NAs dropped.
gSmallData <- group_by(smallData, SurvivalFeb14)
summarize(
  gSmallData,
  count = n(),
  JobSatisfactionM = mean(SatisfactionJob, na.rm = TRUE),
  IncubatorSatisfactionM = mean(SatisfactionIncubator, na.rm = TRUE)
)
## Source: local data frame [2 x 4]
##
## SurvivalFeb14 count JobSatisfactionM IncubatorSatisfactionM
## 1 1 36 3.639 2.639
## 2 2 56 4.357 3.643
# Boxplots of job and incubator satisfaction by survival, faceted by angel
# investment, followed by a pairs panel of the same variables.
qplot(
  x = factor(SurvivalFeb14), y = SatisfactionJob,
  data = gSmallData, geom = "boxplot"
) + facet_grid(InvestmentAngel ~ .)
qplot(
  x = factor(SurvivalFeb14), y = SatisfactionIncubator,
  data = gSmallData, geom = "boxplot"
) + facet_grid(InvestmentAngel ~ .)
# The plot title previously lacked its closing parenthesis.
pairs.panels(
  dplyr::select(
    smallData, SurvivalFeb14, InvestmentAngel,
    SatisfactionJob:SatisfactionIncubator
  ),
  pch = ".",
  main = "Survival and Satisfaction (Job and Incubator)"
)
More satisfaction -> More survival/angel investment
# The three resource-allocation columns carry a "%" sign; remove the first
# "%" from each value and convert the remainder to integer.
smallData$ResourceInvestment <- as.integer(
  sub(pattern = "%", replacement = "", x = smallData$ResourceInvestment)
)
smallData$ResourceOperations <- as.integer(
  sub(pattern = "%", replacement = "", x = smallData$ResourceOperations)
)
smallData$ResourceHuman <- as.integer(
  sub(pattern = "%", replacement = "", x = smallData$ResourceHuman)
)
# Mean percentage per resource type for each survival x angel-investment
# group, NAs dropped.
gSmallData <- group_by(smallData, SurvivalFeb14, InvestmentAngel)
dplyr::summarize(
  gSmallData,
  count = n(),
  MResInv = mean(ResourceInvestment, na.rm = TRUE),
  MResOpe = mean(ResourceOperations, na.rm = TRUE),
  MResHum = mean(ResourceHuman, na.rm = TRUE)
)
## Source: local data frame [4 x 6]
## Groups: SurvivalFeb14
##
## SurvivalFeb14 InvestmentAngel count MResInv MResOpe MResHum
## 1 1 1 32 16.29 39.19 44.84
## 2 1 2 4 18.75 42.50 36.25
## 3 2 1 25 18.54 44.58 36.67
## 4 2 2 31 11.13 40.16 48.39
# Boxplots of each resource percentage by survival, faceted by angel
# investment.
qplot(
  x = factor(SurvivalFeb14), y = ResourceInvestment,
  data = gSmallData, geom = "boxplot"
) + facet_grid(InvestmentAngel ~ .)
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).
qplot(
  x = factor(SurvivalFeb14), y = ResourceOperations,
  data = gSmallData, geom = "boxplot"
) + facet_grid(InvestmentAngel ~ .)
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).
qplot(
  x = factor(SurvivalFeb14), y = ResourceHuman,
  data = gSmallData, geom = "boxplot"
) + facet_grid(InvestmentAngel ~ .)
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).
# Mean percentage of resources allocated to investment, operations and human
# resources for each survival x angel-investment group. Computed once and
# reused by both plots below. dplyr:: is spelled out so the call does not
# depend on which package's summarize() happens to be attached last.
df <- dplyr::summarize(
  gSmallData,
  count = n(),
  MResInv = mean(ResourceInvestment, na.rm = TRUE),
  MResOpe = mean(ResourceOperations, na.rm = TRUE),
  MResHum = mean(ResourceHuman, na.rm = TRUE)
)
qplot(data = df, x = SurvivalFeb14, y = MResInv)

# Reshape to long format (one row per group and resource type) for the dodged
# bar chart. Done in base R: melt() is not attached at this point (the call
# failed with "could not find function"), and its id argument was being given
# column values rather than column names. `count` is left out so that only
# the three percentage columns are plotted.
resourceCols <- c("MResInv", "MResOpe", "MResHum")
df <- as.data.frame(df)
df <- data.frame(
  SurvivalFeb14 = rep(df$SurvivalFeb14, times = length(resourceCols)),
  InvestmentAngel = rep(df$InvestmentAngel, times = length(resourceCols)),
  variable = factor(rep(resourceCols, each = nrow(df)), levels = resourceCols),
  value = unlist(df[resourceCols], use.names = FALSE)
)

# One bar per resource type within each survival group, one facet row per
# angel-investment status.
ggplot(data = df, aes(x = factor(SurvivalFeb14), y = value)) +
  geom_bar(aes(fill = factor(variable)), position = "dodge", stat = "identity") +
  facet_grid(InvestmentAngel ~ .) +
  ggtitle(label = "Investment by teams, RightFacet=InvestmentAngel (True=2)") +
  xlab(label = "Survival (Yes=2)") +
  ylab(label = "Investment (Percent)")
On investment, there seems to be a weak negative relationship with Survival (maybe). On operations, no effect. On human resources, it's a weak effect as well. No conclusions.
Upon closer inspection, it might be that the highest percentage of investment in human resources at this stage predicts better survival and better investment. Maybe.
Basic Team Variables: TeamNumberOwners, TeamNumberDecisions, TeamAge
# Two respondents (Nathan and Ivette) have mistyped age brackets; map both
# variants onto the intended "25-34 years" bracket.
smallData$TeamAge[smallData$TeamAge %in% c("15-34 years", "24-34 years")] <- "25-34 years"
# Integer code of the age bracket, so that a mean can be taken below.
smallData$TeamAgeNum <- as.integer(factor(smallData$TeamAge))
# Mean number of owners, number of decision makers and age code per
# survival x angel-investment group, NAs dropped.
gSmallData <- group_by(smallData, SurvivalFeb14, InvestmentAngel)
dplyr::summarize(
  gSmallData,
  count = n(),
  TeamOwnersM = mean(TeamNumberOwners, na.rm = TRUE),
  TeamDecisionsM = mean(TeamNumberDecisions, na.rm = TRUE),
  TeamAgeM = mean(TeamAgeNum, na.rm = TRUE)
)
## Source: local data frame [4 x 6]
## Groups: SurvivalFeb14
##
## SurvivalFeb14 InvestmentAngel count TeamOwnersM TeamDecisionsM TeamAgeM
## 1 1 1 32 2.062 2.033 2.156
## 2 1 2 4 2.000 2.250 2.000
## 3 2 1 25 2.208 2.250 2.120
## 4 2 2 31 2.645 2.400 2.032
# Stacked counts of age bracket, number of owners and number of decision
# makers, split by survival. Two changes from the earlier version:
#  - fill uses factor(SurvivalFeb14): the column is an integer code here, and
#    an integer fill is mapped to a continuous colour gradient rather than
#    one colour per survival group;
#  - geom is "bar": the x variables are discrete, and "histogram" (which bins
#    a continuous x) is rejected for discrete variables by ggplot2 >= 2.0.
qplot(
  data = gSmallData, x = factor(TeamAge), geom = "bar",
  group = SurvivalFeb14, fill = factor(SurvivalFeb14),
  main = "Histogram of Team Age groups and Survival"
)
qplot(
  data = gSmallData, x = factor(TeamNumberOwners), geom = "bar",
  group = SurvivalFeb14, fill = factor(SurvivalFeb14),
  main = "Histogram of the Number of team owners and survival"
)
qplot(
  data = gSmallData, x = factor(TeamNumberDecisions), geom = "bar",
  group = SurvivalFeb14, fill = factor(SurvivalFeb14),
  main = "histogram of the number of team members who make decisions"
)
# Boxplots of owner and decision-maker counts by survival, faceted by angel
# investment.
qplot(
  data = gSmallData, x = factor(SurvivalFeb14), y = TeamNumberOwners,
  geom = "boxplot"
) + facet_grid(InvestmentAngel ~ .)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
qplot(
  data = gSmallData, x = factor(SurvivalFeb14), y = TeamNumberDecisions,
  geom = "boxplot"
) + facet_grid(InvestmentAngel ~ .)
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
It's possible that a higher number of team owners (and of members who make decisions) increases survival chances. It's not a strong relationship. It is more interesting to see this data in the stacked histograms. TeamAge seems to be fully concentrated in the middle layer.
# Mean of each team-heterogeneity item per survival group, NAs dropped.
gSmallData <- group_by(smallData, SurvivalFeb14)
dplyr::summarize(
  gSmallData,
  count = n(),
  THeterM1 = mean(TeamHeterogeneity1, na.rm = TRUE),
  THeterM2 = mean(TeamHeterogeneity2, na.rm = TRUE),
  THeterM3 = mean(TeamHeterogeneity3, na.rm = TRUE)
)
## Source: local data frame [2 x 5]
##
## SurvivalFeb14 count THeterM1 THeterM2 THeterM3
## 1 1 36 3.343 3.400 3.429
## 2 2 56 3.696 3.821 3.964
# Boxplots of each team-heterogeneity item by survival, faceted by angel
# investment.
qplot(
  x = factor(SurvivalFeb14), y = TeamHeterogeneity1,
  data = gSmallData, geom = "boxplot"
) + facet_grid(InvestmentAngel ~ .)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
qplot(
  x = factor(SurvivalFeb14), y = TeamHeterogeneity2,
  data = gSmallData, geom = "boxplot"
) + facet_grid(InvestmentAngel ~ .)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
qplot(
  x = factor(SurvivalFeb14), y = TeamHeterogeneity3,
  data = gSmallData, geom = "boxplot"
) + facet_grid(InvestmentAngel ~ .)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
Small positive relationship between team heterogeneity and both survival and angel investment
# Mean of each team-flexibility item per survival group, NAs dropped.
gSmallData <- group_by(smallData, SurvivalFeb14)
dplyr::summarize(
  gSmallData,
  count = n(),
  TFlexM1 = mean(TeamFlexibility1, na.rm = TRUE),
  TFlexM2 = mean(TeamFlexibility2, na.rm = TRUE),
  TFlexM3 = mean(TeamFlexibility3, na.rm = TRUE)
)
## Source: local data frame [2 x 5]
##
## SurvivalFeb14 count TFlexM1 TFlexM2 TFlexM3
## 1 1 36 3.086 2.257 2.514
## 2 2 56 3.768 3.196 2.982
# Boxplots of each team-flexibility item by survival (no faceting here).
qplot(
  x = factor(SurvivalFeb14), y = TeamFlexibility1,
  data = gSmallData, geom = "boxplot"
)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
qplot(
  x = factor(SurvivalFeb14), y = TeamFlexibility2,
  data = gSmallData, geom = "boxplot"
)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
qplot(
  x = factor(SurvivalFeb14), y = TeamFlexibility3,
  data = gSmallData, geom = "boxplot"
)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
Team flexibility -> Higher survival
# Mean of the inverse-scored relative team size per survival group, NAs
# dropped.
gSmallData <- group_by(smallData, SurvivalFeb14)
dplyr::summarize(
  gSmallData,
  count = n(),
  TeamRelativeSizeReverseM = mean(TeamRelativeSizeInverseScored, na.rm = TRUE)
)
## Source: local data frame [2 x 3]
##
## SurvivalFeb14 count TeamRelativeSizeReverseM
## 1 1 36 3.533
## 2 2 56 2.680
# Mean of each team-preference item for the current grouping of gSmallData,
# NAs dropped.
dplyr::summarize(
  gSmallData,
  count = n(),
  TPrefM1 = mean(TeamPreference1, na.rm = TRUE),
  TPrefM2 = mean(TeamPreference2, na.rm = TRUE),
  TPrefM3 = mean(TeamPreference3, na.rm = TRUE)
)
## Source: local data frame [2 x 5]
##
## SurvivalFeb14 count TPrefM1 TPrefM2 TPrefM3
## 1 1 36 3.971 3.971 4.029
## 2 2 56 4.304 4.107 4.143
# Boxplots by survival of the inverse-scored relative team size and of each
# team-preference item.
qplot(
  x = factor(SurvivalFeb14), y = TeamRelativeSizeInverseScored,
  data = gSmallData, geom = "boxplot",
  main = "Team relative size (Inverse Scored) higher means team too small"
)
## Warning: Removed 12 rows containing non-finite values (stat_boxplot).
qplot(
  x = factor(SurvivalFeb14), y = TeamPreference1,
  data = gSmallData, geom = "boxplot"
)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
qplot(
  x = factor(SurvivalFeb14), y = TeamPreference2,
  data = gSmallData, geom = "boxplot"
)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
qplot(
  x = factor(SurvivalFeb14), y = TeamPreference3,
  data = gSmallData, geom = "boxplot"
)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
No obvious effect. Only the inversely scored team size variable seems to be strong.
# Number of rows per survival x angel-investment x nationality combination.
gSmallData <- group_by(
  smallData,
  SurvivalFeb14, InvestmentAngel, PersonalNationality
)
dplyr::summarize(gSmallData, count = n())
## Source: local data frame [48 x 4]
## Groups: SurvivalFeb14, InvestmentAngel
##
## SurvivalFeb14 InvestmentAngel PersonalNationality count
## 1 1 1 Argentina 3
## 2 1 1 Brazil 2
## 3 1 1 Canada 1
## 4 1 1 Chile 7
## 5 1 1 England 1
## 6 1 1 Greece 1
## 7 1 1 India 2
## 8 1 1 Indian 1
## 9 1 1 Mexico 1
## 10 1 1 Pakistan 1
## 11 1 1 Peru 1
## 12 1 1 Sir Lanka 1
## 13 1 1 Spain 1
## 14 1 1 USA 9
## 15 1 2 Ecuador 1
## 16 1 2 Mexico 1
## 17 1 2 Russia 1
## 18 1 2 USA 1
## 19 2 1 Argentina 1
## 20 2 1 Austria 1
## 21 2 1 Chile 4
## 22 2 1 China 1
## 23 2 1 England 2
## 24 2 1 France 1
## 25 2 1 India 3
## 26 2 1 Indonesia 1
## 27 2 1 Israel 1
## 28 2 1 Italy 1
## 29 2 1 Kenya 1
## 30 2 1 Mexico 2
## 31 2 1 Peru 1
## 32 2 1 USA 5
## 33 2 2 Argentina 6
## 34 2 2 Bolivia 1
## 35 2 2 Brazil 1
## 36 2 2 Chile 2
## 37 2 2 England 1
## 38 2 2 France 1
## 39 2 2 India 1
## 40 2 2 Italy 2
## 41 2 2 Netherlands 1
## 42 2 2 New Zealand 1
## 43 2 2 Peru 1
## 44 2 2 Spain 2
## 45 2 2 USA 6
## 46 2 2 Ukraine 1
## 47 2 2 Uruguay 2
## 48 2 2 Venezuela 2
# Dodged bars of the per-country counts, coloured by survival, with one facet
# row per angel-investment status.
ggplot(
  data = dplyr::summarize(gSmallData, count = n()),
  aes(x = factor(PersonalNationality), y = count)
) +
  geom_bar(aes(fill = factor(SurvivalFeb14)), position = "dodge", stat = "identity") +
  facet_grid(InvestmentAngel ~ .) +
  ggtitle(label = "Histogram of survival and AngelInv by country, RightFacet=InvestmentAngel (True=Yes)") +
  xlab(label = "Country") +
  ylab(label = "Count")
# Stacked counts per country, split by survival and then by angel investment.
# PersonalNationality is discrete, so a bar geom is used: "histogram" bins a
# continuous x and is rejected for discrete variables by ggplot2 >= 2.0.
qplot(
  data = smallData, x = PersonalNationality, group = SurvivalFeb14,
  fill = factor(SurvivalFeb14), geom = "bar",
  main = "Histogram of countries by survival"
)
qplot(
  data = smallData, x = PersonalNationality, group = InvestmentAngel,
  fill = factor(InvestmentAngel), geom = "bar",
  main = "Histogram of countries by InvestmentAngel"
)
# Pairs panels of survival against the pre-entry variables: first business
# experience, then management and planning, then the same with angel
# investment added.
pairs.panels(
  x = dplyr::select(smallData, SurvivalFeb14, PreEntryBusinessWork:PreEntryBusinessHobbies),
  pch = ".",
  main = "Survival and PreEntry Experience"
)
pairs.panels(
  x = dplyr::select(smallData, SurvivalFeb14, PreEntryManagement1:PrePlanningModel),
  pch = ".",
  main = "Survival and PreEntry Management and PrePlanning"
)
pairs.panels(
  x = dplyr::select(smallData, SurvivalFeb14, InvestmentAngel, PreEntryManagement1:PrePlanningModel),
  pch = ".",
  main = "Survival/AngelInvestment and PreEntry Management and PrePlanning"
)
# Error bars of the management and planning items, grouped by survey source.
error.bars.by(
  x = dplyr::select(smallData, PreEntryManagement1:PrePlanningModel),
  group = smallData$SurveySource
)
# Observe the effect of the survey source = Online, Online2, and Paper
# explore correlation tests
# corr.test() is given a plain data.frame: passing the dplyr tbl directly
# failed with "arguments imply differing number of rows: 0, 3".
# NOTE(review): presumably corr.test() relies on base data.frame indexing;
# confirm the error is gone once the document is re-knit.
corr.test(
  as.data.frame(
    dplyr::select(smallData, PreEntryBusinessWork:PreEntryBusinessHobbies)
  )
)
# Significance of a single correlation of 0.44 with 92 observations.
r.test(n = 92, r12 = 0.44)
## Correlation tests
## Call:r.test(n = 92, r12 = 0.44)
## Test of significance of a correlation
## t value 4.65 with probability < 1.1e-05
## and confidence interval 0.26 0.59
# Test of the correlation matrix of the three pre-entry business items; raw
# data are passed, so the correlations are computed from them.
cortest(
  R1 = dplyr::select(smallData, PreEntryBusinessWork:PreEntryBusinessHobbies)
)
## R1 was not square, finding R from data
## Tests of correlation matrices
## Call:cortest(R1 = dplyr::select(smallData, PreEntryBusinessWork:PreEntryBusinessHobbies))
## Chi Square value 46.83 with df = 3 with probability < 3.8e-10
# Exploratory Factor Analysis
# Pre-entry: four-factor solution over every column from
# PreEntryBusinessWork through StartupExperience.
fa.preentry <- fa(
  r = dplyr::select(smallData, PreEntryBusinessWork:StartupExperience),
  nfactors = 4
)
## Loading required package: GPArotation
## Warning: convergence not obtained in GPFoblq. 1000 iterations used.
# Loadings plot, path diagram, then the printed solution.
plot(fa.preentry)
fa.diagram(fa.preentry)
print(fa.preentry)
## Factor Analysis using method = minres
## Call: fa(r = dplyr::select(smallData, PreEntryBusinessWork:StartupExperience),
## nfactors = 4)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 MR2 MR3 MR4 h2 u2 com
## PreEntryBusinessWork -0.05 0.04 0.55 0.15 0.32 0.68 1.2
## PreEntryBusinessEducation 0.17 0.03 0.68 -0.11 0.54 0.46 1.2
## PreEntryBusinessHobbies -0.11 -0.01 0.65 0.05 0.41 0.59 1.1
## PreEntryManagement1 0.08 0.75 -0.11 -0.05 0.56 0.44 1.1
## PreEntryManagement2 -0.04 0.87 0.04 0.07 0.79 0.21 1.0
## PreEntryManagement3 -0.03 0.74 0.08 -0.11 0.55 0.45 1.1
## PrePlanningMarket 0.62 0.07 -0.03 0.41 0.73 0.27 1.8
## PrePlanningCompetitive 0.74 0.09 -0.01 0.29 0.81 0.19 1.3
## PrePlanningBrand 0.81 0.04 0.05 -0.02 0.69 0.31 1.0
## PrePlanningModel 0.98 -0.04 0.01 -0.15 0.89 0.11 1.1
## StartupExperience 0.01 -0.08 0.17 0.29 0.10 0.90 1.8
##
## MR1 MR2 MR3 MR4
## SS loadings 2.72 1.94 1.25 0.49
## Proportion Var 0.25 0.18 0.11 0.04
## Cumulative Var 0.25 0.42 0.54 0.58
## Proportion Explained 0.43 0.30 0.20 0.08
## Cumulative Proportion 0.43 0.73 0.92 1.00
##
## With factor correlations of
## MR1 MR2 MR3 MR4
## MR1 1.00 0.32 0.11 0.28
## MR2 0.32 1.00 0.25 0.21
## MR3 0.11 0.25 1.00 -0.02
## MR4 0.28 0.21 -0.02 1.00
##
## Mean item complexity = 1.2
## Test of the hypothesis that 4 factors are sufficient.
##
## The degrees of freedom for the null model are 55 and the objective function was 5.02 with Chi Square of 434.2
## The degrees of freedom for the model are 17 and the objective function was 0.36
##
## The root mean square of the residuals (RMSR) is 0.04
## The df corrected root mean square of the residuals is 0.07
##
## The harmonic number of observations is 91 with the empirical chi square 15.31 with prob < 0.57
## The total number of observations was 92 with MLE Chi Square = 30.55 with prob < 0.023
##
## Tucker Lewis Index of factoring reliability = 0.88
## RMSEA index = 0.102 and the 90 % confidence intervals are 0.035 0.145
## BIC = -46.32
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy
## MR1 MR2 MR3 MR4
## Correlation of scores with factors 0.97 0.93 0.83 0.79
## Multiple R square of scores with factors 0.94 0.87 0.70 0.62
## Minimum correlation of possible factor scores 0.88 0.73 0.39 0.23
# Pre-entry business: single-factor solution over the three business items.
fa.preentrybusiness <- fa(
  r = dplyr::select(smallData, PreEntryBusinessWork:PreEntryBusinessHobbies),
  nfactors = 1
)
fa.diagram(fa.preentrybusiness)
print(fa.preentrybusiness)
## Factor Analysis using method = minres
## Call: fa(r = dplyr::select(smallData, PreEntryBusinessWork:PreEntryBusinessHobbies),
## nfactors = 1)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## PreEntryBusinessWork 0.53 0.28 0.72 1
## PreEntryBusinessEducation 0.82 0.67 0.33 1
## PreEntryBusinessHobbies 0.54 0.29 0.71 1
##
## MR1
## SS loadings 1.24
## Proportion Var 0.41
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 3 and the objective function was 0.44 with Chi Square of 39.08
## The degrees of freedom for the model are 0 and the objective function was 0
##
## The root mean square of the residuals (RMSR) is 0
## The df corrected root mean square of the residuals is NA
##
## The harmonic number of observations is 92 with the empirical chi square 0 with prob < NA
## The total number of observations was 92 with MLE Chi Square = 0 with prob < NA
##
## Tucker Lewis Index of factoring reliability = -Inf
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## MR1
## Correlation of scores with factors 0.86
## Multiple R square of scores with factors 0.74
## Minimum correlation of possible factor scores 0.48
# Reliability (alpha) of the three pre-entry business items. Both calls are
# namespace-qualified: ggplot2, which this file also uses, exports its own
# alpha(), and the rest of the file calls dplyr::select() explicitly.
psych::alpha(dplyr::select(smallData, PreEntryBusinessWork:PreEntryBusinessHobbies))
##
## Reliability analysis
## Call: alpha(x = select(smallData, PreEntryBusinessWork:PreEntryBusinessHobbies))
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.65 0.65 0.57 0.39 1.9 0.11 3.6 0.88
##
## lower alpha upper 95% confidence boundaries
## 0.43 0.65 0.88
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N
## PreEntryBusinessWork 0.61 0.61 0.44 0.44 1.6
## PreEntryBusinessEducation 0.44 0.44 0.29 0.29 0.8
## PreEntryBusinessHobbies 0.61 0.61 0.44 0.44 1.5
## alpha se
## PreEntryBusinessWork 0.17
## PreEntryBusinessEducation 0.19
## PreEntryBusinessHobbies 0.17
##
## Item statistics
## n r r.cor r.drop mean sd
## PreEntryBusinessWork 92 0.75 0.53 0.42 3.7 1.2
## PreEntryBusinessEducation 92 0.81 0.67 0.55 3.7 1.1
## PreEntryBusinessHobbies 92 0.75 0.53 0.43 3.6 1.2
##
## Non missing response frequency for each item
## 1 2 3 4 5 miss
## PreEntryBusinessWork 0.04 0.13 0.22 0.32 0.29 0
## PreEntryBusinessEducation 0.01 0.17 0.23 0.30 0.28 0
## PreEntryBusinessHobbies 0.08 0.13 0.18 0.38 0.23 0
# Pre-entry Management: single-factor solution over the three management
# items.
fa.preentrymgt <- fa(
  r = dplyr::select(smallData, PreEntryManagement1:PreEntryManagement3),
  nfactors = 1
)
fa.diagram(fa.preentrymgt)
print(fa.preentrymgt)
## Factor Analysis using method = minres
## Call: fa(r = dplyr::select(smallData, PreEntryManagement1:PreEntryManagement3),
## nfactors = 1)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## PreEntryManagement1 0.73 0.54 0.46 1
## PreEntryManagement2 0.87 0.76 0.24 1
## PreEntryManagement3 0.73 0.53 0.47 1
##
## MR1
## SS loadings 1.83
## Proportion Var 0.61
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 3 and the objective function was 1.1 with Chi Square of 97.84
## The degrees of freedom for the model are 0 and the objective function was 0
##
## The root mean square of the residuals (RMSR) is 0
## The df corrected root mean square of the residuals is NA
##
## The harmonic number of observations is 92 with the empirical chi square 0 with prob < NA
## The total number of observations was 92 with MLE Chi Square = 0 with prob < NA
##
## Tucker Lewis Index of factoring reliability = -Inf
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## MR1
## Correlation of scores with factors 0.92
## Multiple R square of scores with factors 0.85
## Minimum correlation of possible factor scores 0.69
# Reliability (alpha) of the three pre-entry management items. Both calls are
# namespace-qualified: ggplot2, which this file also uses, exports its own
# alpha(), and the rest of the file calls dplyr::select() explicitly.
psych::alpha(dplyr::select(smallData, PreEntryManagement1:PreEntryManagement3))
##
## Reliability analysis
## Call: alpha(x = select(smallData, PreEntryManagement1:PreEntryManagement3))
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.82 0.82 0.76 0.6 4.6 0.091 2.8 1
##
## lower alpha upper 95% confidence boundaries
## 0.64 0.82 0.99
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se
## PreEntryManagement1 0.77 0.78 0.64 0.64 3.5 0.14
## PreEntryManagement2 0.69 0.70 0.54 0.54 2.3 0.16
## PreEntryManagement3 0.78 0.78 0.64 0.64 3.6 0.14
##
## Item statistics
## n r r.cor r.drop mean sd
## PreEntryManagement1 92 0.85 0.72 0.65 2.9 1.1
## PreEntryManagement2 92 0.88 0.81 0.73 2.9 1.1
## PreEntryManagement3 92 0.84 0.72 0.65 2.7 1.3
##
## Non missing response frequency for each item
## 1 2 3 4 5 miss
## PreEntryManagement1 0.13 0.25 0.29 0.26 0.07 0
## PreEntryManagement2 0.18 0.11 0.38 0.28 0.04 0
## PreEntryManagement3 0.25 0.22 0.24 0.18 0.11 0
# Pre-entry Bus Planning: single-factor solution over the four planning
# items.
fa.preentrypln <- fa(
  r = dplyr::select(smallData, PrePlanningMarket:PrePlanningModel),
  nfactors = 1
)
fa.diagram(fa.preentrypln)
print(fa.preentrypln)
## Factor Analysis using method = minres
## Call: fa(r = dplyr::select(smallData, PrePlanningMarket:PrePlanningModel),
## nfactors = 1)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## PrePlanningMarket 0.79 0.63 0.37 1
## PrePlanningCompetitive 0.88 0.77 0.23 1
## PrePlanningBrand 0.83 0.68 0.32 1
## PrePlanningModel 0.86 0.74 0.26 1
##
## MR1
## SS loadings 2.83
## Proportion Var 0.71
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 6 and the objective function was 2.71 with Chi Square of 240.8
## The degrees of freedom for the model are 2 and the objective function was 0.15
##
## The root mean square of the residuals (RMSR) is 0.05
## The df corrected root mean square of the residuals is 0.08
##
## The harmonic number of observations is 92 with the empirical chi square 2.57 with prob < 0.28
## The total number of observations was 92 with MLE Chi Square = 13.16 with prob < 0.0014
##
## Tucker Lewis Index of factoring reliability = 0.856
## RMSEA index = 0.252 and the 90 % confidence intervals are 0.132 0.38
## BIC = 4.12
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## MR1
## Correlation of scores with factors 0.95
## Multiple R square of scores with factors 0.91
## Minimum correlation of possible factor scores 0.82
# Reliability (alpha) of the four pre-planning items. Both calls are
# namespace-qualified: ggplot2, which this file also uses, exports its own
# alpha(), and the rest of the file calls dplyr::select() explicitly.
psych::alpha(dplyr::select(smallData, PrePlanningMarket:PrePlanningModel))
##
## Reliability analysis
## Call: alpha(x = select(smallData, PrePlanningMarket:PrePlanningModel))
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.9 0.9 0.89 0.7 9.5 0.058 3.2 1.2
##
## lower alpha upper 95% confidence boundaries
## 0.79 0.9 1.02
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se
## PrePlanningMarket 0.89 0.89 0.85 0.74 8.4 0.079
## PrePlanningCompetitive 0.86 0.86 0.82 0.68 6.3 0.083
## PrePlanningBrand 0.88 0.88 0.84 0.71 7.4 0.081
## PrePlanningModel 0.87 0.87 0.82 0.69 6.6 0.083
##
## Item statistics
## n r r.cor r.drop mean sd
## PrePlanningMarket 92 0.85 0.78 0.73 3.2 1.2
## PrePlanningCompetitive 91 0.90 0.87 0.82 3.2 1.3
## PrePlanningBrand 92 0.87 0.82 0.78 3.1 1.4
## PrePlanningModel 92 0.89 0.85 0.81 3.3 1.4
##
## Non missing response frequency for each item
## 1 2 3 4 5 miss
## PrePlanningMarket 0.09 0.22 0.21 0.36 0.13 0.00
## PrePlanningCompetitive 0.12 0.19 0.24 0.29 0.16 0.01
## PrePlanningBrand 0.16 0.18 0.22 0.23 0.21 0.00
## PrePlanningModel 0.15 0.16 0.22 0.20 0.27 0.00
# Pre-entry Org Capital. Only one measurement (StartupExperience) is
# available, so the one-factor fit below does not give a usable result.
fa.preentryorgcap <- fa(
  r = dplyr::select(smallData, StartupExperience),
  nfactors = 1
)
## In fa, too many factors requested for this number of variables to use SMC for communality estimates, 1s are used instead
fa.diagram(fa.preentryorgcap)
fa.preentryorgcap
## Factor Analysis using method = minres
## Call: fa(r = dplyr::select(smallData, StartupExperience), nfactors = 1)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## StartupExperience 0 1e-30 1 1
##
## MR1
## SS loadings 0
## Proportion Var 0
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 0 and the objective function was 0 with Chi Square of 0
## The degrees of freedom for the model are -1 and the objective function was 0
##
## The root mean square of the residuals (RMSR) is NaN
## The df corrected root mean square of the residuals is NA
##
## The harmonic number of observations is 90 with the empirical chi square 0 with prob < NA
## The total number of observations was 92 with MLE Chi Square = 0 with prob < NA
##
## Tucker Lewis Index of factoring reliability = NaN
## Fit based upon off diagonal values = NaN
## Measures of factor score adequacy
## MR1
## Correlation of scores with factors 0
## Multiple R square of scores with factors 0
## Minimum correlation of possible factor scores -1
# No reliability estimate is computed for this construct. The scratch call
# alpha(data.frame(1, 1, 1)) that used to sit here dropped every column for
# having no variance and then always stopped with
# "only defined on a data frame with all numeric variables"; an error at
# top level aborts the script when it is run non-interactively.
# The single-column selection is still stored under its original name.
# select() is namespace-qualified to match the fa() calls in this file.
df <- dplyr::select(smallData, StartupExperience)
# Mentorship, all items together: fit a three-factor model to the columns
# MFQCareerSupport1 through MFQRoleModeling3, draw its diagram, and print
# the fitted object.
fa.mentorship <- fa(
  r = dplyr::select(smallData, MFQCareerSupport1:MFQRoleModeling3),
  nfactors = 3
)
fa.diagram(fa.mentorship)
fa.mentorship
## Factor Analysis using method = minres
## Call: fa(r = dplyr::select(smallData, MFQCareerSupport1:MFQRoleModeling3),
## nfactors = 3)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 MR3 MR2 h2 u2 com
## MFQCareerSupport1 0.06 0.82 -0.09 0.68 0.317 1.0
## MFQCareerSupport2 0.02 0.90 0.01 0.83 0.174 1.0
## MFQCareerSupport3 -0.05 0.93 0.06 0.86 0.139 1.0
## MFQPsySupport1 0.97 -0.01 -0.04 0.92 0.075 1.0
## MFQPsySupport2 0.95 0.05 -0.01 0.93 0.072 1.0
## MFQPsySupport3 0.83 -0.04 0.09 0.73 0.272 1.0
## MFQRoleModeling1 0.15 0.06 0.63 0.48 0.516 1.1
## MFQRoleModeling2 0.05 -0.06 0.94 0.90 0.098 1.0
## MFQRoleModeling3 -0.08 0.05 0.93 0.83 0.166 1.0
##
## MR1 MR3 MR2
## SS loadings 2.62 2.37 2.18
## Proportion Var 0.29 0.26 0.24
## Cumulative Var 0.29 0.55 0.80
## Proportion Explained 0.37 0.33 0.30
## Cumulative Proportion 0.37 0.70 1.00
##
## With factor correlations of
## MR1 MR3 MR2
## MR1 1.00 0.23 0.27
## MR3 0.23 1.00 0.13
## MR2 0.27 0.13 1.00
##
## Mean item complexity = 1
## Test of the hypothesis that 3 factors are sufficient.
##
## The degrees of freedom for the null model are 36 and the objective function was 7.93 with Chi Square of 691.4
## The degrees of freedom for the model are 12 and the objective function was 0.42
##
## The root mean square of the residuals (RMSR) is 0.02
## The df corrected root mean square of the residuals is 0.04
##
## The harmonic number of observations is 46 with the empirical chi square 1.91 with prob < 1
## The total number of observations was 92 with MLE Chi Square = 35.77 with prob < 0.00035
##
## Tucker Lewis Index of factoring reliability = 0.889
## RMSEA index = 0.155 and the 90 % confidence intervals are 0.093 0.203
## BIC = -18.49
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## MR1 MR3 MR2
## Correlation of scores with factors 0.98 0.96 0.97
## Multiple R square of scores with factors 0.97 0.93 0.94
## Minimum correlation of possible factor scores 0.93 0.86 0.88
# Mentorship, career support: fit a one-factor model to the columns
# MFQCareerSupport1 through MFQCareerSupport3, draw its diagram, and print
# the fitted object.
fa.mencareer <- fa(
  r = dplyr::select(smallData, MFQCareerSupport1:MFQCareerSupport3),
  nfactors = 1
)
fa.diagram(fa.mencareer)
fa.mencareer
## Factor Analysis using method = minres
## Call: fa(r = dplyr::select(smallData, MFQCareerSupport1:MFQCareerSupport3),
## nfactors = 1)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## MFQCareerSupport1 0.82 0.67 0.33 1
## MFQCareerSupport2 0.91 0.84 0.16 1
## MFQCareerSupport3 0.92 0.84 0.16 1
##
## MR1
## SS loadings 2.35
## Proportion Var 0.78
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 3 and the objective function was 2.17 with Chi Square of 193.9
## The degrees of freedom for the model are 0 and the objective function was 0
##
## The root mean square of the residuals (RMSR) is 0
## The df corrected root mean square of the residuals is NA
##
## The harmonic number of observations is 46 with the empirical chi square 0 with prob < NA
## The total number of observations was 92 with MLE Chi Square = 0 with prob < NA
##
## Tucker Lewis Index of factoring reliability = -Inf
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## MR1
## Correlation of scores with factors 0.96
## Multiple R square of scores with factors 0.93
## Minimum correlation of possible factor scores 0.85
# Reliability analysis for the career-support mentorship items.
# psych::alpha is spelled out because ggplot2 (qplot is used earlier in this
# file) exports its own alpha(); dplyr::select is spelled out to agree with
# the fa() calls in this file.
psych::alpha(
  x = dplyr::select(smallData, MFQCareerSupport1:MFQCareerSupport3)
)
##
## Reliability analysis
## Call: alpha(x = select(smallData, MFQCareerSupport1:MFQCareerSupport3))
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.91 0.91 0.88 0.78 11 0.075 3.8 1.2
##
## lower alpha upper 95% confidence boundaries
## 0.77 0.91 1.06
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se
## MFQCareerSupport1 0.91 0.91 0.84 0.84 10.5 0.12
## MFQCareerSupport2 0.85 0.86 0.75 0.75 6.1 0.13
## MFQCareerSupport3 0.85 0.86 0.75 0.75 6.0 0.13
##
## Item statistics
## n r r.cor r.drop mean sd
## MFQCareerSupport1 46 0.90 0.81 0.78 3.8 1.2
## MFQCareerSupport2 46 0.93 0.89 0.85 3.7 1.4
## MFQCareerSupport3 46 0.94 0.90 0.85 3.8 1.4
##
## Non missing response frequency for each item
## 1 2 3 4 5 miss
## MFQCareerSupport1 0.02 0.20 0.11 0.26 0.41 0.5
## MFQCareerSupport2 0.11 0.11 0.15 0.26 0.37 0.5
## MFQCareerSupport3 0.13 0.09 0.07 0.33 0.39 0.5
# Mentorship, psychosocial support: fit a one-factor model to the columns
# MFQPsySupport1 through MFQPsySupport3, draw its diagram, and print the
# fitted object.
fa.menpsy <- fa(
  r = dplyr::select(smallData, MFQPsySupport1:MFQPsySupport3),
  nfactors = 1
)
fa.diagram(fa.menpsy)
fa.menpsy
## Factor Analysis using method = minres
## Call: fa(r = dplyr::select(smallData, MFQPsySupport1:MFQPsySupport3),
## nfactors = 1)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## MFQPsySupport1 0.97 0.94 0.065 1
## MFQPsySupport2 0.96 0.91 0.086 1
## MFQPsySupport3 0.84 0.71 0.287 1
##
## MR1
## SS loadings 2.56
## Proportion Var 0.85
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 3 and the objective function was 3.09 with Chi Square of 275.4
## The degrees of freedom for the model are 0 and the objective function was 0
##
## The root mean square of the residuals (RMSR) is 0
## The df corrected root mean square of the residuals is NA
##
## The harmonic number of observations is 46 with the empirical chi square 0 with prob < NA
## The total number of observations was 92 with MLE Chi Square = 0 with prob < NA
##
## Tucker Lewis Index of factoring reliability = -Inf
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## MR1
## Correlation of scores with factors 0.98
## Multiple R square of scores with factors 0.97
## Minimum correlation of possible factor scores 0.93
# Reliability analysis for the psychosocial-support mentorship items.
# psych::alpha is spelled out because ggplot2 (qplot is used earlier in this
# file) exports its own alpha(); dplyr::select is spelled out to agree with
# the fa() calls in this file.
psych::alpha(
  x = dplyr::select(smallData, MFQPsySupport1:MFQPsySupport3)
)
##
## Reliability analysis
## Call: alpha(x = select(smallData, MFQPsySupport1:MFQPsySupport3))
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.94 0.94 0.93 0.85 17 0.07 3 1.3
##
## lower alpha upper 95% confidence boundaries
## 0.81 0.94 1.08
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se
## MFQPsySupport1 0.89 0.89 0.81 0.81 8.4 0.13
## MFQPsySupport2 0.90 0.90 0.82 0.82 8.9 0.12
## MFQPsySupport3 0.96 0.96 0.92 0.92 24.5 0.11
##
## Item statistics
## n r r.cor r.drop mean sd
## MFQPsySupport1 46 0.96 0.95 0.91 3 1.4
## MFQPsySupport2 46 0.96 0.95 0.91 3 1.3
## MFQPsySupport3 46 0.92 0.84 0.83 3 1.5
##
## Non missing response frequency for each item
## 1 2 3 4 5 miss
## MFQPsySupport1 0.17 0.26 0.15 0.24 0.17 0.5
## MFQPsySupport2 0.15 0.24 0.20 0.24 0.17 0.5
## MFQPsySupport3 0.22 0.17 0.20 0.20 0.22 0.5
# Mentorship, role modeling: fit a one-factor model to the columns
# MFQRoleModeling1 through MFQRoleModeling3, draw its diagram, and print
# the fitted object.
fa.menrole <- fa(
  r = dplyr::select(smallData, MFQRoleModeling1:MFQRoleModeling3),
  nfactors = 1
)
fa.diagram(fa.menrole)
fa.menrole
## Factor Analysis using method = minres
## Call: fa(r = dplyr::select(smallData, MFQRoleModeling1:MFQRoleModeling3),
## nfactors = 1)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## MFQRoleModeling1 0.68 0.46 0.542 1
## MFQRoleModeling2 0.96 0.92 0.084 1
## MFQRoleModeling3 0.89 0.80 0.202 1
##
## MR1
## SS loadings 2.17
## Proportion Var 0.72
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 3 and the objective function was 1.87 with Chi Square of 167.1
## The degrees of freedom for the model are 0 and the objective function was 0
##
## The root mean square of the residuals (RMSR) is 0
## The df corrected root mean square of the residuals is NA
##
## The harmonic number of observations is 46 with the empirical chi square 0 with prob < NA
## The total number of observations was 92 with MLE Chi Square = 0 with prob < NA
##
## Tucker Lewis Index of factoring reliability = -Inf
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## MR1
## Correlation of scores with factors 0.97
## Multiple R square of scores with factors 0.94
## Minimum correlation of possible factor scores 0.88
# Reliability analysis for the role-modeling mentorship items.
# psych::alpha is spelled out because ggplot2 (qplot is used earlier in this
# file) exports its own alpha(); dplyr::select is spelled out to agree with
# the fa() calls in this file.
psych::alpha(
  x = dplyr::select(smallData, MFQRoleModeling1:MFQRoleModeling3)
)
##
## Reliability analysis
## Call: alpha(x = select(smallData, MFQRoleModeling1:MFQRoleModeling3))
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.87 0.88 0.85 0.7 7.1 0.081 3.6 1.1
##
## lower alpha upper 95% confidence boundaries
## 0.72 0.87 1.03
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se
## MFQRoleModeling1 0.92 0.92 0.86 0.86 11.8 0.12
## MFQRoleModeling2 0.75 0.75 0.60 0.60 3.1 0.15
## MFQRoleModeling3 0.79 0.79 0.65 0.65 3.7 0.14
##
## Item statistics
## n r r.cor r.drop mean sd
## MFQRoleModeling1 46 0.84 0.68 0.65 3.4 1.2
## MFQRoleModeling2 46 0.93 0.91 0.85 3.7 1.2
## MFQRoleModeling3 46 0.92 0.89 0.80 3.7 1.4
##
## Non missing response frequency for each item
## 1 2 3 4 5 miss
## MFQRoleModeling1 0.07 0.20 0.20 0.35 0.20 0.5
## MFQRoleModeling2 0.04 0.15 0.17 0.35 0.28 0.5
## MFQRoleModeling3 0.13 0.07 0.11 0.33 0.37 0.5
# Networking, all items together: fit a four-factor model to the columns
# NetworkSupportStrongSpouse through NetworkInvestorDirect3, draw its
# diagram, and print the fitted object.
fa.net <- fa(
  r = dplyr::select(smallData, NetworkSupportStrongSpouse:NetworkInvestorDirect3),
  nfactors = 4
)
fa.diagram(fa.net)
fa.net
## Factor Analysis using method = minres
## Call: fa(r = dplyr::select(smallData, NetworkSupportStrongSpouse:NetworkInvestorDirect3),
## nfactors = 4)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 MR3 MR4 MR2 h2 u2 com
## NetworkSupportStrongSpouse -0.07 0.02 -0.06 0.53 0.27 0.730 1.1
## NetworkSupportStrongRelatives -0.09 0.02 0.04 0.97 0.94 0.063 1.0
## NetworkSupportStrongFriends 0.27 -0.05 -0.04 0.66 0.53 0.465 1.3
## NetworkSupportWeakBusiness 0.30 0.49 -0.22 0.10 0.39 0.613 2.2
## NetworkSupportWeakAcquaintances 0.28 0.55 0.02 -0.13 0.45 0.546 1.6
## NetworkSupportWeakEmployers -0.17 0.97 0.11 -0.02 0.89 0.108 1.1
## NetworkSupportWeakCoWorkers 0.23 0.76 -0.13 0.11 0.73 0.272 1.3
## NetworkInvestorIndirect1 0.95 0.00 0.05 -0.05 0.95 0.046 1.0
## NetworkInvestorIndirect2 0.76 0.02 0.18 0.03 0.79 0.214 1.1
## NetworkInvestorIndirect3 0.69 0.05 0.20 0.12 0.76 0.243 1.3
## NetworkInvestorDirect1 0.26 0.02 0.66 0.08 0.73 0.267 1.3
## NetworkInvestorDirect2 0.29 0.06 0.59 -0.04 0.64 0.357 1.5
## NetworkInvestorDirect3 0.00 0.01 0.90 0.02 0.81 0.193 1.0
##
## MR1 MR3 MR4 MR2
## SS loadings 2.89 2.18 2.06 1.75
## Proportion Var 0.22 0.17 0.16 0.13
## Cumulative Var 0.22 0.39 0.55 0.68
## Proportion Explained 0.33 0.25 0.23 0.20
## Cumulative Proportion 0.33 0.57 0.80 1.00
##
## With factor correlations of
## MR1 MR3 MR4 MR2
## MR1 1.00 0.27 0.58 0.18
## MR3 0.27 1.00 0.12 0.15
## MR4 0.58 0.12 1.00 0.14
## MR2 0.18 0.15 0.14 1.00
##
## Mean item complexity = 1.3
## Test of the hypothesis that 4 factors are sufficient.
##
## The degrees of freedom for the null model are 78 and the objective function was 9 with Chi Square of 772.2
## The degrees of freedom for the model are 32 and the objective function was 0.85
##
## The root mean square of the residuals (RMSR) is 0.04
## The df corrected root mean square of the residuals is 0.07
##
## The harmonic number of observations is 78 with the empirical chi square 21.4 with prob < 0.92
## The total number of observations was 92 with MLE Chi Square = 70.73 with prob < 9.6e-05
##
## Tucker Lewis Index of factoring reliability = 0.859
## RMSEA index = 0.125 and the 90 % confidence intervals are 0.079 0.151
## BIC = -73.97
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy
## MR1 MR3 MR4 MR2
## Correlation of scores with factors 0.98 0.96 0.94 0.97
## Multiple R square of scores with factors 0.96 0.92 0.89 0.94
## Minimum correlation of possible factor scores 0.93 0.84 0.77 0.88
# Reliability analysis across all networking items together.
# psych::alpha is spelled out because ggplot2 (qplot is used earlier in this
# file) exports its own alpha(); dplyr::select is spelled out to agree with
# the fa() calls in this file.
psych::alpha(
  x = dplyr::select(smallData, NetworkSupportStrongSpouse:NetworkInvestorDirect3)
)
##
## Reliability analysis
## Call: alpha(x = select(smallData, NetworkSupportStrongSpouse:NetworkInvestorDirect3))
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.86 0.85 0.93 0.31 5.8 0.032 3.3 0.86
##
## lower alpha upper 95% confidence boundaries
## 0.8 0.86 0.92
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N
## NetworkSupportStrongSpouse 0.87 0.87 0.93 0.35 6.5
## NetworkSupportStrongRelatives 0.86 0.85 0.92 0.33 5.8
## NetworkSupportStrongFriends 0.86 0.85 0.92 0.32 5.6
## NetworkSupportWeakBusiness 0.86 0.85 0.92 0.32 5.7
## NetworkSupportWeakAcquaintances 0.85 0.84 0.92 0.31 5.4
## NetworkSupportWeakEmployers 0.86 0.85 0.92 0.32 5.7
## NetworkSupportWeakCoWorkers 0.85 0.84 0.91 0.31 5.3
## NetworkInvestorIndirect1 0.84 0.83 0.91 0.29 4.8
## NetworkInvestorIndirect2 0.84 0.83 0.91 0.29 4.9
## NetworkInvestorIndirect3 0.83 0.83 0.91 0.28 4.8
## NetworkInvestorDirect1 0.84 0.83 0.92 0.29 5.0
## NetworkInvestorDirect2 0.84 0.84 0.92 0.30 5.1
## NetworkInvestorDirect3 0.85 0.84 0.92 0.31 5.3
## alpha se
## NetworkSupportStrongSpouse 0.032
## NetworkSupportStrongRelatives 0.033
## NetworkSupportStrongFriends 0.034
## NetworkSupportWeakBusiness 0.034
## NetworkSupportWeakAcquaintances 0.035
## NetworkSupportWeakEmployers 0.034
## NetworkSupportWeakCoWorkers 0.035
## NetworkInvestorIndirect1 0.037
## NetworkInvestorIndirect2 0.037
## NetworkInvestorIndirect3 0.037
## NetworkInvestorDirect1 0.036
## NetworkInvestorDirect2 0.036
## NetworkInvestorDirect3 0.035
##
## Item statistics
## n r r.cor r.drop mean sd
## NetworkSupportStrongSpouse 62 0.25 0.17 0.11 3.8 1.3
## NetworkSupportStrongRelatives 92 0.45 0.42 0.31 3.8 1.2
## NetworkSupportStrongFriends 92 0.53 0.50 0.42 3.9 1.1
## NetworkSupportWeakBusiness 89 0.50 0.46 0.39 3.7 1.2
## NetworkSupportWeakAcquaintances 90 0.59 0.55 0.49 3.4 1.2
## NetworkSupportWeakEmployers 68 0.50 0.48 0.38 2.9 1.4
## NetworkSupportWeakCoWorkers 74 0.61 0.59 0.50 3.4 1.4
## NetworkInvestorIndirect1 90 0.78 0.79 0.75 3.4 1.6
## NetworkInvestorIndirect2 90 0.78 0.78 0.74 3.2 1.5
## NetworkInvestorIndirect3 90 0.81 0.81 0.78 3.2 1.6
## NetworkInvestorDirect1 90 0.72 0.71 0.68 3.0 1.6
## NetworkInvestorDirect2 89 0.69 0.67 0.64 3.0 1.5
## NetworkInvestorDirect3 89 0.62 0.60 0.56 2.5 1.5
##
## Non missing response frequency for each item
## 1 2 3 4 5 miss
## NetworkSupportStrongSpouse 0.08 0.06 0.23 0.21 0.42 0.33
## NetworkSupportStrongRelatives 0.04 0.13 0.15 0.33 0.35 0.00
## NetworkSupportStrongFriends 0.02 0.13 0.17 0.28 0.39 0.00
## NetworkSupportWeakBusiness 0.04 0.15 0.17 0.30 0.34 0.03
## NetworkSupportWeakAcquaintances 0.11 0.12 0.20 0.40 0.17 0.02
## NetworkSupportWeakEmployers 0.26 0.09 0.28 0.22 0.15 0.26
## NetworkSupportWeakCoWorkers 0.16 0.07 0.23 0.27 0.27 0.20
## NetworkInvestorIndirect1 0.23 0.08 0.09 0.27 0.33 0.02
## NetworkInvestorIndirect2 0.22 0.11 0.17 0.21 0.29 0.02
## NetworkInvestorIndirect3 0.26 0.07 0.17 0.22 0.29 0.02
## NetworkInvestorDirect1 0.29 0.14 0.12 0.21 0.23 0.02
## NetworkInvestorDirect2 0.24 0.18 0.17 0.18 0.24 0.03
## NetworkInvestorDirect3 0.38 0.19 0.12 0.18 0.12 0.03
# Networking, strong ties: fit a one-factor model to the columns
# NetworkSupportStrongSpouse through NetworkSupportStrongFriends, draw its
# diagram, and print the fitted object.
fa.netstrong <- fa(
  r = dplyr::select(smallData, NetworkSupportStrongSpouse:NetworkSupportStrongFriends),
  nfactors = 1
)
fa.diagram(fa.netstrong)
fa.netstrong
## Factor Analysis using method = minres
## Call: fa(r = dplyr::select(smallData, NetworkSupportStrongSpouse:NetworkSupportStrongFriends),
## nfactors = 1)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## NetworkSupportStrongSpouse 0.51 0.26 0.74 1
## NetworkSupportStrongRelatives 0.96 0.92 0.08 1
## NetworkSupportStrongFriends 0.69 0.47 0.53 1
##
## MR1
## SS loadings 1.65
## Proportion Var 0.55
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 3 and the objective function was 0.85 with Chi Square of 75.39
## The degrees of freedom for the model are 0 and the objective function was 0
##
## The root mean square of the residuals (RMSR) is 0
## The df corrected root mean square of the residuals is NA
##
## The harmonic number of observations is 73 with the empirical chi square 0 with prob < NA
## The total number of observations was 92 with MLE Chi Square = 0 with prob < NA
##
## Tucker Lewis Index of factoring reliability = NaN
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## MR1
## Correlation of scores with factors 0.96
## Multiple R square of scores with factors 0.93
## Minimum correlation of possible factor scores 0.85
# Reliability analysis for the strong-tie networking items.
# psych::alpha is spelled out because ggplot2 (qplot is used earlier in this
# file) exports its own alpha(); dplyr::select is spelled out to agree with
# the fa() calls in this file.
psych::alpha(
  x = dplyr::select(smallData, NetworkSupportStrongSpouse:NetworkSupportStrongFriends)
)
##
## Reliability analysis
## Call: alpha(x = select(smallData, NetworkSupportStrongSpouse:NetworkSupportStrongFriends))
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.75 0.75 0.7 0.5 3.1 0.1 3.8 0.99
##
## lower alpha upper 95% confidence boundaries
## 0.55 0.75 0.95
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N
## NetworkSupportStrongSpouse 0.79 0.79 0.66 0.66 3.9
## NetworkSupportStrongRelatives 0.54 0.55 0.38 0.38 1.2
## NetworkSupportStrongFriends 0.65 0.65 0.48 0.48 1.8
## alpha se
## NetworkSupportStrongSpouse 0.14
## NetworkSupportStrongRelatives 0.18
## NetworkSupportStrongFriends 0.16
##
## Item statistics
## n r r.cor r.drop mean sd
## NetworkSupportStrongSpouse 62 0.76 0.53 0.47 3.8 1.3
## NetworkSupportStrongRelatives 92 0.87 0.80 0.68 3.8 1.2
## NetworkSupportStrongFriends 92 0.83 0.72 0.60 3.9 1.1
##
## Non missing response frequency for each item
## 1 2 3 4 5 miss
## NetworkSupportStrongSpouse 0.08 0.06 0.23 0.21 0.42 0.33
## NetworkSupportStrongRelatives 0.04 0.13 0.15 0.33 0.35 0.00
## NetworkSupportStrongFriends 0.02 0.13 0.17 0.28 0.39 0.00
# Networking, weak ties: fit a one-factor model to the columns
# NetworkSupportWeakBusiness through NetworkSupportWeakCoWorkers, draw its
# diagram, and print the fitted object.
fa.netweak <- fa(
  r = dplyr::select(smallData, NetworkSupportWeakBusiness:NetworkSupportWeakCoWorkers),
  nfactors = 1
)
fa.diagram(fa.netweak)
fa.netweak
## Factor Analysis using method = minres
## Call: fa(r = dplyr::select(smallData, NetworkSupportWeakBusiness:NetworkSupportWeakCoWorkers),
## nfactors = 1)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## NetworkSupportWeakBusiness 0.63 0.40 0.60 1
## NetworkSupportWeakAcquaintances 0.63 0.40 0.60 1
## NetworkSupportWeakEmployers 0.84 0.71 0.29 1
## NetworkSupportWeakCoWorkers 0.83 0.70 0.30 1
##
## MR1
## SS loadings 2.20
## Proportion Var 0.55
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 6 and the objective function was 1.72 with Chi Square of 153
## The degrees of freedom for the model are 2 and the objective function was 0.18
##
## The root mean square of the residuals (RMSR) is 0.08
## The df corrected root mean square of the residuals is 0.14
##
## The harmonic number of observations is 73 with the empirical chi square 5.85 with prob < 0.054
## The total number of observations was 92 with MLE Chi Square = 16.1 with prob < 0.00032
##
## Tucker Lewis Index of factoring reliability = 0.71
## RMSEA index = 0.283 and the 90 % confidence intervals are 0.162 0.409
## BIC = 7.06
## Fit based upon off diagonal values = 0.98
## Measures of factor score adequacy
## MR1
## Correlation of scores with factors 0.92
## Multiple R square of scores with factors 0.85
## Minimum correlation of possible factor scores 0.70
# Reliability analysis for the weak-tie networking items.
# psych::alpha is spelled out because ggplot2 (qplot is used earlier in this
# file) exports its own alpha(); dplyr::select is spelled out to agree with
# the fa() calls in this file.
psych::alpha(
  x = dplyr::select(smallData, NetworkSupportWeakBusiness:NetworkSupportWeakCoWorkers)
)
##
## Reliability analysis
## Call: alpha(x = select(smallData, NetworkSupportWeakBusiness:NetworkSupportWeakCoWorkers))
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.83 0.83 0.81 0.55 4.8 0.07 3.4 1.1
##
## lower alpha upper 95% confidence boundaries
## 0.69 0.83 0.97
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N
## NetworkSupportWeakBusiness 0.81 0.81 0.77 0.58 4.2
## NetworkSupportWeakAcquaintances 0.81 0.80 0.76 0.58 4.1
## NetworkSupportWeakEmployers 0.75 0.75 0.67 0.50 3.0
## NetworkSupportWeakCoWorkers 0.76 0.77 0.69 0.52 3.3
## alpha se
## NetworkSupportWeakBusiness 0.091
## NetworkSupportWeakAcquaintances 0.092
## NetworkSupportWeakEmployers 0.101
## NetworkSupportWeakCoWorkers 0.099
##
## Item statistics
## n r r.cor r.drop mean sd
## NetworkSupportWeakBusiness 89 0.78 0.66 0.60 3.7 1.2
## NetworkSupportWeakAcquaintances 90 0.79 0.67 0.61 3.4 1.2
## NetworkSupportWeakEmployers 68 0.85 0.81 0.73 2.9 1.4
## NetworkSupportWeakCoWorkers 74 0.83 0.78 0.70 3.4 1.4
##
## Non missing response frequency for each item
## 1 2 3 4 5 miss
## NetworkSupportWeakBusiness 0.04 0.15 0.17 0.30 0.34 0.03
## NetworkSupportWeakAcquaintances 0.11 0.12 0.20 0.40 0.17 0.02
## NetworkSupportWeakEmployers 0.26 0.09 0.28 0.22 0.15 0.26
## NetworkSupportWeakCoWorkers 0.16 0.07 0.23 0.27 0.27 0.20
# Investor network, indirect ties: fit a one-factor model to the columns
# NetworkInvestorIndirect1 through NetworkInvestorIndirect3, draw its
# diagram, and print the fitted object.
fa.invindirect <- fa(
  r = dplyr::select(smallData, NetworkInvestorIndirect1:NetworkInvestorIndirect3),
  nfactors = 1
)
fa.diagram(fa.invindirect)
fa.invindirect
## Factor Analysis using method = minres
## Call: fa(r = dplyr::select(smallData, NetworkInvestorIndirect1:NetworkInvestorIndirect3),
## nfactors = 1)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## NetworkInvestorIndirect1 0.92 0.85 0.15 1
## NetworkInvestorIndirect2 0.92 0.85 0.15 1
## NetworkInvestorIndirect3 0.88 0.78 0.22 1
##
## MR1
## SS loadings 2.48
## Proportion Var 0.83
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 3 and the objective function was 2.56 with Chi Square of 228.3
## The degrees of freedom for the model are 0 and the objective function was 0
##
## The root mean square of the residuals (RMSR) is 0
## The df corrected root mean square of the residuals is NA
##
## The harmonic number of observations is 90 with the empirical chi square 0 with prob < NA
## The total number of observations was 92 with MLE Chi Square = 0 with prob < NA
##
## Tucker Lewis Index of factoring reliability = -Inf
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## MR1
## Correlation of scores with factors 0.97
## Multiple R square of scores with factors 0.94
## Minimum correlation of possible factor scores 0.88
# Reliability analysis for the indirect investor-tie items.
# psych::alpha is spelled out because ggplot2 (qplot is used earlier in this
# file) exports its own alpha(); dplyr::select is spelled out to agree with
# the fa() calls in this file.
psych::alpha(
  x = dplyr::select(smallData, NetworkInvestorIndirect1:NetworkInvestorIndirect3)
)
##
## Reliability analysis
## Call: alpha(x = select(smallData, NetworkInvestorIndirect1:NetworkInvestorIndirect3))
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.94 0.94 0.91 0.83 14 0.072 3.3 1.5
##
## lower alpha upper 95% confidence boundaries
## 0.79 0.94 1.08
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N
## NetworkInvestorIndirect1 0.90 0.90 0.81 0.81 8.8
## NetworkInvestorIndirect2 0.90 0.90 0.82 0.82 8.9
## NetworkInvestorIndirect3 0.92 0.92 0.85 0.85 11.5
## alpha se
## NetworkInvestorIndirect1 0.12
## NetworkInvestorIndirect2 0.12
## NetworkInvestorIndirect3 0.12
##
## Item statistics
## n r r.cor r.drop mean sd
## NetworkInvestorIndirect1 90 0.95 0.91 0.88 3.4 1.6
## NetworkInvestorIndirect2 90 0.94 0.91 0.87 3.2 1.5
## NetworkInvestorIndirect3 90 0.93 0.87 0.85 3.2 1.6
##
## Non missing response frequency for each item
## 1 2 3 4 5 miss
## NetworkInvestorIndirect1 0.23 0.08 0.09 0.27 0.33 0.02
## NetworkInvestorIndirect2 0.22 0.11 0.17 0.21 0.29 0.02
## NetworkInvestorIndirect3 0.26 0.07 0.17 0.22 0.29 0.02
# Investor network, direct ties: fit a one-factor model to the columns
# NetworkInvestorDirect1 through NetworkInvestorDirect3, draw its diagram,
# and print the fitted object.
fa.invdirect <- fa(
  r = dplyr::select(smallData, NetworkInvestorDirect1:NetworkInvestorDirect3),
  nfactors = 1
)
fa.diagram(fa.invdirect)
fa.invdirect
## Factor Analysis using method = minres
## Call: fa(r = dplyr::select(smallData, NetworkInvestorDirect1:NetworkInvestorDirect3),
## nfactors = 1)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## NetworkInvestorDirect1 0.86 0.74 0.26 1
## NetworkInvestorDirect2 0.81 0.66 0.34 1
## NetworkInvestorDirect3 0.85 0.72 0.28 1
##
## MR1
## SS loadings 2.12
## Proportion Var 0.71
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 3 and the objective function was 1.58 with Chi Square of 140.9
## The degrees of freedom for the model are 0 and the objective function was 0
##
## The root mean square of the residuals (RMSR) is 0
## The df corrected root mean square of the residuals is NA
##
## The harmonic number of observations is 89 with the empirical chi square 0 with prob < NA
## The total number of observations was 92 with MLE Chi Square = 0 with prob < NA
##
## Tucker Lewis Index of factoring reliability = -Inf
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## MR1
## Correlation of scores with factors 0.94
## Multiple R square of scores with factors 0.88
## Minimum correlation of possible factor scores 0.76
# Reliability analysis for the direct investor-tie items.
# psych::alpha is spelled out because ggplot2 (qplot is used earlier in this
# file) exports its own alpha(); dplyr::select is spelled out to agree with
# the fa() calls in this file.
psych::alpha(
  x = dplyr::select(smallData, NetworkInvestorDirect1:NetworkInvestorDirect3)
)
##
## Reliability analysis
## Call: alpha(x = select(smallData, NetworkInvestorDirect1:NetworkInvestorDirect3))
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.88 0.88 0.83 0.71 7.3 0.081 2.8 1.4
##
## lower alpha upper 95% confidence boundaries
## 0.72 0.88 1.04
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se
## NetworkInvestorDirect1 0.82 0.82 0.69 0.69 4.5 0.14
## NetworkInvestorDirect2 0.85 0.85 0.73 0.73 5.5 0.13
## NetworkInvestorDirect3 0.82 0.82 0.70 0.70 4.7 0.14
##
## Item statistics
## n r r.cor r.drop mean sd
## NetworkInvestorDirect1 90 0.90 0.83 0.78 3.0 1.6
## NetworkInvestorDirect2 89 0.89 0.79 0.75 3.0 1.5
## NetworkInvestorDirect3 89 0.90 0.83 0.77 2.5 1.5
##
## Non missing response frequency for each item
## 1 2 3 4 5 miss
## NetworkInvestorDirect1 0.29 0.14 0.12 0.21 0.23 0.02
## NetworkInvestorDirect2 0.24 0.18 0.17 0.18 0.24 0.03
## NetworkInvestorDirect3 0.38 0.19 0.12 0.18 0.12 0.03
# Satisfaction: fit a one-factor model to the columns SatisfactionJob
# through SatisfactionIncubator, draw its diagram, and print the fitted
# object. The fa() call emits the message recorded directly below it.
fa.satis <- fa(
  r = dplyr::select(smallData, SatisfactionJob:SatisfactionIncubator),
  nfactors = 1
)
## In fa, too many factors requested for this number of variables to use SMC for communality estimates, 1s are used instead
fa.diagram(fa.satis)
fa.satis
## Factor Analysis using method = minres
## Call: fa(r = dplyr::select(smallData, SatisfactionJob:SatisfactionIncubator),
## nfactors = 1)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## SatisfactionJob 0.75 0.56 0.44 1
## SatisfactionIncubator 0.75 0.56 0.44 1
##
## MR1
## SS loadings 1.12
## Proportion Var 0.56
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 1 and the objective function was 0.37 with Chi Square of 33.55
## The degrees of freedom for the model are -1 and the objective function was 0
##
## The root mean square of the residuals (RMSR) is 0
## The df corrected root mean square of the residuals is NA
##
## The harmonic number of observations is 92 with the empirical chi square 0 with prob < NA
## The total number of observations was 92 with MLE Chi Square = 0 with prob < NA
##
## Tucker Lewis Index of factoring reliability = 1.031
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## MR1
## Correlation of scores with factors 0.85
## Multiple R square of scores with factors 0.72
## Minimum correlation of possible factor scores 0.43
# Reliability analysis for the satisfaction items.
# psych::alpha is spelled out because ggplot2 (qplot is used earlier in this
# file) exports its own alpha(); dplyr::select is spelled out to agree with
# the fa() calls in this file.
psych::alpha(
  x = dplyr::select(smallData, SatisfactionJob:SatisfactionIncubator)
)
## Warning: data length [10] is not a sub-multiple or multiple of the number
## of columns [6]
##
## Reliability analysis
## Call: alpha(x = select(smallData, SatisfactionJob:SatisfactionIncubator))
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.66 0.72 0.56 0.56 2.5 0.16 3.7 1.1
##
## lower alpha upper 95% confidence boundaries
## 0.35 0.66 0.97
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se
## SatisfactionJob 0.56 0.56 0.31 0.56 NA 0.078
## SatisfactionIncubator 0.56 0.31 0.56 NA 0.56 0.078
##
## Item statistics
## n r r.cor r.drop mean sd
## SatisfactionJob 92 0.88 0.66 0.56 4.1 0.88
## SatisfactionIncubator 92 0.88 0.66 0.56 3.2 1.48
##
## Non missing response frequency for each item
## 1 2 3 4 5 miss
## SatisfactionJob 0.00 0.03 0.25 0.33 0.39 0
## SatisfactionIncubator 0.21 0.12 0.14 0.28 0.25 0
# Team, all items together: fit a three-factor model to the columns
# TeamHeterogeneity1 through TeamPreference3, draw its diagram, and print
# the fitted object.
fa.team <- fa(
  r = dplyr::select(smallData, TeamHeterogeneity1:TeamPreference3),
  nfactors = 3
)
fa.diagram(fa.team)
fa.team
## Factor Analysis using method = minres
## Call: fa(r = dplyr::select(smallData, TeamHeterogeneity1:TeamPreference3),
## nfactors = 3)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 MR2 MR3 h2 u2 com
## TeamHeterogeneity1 0.95 0.06 -0.16 0.84 0.16 1.1
## TeamHeterogeneity2 0.85 -0.01 0.16 0.85 0.15 1.1
## TeamHeterogeneity3 0.84 -0.05 0.13 0.78 0.22 1.1
## TeamFlexibility1 0.09 0.07 0.65 0.50 0.50 1.1
## TeamFlexibility2 0.00 0.03 0.67 0.45 0.55 1.0
## TeamFlexibility3 0.05 0.03 0.70 0.52 0.48 1.0
## TeamRelativeSizeInverseScored -0.22 -0.28 -0.12 0.22 0.78 2.3
## TeamPreference1 0.07 0.86 0.07 0.81 0.19 1.0
## TeamPreference2 -0.11 0.87 0.10 0.77 0.23 1.1
## TeamPreference3 0.06 0.84 -0.14 0.68 0.32 1.1
##
## MR1 MR2 MR3
## SS loadings 2.50 2.35 1.57
## Proportion Var 0.25 0.24 0.16
## Cumulative Var 0.25 0.49 0.64
## Proportion Explained 0.39 0.37 0.24
## Cumulative Proportion 0.39 0.76 1.00
##
## With factor correlations of
## MR1 MR2 MR3
## MR1 1.00 0.27 0.37
## MR2 0.27 1.00 0.28
## MR3 0.37 0.28 1.00
##
## Mean item complexity = 1.2
## Test of the hypothesis that 3 factors are sufficient.
##
## The degrees of freedom for the null model are 45 and the objective function was 6.05 with Chi Square of 525.3
## The degrees of freedom for the model are 18 and the objective function was 0.58
##
## The root mean square of the residuals (RMSR) is 0.04
## The df corrected root mean square of the residuals is 0.06
##
## The harmonic number of observations is 89 with the empirical chi square 11.62 with prob < 0.87
## The total number of observations was 92 with MLE Chi Square = 48.79 with prob < 0.00011
##
## Tucker Lewis Index of factoring reliability = 0.836
## RMSEA index = 0.145 and the 90 % confidence intervals are 0.091 0.183
## BIC = -32.61
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy
## MR1 MR2 MR3
## Correlation of scores with factors 0.97 0.95 0.88
## Multiple R square of scores with factors 0.93 0.91 0.77
## Minimum correlation of possible factor scores 0.87 0.81 0.54
# Reliability of the full team item set. Namespace-qualified because
# MASS::select masks dplyr::select once MASS is attached, and ggplot2 also
# exports an alpha().
psych::alpha(dplyr::select(smallData, TeamHeterogeneity1:TeamPreference3))
## Warning: Some items were negatively correlated with total scale and were
## automatically reversed.
##
## Reliability analysis
## Call: alpha(x = select(smallData, TeamHeterogeneity1:TeamPreference3))
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.83 0.84 0.9 0.34 5.2 0.04 3.6 0.79
##
## lower alpha upper 95% confidence boundaries
## 0.76 0.83 0.91
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N
## TeamHeterogeneity1 0.81 0.82 0.89 0.34 4.6
## TeamHeterogeneity2 0.80 0.81 0.88 0.32 4.3
## TeamHeterogeneity3 0.81 0.82 0.89 0.33 4.4
## TeamFlexibility1 0.82 0.83 0.90 0.35 4.8
## TeamFlexibility2 0.83 0.83 0.90 0.35 4.9
## TeamFlexibility3 0.82 0.83 0.90 0.35 4.8
## TeamRelativeSizeInverseScored- 0.83 0.83 0.91 0.35 4.9
## TeamPreference1 0.82 0.81 0.88 0.33 4.3
## TeamPreference2 0.83 0.82 0.89 0.34 4.7
## TeamPreference3 0.83 0.83 0.89 0.35 4.8
## alpha se
## TeamHeterogeneity1 0.045
## TeamHeterogeneity2 0.047
## TeamHeterogeneity3 0.046
## TeamFlexibility1 0.044
## TeamFlexibility2 0.044
## TeamFlexibility3 0.044
## TeamRelativeSizeInverseScored- 0.043
## TeamPreference1 0.045
## TeamPreference2 0.044
## TeamPreference3 0.043
##
## Item statistics
## n r r.cor r.drop mean sd
## TeamHeterogeneity1 91 0.66 0.65 0.57 3.6 1.36
## TeamHeterogeneity2 91 0.74 0.75 0.69 3.7 1.36
## TeamHeterogeneity3 91 0.69 0.69 0.63 3.8 1.39
## TeamFlexibility1 91 0.60 0.54 0.50 3.5 1.30
## TeamFlexibility2 91 0.58 0.52 0.47 2.8 1.32
## TeamFlexibility3 91 0.60 0.54 0.50 2.8 1.35
## TeamRelativeSizeInverseScored- 80 0.57 0.49 0.44 3.0 1.28
## TeamPreference1 91 0.72 0.71 0.59 4.2 0.96
## TeamPreference2 91 0.62 0.60 0.46 4.1 1.02
## TeamPreference3 91 0.59 0.57 0.43 4.1 0.99
##
## Non missing response frequency for each item
## 1 2 3 4 5 miss
## TeamHeterogeneity1 0.09 0.18 0.18 0.21 0.35 0.01
## TeamHeterogeneity2 0.12 0.10 0.12 0.32 0.34 0.01
## TeamHeterogeneity3 0.10 0.14 0.08 0.26 0.42 0.01
## TeamFlexibility1 0.10 0.15 0.15 0.33 0.26 0.01
## TeamFlexibility2 0.22 0.19 0.24 0.24 0.11 0.01
## TeamFlexibility3 0.21 0.25 0.21 0.19 0.14 0.01
## TeamRelativeSizeInverseScored 0.12 0.28 0.24 0.20 0.16 0.13
## TeamPreference1 0.02 0.04 0.12 0.36 0.45 0.01
## TeamPreference2 0.02 0.08 0.12 0.38 0.40 0.01
## TeamPreference3 0.01 0.07 0.18 0.31 0.44 0.01
# Team Heterogeneity: single-factor solution for the heterogeneity items
fa.thete <- fa(
  r = dplyr::select(smallData, TeamHeterogeneity1:TeamHeterogeneity3),
  nfactors = 1
)
fa.diagram(fa.thete)
print(fa.thete)
## Factor Analysis using method = minres
## Call: fa(r = dplyr::select(smallData, TeamHeterogeneity1:TeamHeterogeneity3),
## nfactors = 1)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## TeamHeterogeneity1 0.87 0.76 0.24 1
## TeamHeterogeneity2 0.92 0.85 0.15 1
## TeamHeterogeneity3 0.88 0.78 0.22 1
##
## MR1
## SS loadings 2.38
## Proportion Var 0.79
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 3 and the objective function was 2.22 with Chi Square of 198.2
## The degrees of freedom for the model are 0 and the objective function was 0
##
## The root mean square of the residuals (RMSR) is 0
## The df corrected root mean square of the residuals is NA
##
## The harmonic number of observations is 91 with the empirical chi square 0 with prob < NA
## The total number of observations was 92 with MLE Chi Square = 0 with prob < NA
##
## Tucker Lewis Index of factoring reliability = -Inf
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## MR1
## Correlation of scores with factors 0.96
## Multiple R square of scores with factors 0.92
## Minimum correlation of possible factor scores 0.85
# Reliability of the heterogeneity items. Namespace-qualified because
# MASS::select masks dplyr::select once MASS is attached, and ggplot2 also
# exports an alpha().
psych::alpha(dplyr::select(smallData, TeamHeterogeneity1:TeamHeterogeneity3))
##
## Reliability analysis
## Call: alpha(x = select(smallData, TeamHeterogeneity1:TeamHeterogeneity3))
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.92 0.92 0.89 0.79 12 0.074 3.7 1.3
##
## lower alpha upper 95% confidence boundaries
## 0.78 0.92 1.07
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se
## TeamHeterogeneity1 0.90 0.90 0.81 0.81 8.6 0.12
## TeamHeterogeneity2 0.87 0.87 0.77 0.77 6.7 0.13
## TeamHeterogeneity3 0.89 0.89 0.80 0.80 8.1 0.13
##
## Item statistics
## n r r.cor r.drop mean sd
## TeamHeterogeneity1 91 0.92 0.86 0.82 3.6 1.4
## TeamHeterogeneity2 91 0.94 0.90 0.86 3.7 1.4
## TeamHeterogeneity3 91 0.93 0.87 0.83 3.8 1.4
##
## Non missing response frequency for each item
## 1 2 3 4 5 miss
## TeamHeterogeneity1 0.09 0.18 0.18 0.21 0.35 0.01
## TeamHeterogeneity2 0.12 0.10 0.12 0.32 0.34 0.01
## TeamHeterogeneity3 0.10 0.14 0.08 0.26 0.42 0.01
# Team Flexibility: single-factor solution for the flexibility items
fa.tflex <- fa(
  r = dplyr::select(smallData, TeamFlexibility1:TeamFlexibility3),
  nfactors = 1
)
fa.diagram(fa.tflex)
print(fa.tflex)
## Factor Analysis using method = minres
## Call: fa(r = dplyr::select(smallData, TeamFlexibility1:TeamFlexibility3),
## nfactors = 1)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## TeamFlexibility1 0.66 0.43 0.57 1
## TeamFlexibility2 0.71 0.51 0.49 1
## TeamFlexibility3 0.75 0.56 0.44 1
##
## MR1
## SS loadings 1.51
## Proportion Var 0.50
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 3 and the objective function was 0.7 with Chi Square of 62.28
## The degrees of freedom for the model are 0 and the objective function was 0
##
## The root mean square of the residuals (RMSR) is 0
## The df corrected root mean square of the residuals is NA
##
## The harmonic number of observations is 91 with the empirical chi square 0 with prob < NA
## The total number of observations was 92 with MLE Chi Square = 0 with prob < NA
##
## Tucker Lewis Index of factoring reliability = -Inf
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## MR1
## Correlation of scores with factors 0.87
## Multiple R square of scores with factors 0.76
## Minimum correlation of possible factor scores 0.51
# Reliability of the flexibility items. Namespace-qualified because
# MASS::select masks dplyr::select once MASS is attached, and ggplot2 also
# exports an alpha().
psych::alpha(dplyr::select(smallData, TeamFlexibility1:TeamFlexibility3))
##
## Reliability analysis
## Call: alpha(x = select(smallData, TeamFlexibility1:TeamFlexibility3))
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.75 0.75 0.67 0.5 3 0.1 3 1.1
##
## lower alpha upper 95% confidence boundaries
## 0.55 0.75 0.95
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se
## TeamFlexibility1 0.70 0.70 0.54 0.54 2.3 0.16
## TeamFlexibility2 0.66 0.66 0.49 0.49 1.9 0.16
## TeamFlexibility3 0.64 0.64 0.47 0.47 1.8 0.16
##
## Item statistics
## n r r.cor r.drop mean sd
## TeamFlexibility1 91 0.80 0.63 0.55 3.5 1.3
## TeamFlexibility2 91 0.82 0.67 0.58 2.8 1.3
## TeamFlexibility3 91 0.83 0.70 0.60 2.8 1.4
##
## Non missing response frequency for each item
## 1 2 3 4 5 miss
## TeamFlexibility1 0.10 0.15 0.15 0.33 0.26 0.01
## TeamFlexibility2 0.22 0.19 0.24 0.24 0.11 0.01
## TeamFlexibility3 0.21 0.25 0.21 0.19 0.14 0.01
# Team Group Work: single-factor solution for the team-preference items
fa.tgwork <- fa(
  r = dplyr::select(smallData, TeamPreference1:TeamPreference3),
  nfactors = 1
)
fa.diagram(fa.tgwork)
print(fa.tgwork)
## Factor Analysis using method = minres
## Call: fa(r = dplyr::select(smallData, TeamPreference1:TeamPreference3),
## nfactors = 1)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## TeamPreference1 0.89 0.79 0.21 1
## TeamPreference2 0.87 0.76 0.24 1
## TeamPreference3 0.81 0.65 0.35 1
##
## MR1
## SS loadings 2.20
## Proportion Var 0.73
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 3 and the objective function was 1.76 with Chi Square of 157
## The degrees of freedom for the model are 0 and the objective function was 0
##
## The root mean square of the residuals (RMSR) is 0
## The df corrected root mean square of the residuals is NA
##
## The harmonic number of observations is 91 with the empirical chi square 0 with prob < NA
## The total number of observations was 92 with MLE Chi Square = 0 with prob < NA
##
## Tucker Lewis Index of factoring reliability = -Inf
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## MR1
## Correlation of scores with factors 0.95
## Multiple R square of scores with factors 0.90
## Minimum correlation of possible factor scores 0.80
# Reliability of the team-preference items. Namespace-qualified because
# MASS::select masks dplyr::select once MASS is attached, and ggplot2 also
# exports an alpha().
psych::alpha(dplyr::select(smallData, TeamPreference1:TeamPreference3))
##
## Reliability analysis
## Call: alpha(x = select(smallData, TeamPreference1:TeamPreference3))
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.89 0.89 0.85 0.73 8.2 0.079 4.1 0.9
##
## lower alpha upper 95% confidence boundaries
## 0.74 0.89 1.05
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se
## TeamPreference1 0.83 0.83 0.70 0.70 4.7 0.14
## TeamPreference2 0.84 0.84 0.72 0.72 5.1 0.14
## TeamPreference3 0.87 0.87 0.78 0.78 6.9 0.13
##
## Item statistics
## n r r.cor r.drop mean sd
## TeamPreference1 91 0.92 0.86 0.81 4.2 0.96
## TeamPreference2 91 0.91 0.85 0.80 4.1 1.02
## TeamPreference3 91 0.89 0.79 0.75 4.1 0.99
##
## Non missing response frequency for each item
## 1 2 3 4 5 miss
## TeamPreference1 0.02 0.04 0.12 0.36 0.45 0.01
## TeamPreference2 0.02 0.08 0.12 0.38 0.40 0.01
## TeamPreference3 0.01 0.07 0.18 0.31 0.44 0.01
# Business Plan Success
# Recode the four business-plan items into 0/1 indicators num1..num4:
# "Yes" becomes 1, any other answer 0, and missing answers stay NA.
# dplyr::select is qualified because fa.poly() attaches MASS, whose select()
# masks dplyr's. Columns are processed as whole vectors via lapply(), which
# works whether `df` is a plain data.frame or a tbl_df.
df <- dplyr::select(smallData, BusinessPlanFinancial:BusinessPlanDoc)
df[paste0("num", 1:4)] <- lapply(
  df[1:4],
  function(item) as.numeric(item == "Yes")
)
fa.bpsuccess <- fa.poly(dplyr::select(df, num1:num4),
  nfactors = 1, rotate = "varimax"
)
## Loading required package: mvtnorm
## Loading required package: parallel
## Loading required package: MASS
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
# Draw the loading diagram, then print the fitted solution
fa.diagram(fa.bpsuccess)
print(fa.bpsuccess)
## Factor Analysis using method = minres
## Call: fa.poly(x = dplyr::select(df, num1:num4), nfactors = 1, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## num1 0.62 0.38 0.619 1
## num2 0.74 0.55 0.447 1
## num3 1.00 1.00 0.005 1
## num4 0.63 0.40 0.597 1
##
## MR1
## SS loadings 2.33
## Proportion Var 0.58
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 6 and the objective function was 1.93 with Chi Square of 171.3
## The degrees of freedom for the model are 2 and the objective function was 0.14
##
## The root mean square of the residuals (RMSR) is 0.08
## The df corrected root mean square of the residuals is 0.15
##
## The harmonic number of observations is 92 with the empirical chi square 7.86 with prob < 0.02
## The total number of observations was 92 with MLE Chi Square = 12.31 with prob < 0.0021
##
## Tucker Lewis Index of factoring reliability = 0.811
## RMSEA index = 0.243 and the 90 % confidence intervals are 0.122 0.371
## BIC = 3.27
## Fit based upon off diagonal values = 0.98
## Measures of factor score adequacy
## MR1
## Correlation of scores with factors 1.00
## Multiple R square of scores with factors 1.00
## Minimum correlation of possible factor scores 0.99
# Reliability of the four 0/1 business-plan indicators. alpha() is qualified
# because ggplot2 also exports an alpha(); which one is found would otherwise
# depend on the order in which the packages were attached.
psych::alpha(dplyr::select(df, num1:num4))
##
## Reliability analysis
## Call: alpha(x = dplyr::select(df, num1:num4))
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.67 0.67 0.63 0.34 2 0.093 0.61 0.34
##
## lower alpha upper 95% confidence boundaries
## 0.48 0.67 0.85
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se
## num1 0.62 0.62 0.55 0.35 1.6 0.12
## num2 0.58 0.58 0.50 0.32 1.4 0.12
## num3 0.49 0.50 0.41 0.25 1.0 0.13
## num4 0.69 0.69 0.60 0.43 2.2 0.11
##
## Item statistics
## n r r.cor r.drop mean sd
## num1 89 0.69 0.52 0.42 0.70 0.46
## num2 90 0.73 0.60 0.48 0.77 0.43
## num3 92 0.80 0.73 0.60 0.65 0.48
## num4 89 0.61 0.39 0.31 0.35 0.48
##
## Non missing response frequency for each item
## 0 1 miss
## num1 0.30 0.70 0.03
## num2 0.23 0.77 0.02
## num3 0.35 0.65 0.00
## num4 0.65 0.35 0.03
In isolation, the majority of the latent variables seem okay. The next step is to test them all together.
# Pre-entry measurement model: one lavaan `=~` line per latent variable
lavaan.PreEntry.model <- paste0(
  " PreEntryBusiness =~ PreEntryBusinessWork + PreEntryBusinessEducation+ PreEntryBusinessHobbies \n",
  " PreEntryManagement =~ PreEntryManagement1 + PreEntryManagement2 + PreEntryManagement3 \n",
  " PreEntryPlanning =~ PrePlanningMarket+ PrePlanningCompetitive+ PrePlanningBrand + PrePlanningModel\n",
  " OrgCapital =~ StartupExperience "
)
fit.lavaan.PreEntry.model <- cfa(model = lavaan.PreEntry.model, data = smallData)
summary(fit.lavaan.PreEntry.model, fit.measures = TRUE)
## lavaan (0.5-16) converged normally after 66 iterations
##
## Used Total
## Number of observations 89 92
##
## Estimator ML
## Minimum Function Test Statistic 59.587
## Degrees of freedom 39
## P-value (Chi-square) 0.018
##
## Model test baseline model:
##
## Minimum Function Test Statistic 454.970
## Degrees of freedom 55
## P-value 0.000
##
## User model versus baseline model:
##
## Comparative Fit Index (CFI) 0.949
## Tucker-Lewis Index (TLI) 0.927
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -1419.266
## Loglikelihood unrestricted model (H1) -1389.473
##
## Number of free parameters 27
## Akaike (AIC) 2892.532
## Bayesian (BIC) 2959.725
## Sample-size adjusted Bayesian (BIC) 2874.518
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.077
## 90 Percent Confidence Interval 0.032 0.114
## P-value RMSEA <= 0.05 0.133
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.061
##
## Parameter estimates:
##
## Information Expected
## Standard Errors Standard
##
## Estimate Std.err Z-value P(>|z|)
## Latent variables:
## PreEntryBusiness =~
## PrEntryBsnssW 1.000
## PrEntryBsnssE 1.590 0.485 3.281 0.001
## PrEntryBsnssH 1.030 0.282 3.657 0.000
## PreEntryManagement =~
## PrEntryMngmn1 1.000
## PrEntryMngmn2 1.256 0.186 6.769 0.000
## PrEntryMngmn3 1.155 0.182 6.345 0.000
## PreEntryPlanning =~
## PrePlnnngMrkt 1.000
## PrPlnnngCmptt 1.187 0.129 9.210 0.000
## PrePlnnngBrnd 1.224 0.142 8.642 0.000
## PrePlannngMdl 1.318 0.145 9.065 0.000
## OrgCapital =~
## StartupExprnc 1.000
##
## Covariances:
## PreEntryBusiness ~~
## PrEntryMngmnt 0.139 0.076 1.833 0.067
## PrEntryPlnnng 0.138 0.081 1.717 0.086
## OrgCapital 0.055 0.123 0.450 0.653
## PreEntryManagement ~~
## PrEntryPlnnng 0.295 0.106 2.794 0.005
## OrgCapital 0.061 0.162 0.375 0.708
## PreEntryPlanning ~~
## OrgCapital 0.184 0.181 1.017 0.309
##
## Variances:
## PrEntryBsnssW 0.975 0.175
## PrEntryBsnssE 0.247 0.238
## PrEntryBsnssH 1.008 0.182
## PrEntryMngmn1 0.591 0.116
## PrEntryMngmn2 0.261 0.121
## PrEntryMngmn3 0.843 0.161
## PrePlnnngMrkt 0.520 0.094
## PrPlnnngCmptt 0.371 0.084
## PrePlnnngBrnd 0.582 0.113
## PrePlannngMdl 0.510 0.110
## StartupExprnc 0.000
## PreEntryBsnss 0.364 0.168
## PrEntryMngmnt 0.681 0.185
## PrEntryPlnnng 0.878 0.201
## OrgCapital 2.966 0.445
# Path diagram of the pre-entry model with standardized estimates
semPaths(object = fit.lavaan.PreEntry.model, what = "std", style = "lisrel")
# Networking/satisfaction measurement model: one lavaan `=~` line per latent
lavaan.Networking.model <- paste0(
  " NetworkSupportStrong =~ NetworkSupportStrongSpouse+NetworkSupportStrongRelatives+NetworkSupportStrongFriends \n",
  " NetworkSupportWeak =~ NetworkSupportWeakBusiness+NetworkSupportWeakAcquaintances+NetworkSupportWeakEmployers+NetworkSupportWeakCoWorkers\n",
  " NetworkInvestorIndirect =~NetworkInvestorIndirect1+NetworkInvestorIndirect2+NetworkInvestorIndirect3\n",
  " NetworkInvestorDirect =~ NetworkInvestorDirect1+NetworkInvestorDirect2+NetworkInvestorDirect3\n",
  " Satisfaction =~ SatisfactionJob+SatisfactionIncubator\n"
)
fit.lavaan.Networking.model <- cfa(model = lavaan.Networking.model, data = smallData)
summary(fit.lavaan.Networking.model, fit.measures = TRUE)
## lavaan (0.5-16) converged normally after 66 iterations
##
## Used Total
## Number of observations 48 92
##
## Estimator ML
## Minimum Function Test Statistic 198.742
## Degrees of freedom 80
## P-value (Chi-square) 0.000
##
## Model test baseline model:
##
## Minimum Function Test Statistic 739.380
## Degrees of freedom 105
## P-value 0.000
##
## User model versus baseline model:
##
## Comparative Fit Index (CFI) 0.813
## Tucker-Lewis Index (TLI) 0.754
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -982.274
## Loglikelihood unrestricted model (H1) -882.903
##
## Number of free parameters 40
## Akaike (AIC) 2044.549
## Bayesian (BIC) 2119.397
## Sample-size adjusted Bayesian (BIC) 1993.907
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.176
## 90 Percent Confidence Interval 0.145 0.207
## P-value RMSEA <= 0.05 0.000
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.113
##
## Parameter estimates:
##
## Information Expected
## Standard Errors Standard
##
## Estimate Std.err Z-value P(>|z|)
## Latent variables:
## NetworkSupportStrong =~
## NtwrkSpprtStS 1.000
## NtwrkSpprtStR 1.569 0.445 3.529 0.000
## NtwrkSpprtStF 1.420 0.397 3.579 0.000
## NetworkSupportWeak =~
## NtwrkSpprtWkB 1.000
## NtwrkSpprtWkA 0.995 0.266 3.746 0.000
## NtwrkSpprtWkE 1.482 0.310 4.786 0.000
## NtwrkSpprtWCW 1.454 0.300 4.842 0.000
## NetworkInvestorIndirect =~
## NtwrkInvstrI1 1.000
## NtwrkInvstrI2 0.970 0.076 12.804 0.000
## NtwrkInvstrI3 0.905 0.080 11.239 0.000
## NetworkInvestorDirect =~
## NtwrkInvstrD1 1.000
## NtwrkInvstrD2 0.900 0.078 11.579 0.000
## NtwrkInvstrD3 0.830 0.085 9.800 0.000
## Satisfaction =~
## SatisfactinJb 1.000
## StsfctnIncbtr 2.316 0.449 5.154 0.000
##
## Covariances:
## NetworkSupportStrong ~~
## NetwrkSpprtWk 0.122 0.099 1.230 0.219
## NtwrkInvstrIn 0.511 0.231 2.212 0.027
## NtwrkInvstrDr 0.346 0.198 1.749 0.080
## Satisfaction 0.165 0.090 1.833 0.067
## NetworkSupportWeak ~~
## NtwrkInvstrIn 0.504 0.232 2.176 0.030
## NtwrkInvstrDr 0.302 0.206 1.464 0.143
## Satisfaction 0.084 0.085 0.993 0.321
## NetworkInvestorIndirect ~~
## NtwrkInvstrDr 2.214 0.513 4.314 0.000
## Satisfaction 0.781 0.238 3.277 0.001
## NetworkInvestorDirect ~~
## Satisfaction 0.692 0.219 3.164 0.002
##
## Variances:
## NtwrkSpprtStS 1.201 0.263
## NtwrkSpprtStR 0.245 0.199
## NtwrkSpprtStF 0.604 0.202
## NtwrkSpprtWkB 0.816 0.187
## NtwrkSpprtWkA 1.009 0.226
## NtwrkSpprtWkE 0.556 0.189
## NtwrkSpprtWCW 0.431 0.168
## NtwrkInvstrI1 0.410 0.112
## NtwrkInvstrI2 0.263 0.086
## NtwrkInvstrI3 0.414 0.106
## NtwrkInvstrD1 0.273 0.103
## NtwrkInvstrD2 0.380 0.107
## NtwrkInvstrD3 0.556 0.134
## SatisfactinJb 0.375 0.093
## StsfctnIncbtr 0.272 0.288
## NtwrkSpprtStr 0.475 0.257
## NetwrkSpprtWk 0.628 0.258
## NtwrkInvstrIn 2.574 0.608
## NtwrkInvstrDr 2.392 0.548
## Satisfaction 0.395 0.147
# Path diagram of the networking model with standardized estimates
semPaths(object = fit.lavaan.Networking.model, what = "std", style = "lisrel")
There is a very high correlation between the two investor-network factors (NetworkInvestorIndirect and NetworkInvestorDirect). We should probably use only one of them.
# Team measurement model: one lavaan `=~` line per latent variable
lavaan.Team.model <- paste0(
  " TeamHeterogenity =~ TeamHeterogeneity1+TeamHeterogeneity2+TeamHeterogeneity3\n",
  " TeamFlexibility =~ TeamFlexibility1+TeamFlexibility2+TeamFlexibility3 \n",
  " TeamPreference =~ TeamPreference1+TeamPreference2+TeamPreference3 "
)
fit.lavaan.Team.model <- cfa(model = lavaan.Team.model, data = smallData)
summary(fit.lavaan.Team.model, fit.measures = TRUE)
## lavaan (0.5-16) converged normally after 31 iterations
##
## Used Total
## Number of observations 91 92
##
## Estimator ML
## Minimum Function Test Statistic 65.474
## Degrees of freedom 24
## P-value (Chi-square) 0.000
##
## Model test baseline model:
##
## Minimum Function Test Statistic 519.312
## Degrees of freedom 36
## P-value 0.000
##
## User model versus baseline model:
##
## Comparative Fit Index (CFI) 0.914
## Tucker-Lewis Index (TLI) 0.871
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -1089.742
## Loglikelihood unrestricted model (H1) -1057.005
##
## Number of free parameters 21
## Akaike (AIC) 2221.484
## Bayesian (BIC) 2274.212
## Sample-size adjusted Bayesian (BIC) 2207.929
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.138
## 90 Percent Confidence Interval 0.098 0.178
## P-value RMSEA <= 0.05 0.000
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.056
##
## Parameter estimates:
##
## Information Expected
## Standard Errors Standard
##
## Estimate Std.err Z-value P(>|z|)
## Latent variables:
## TeamHeterogenity =~
## TeamHetrgnty1 1.000
## TeamHetrgnty2 1.079 0.091 11.882 0.000
## TeamHetrgnty3 1.042 0.094 11.120 0.000
## TeamFlexibility =~
## TeamFlexblty1 1.000
## TeamFlexblty2 1.004 0.198 5.059 0.000
## TeamFlexblty3 1.101 0.212 5.187 0.000
## TeamPreference =~
## TeamPrefernc1 1.000
## TeamPrefernc2 1.007 0.098 10.264 0.000
## TeamPrefernc3 0.907 0.098 9.299 0.000
##
## Covariances:
## TeamHeterogenity ~~
## TeamFlexiblty 0.511 0.157 3.248 0.001
## TeamPreferenc 0.303 0.122 2.488 0.013
## TeamFlexibility ~~
## TeamPreferenc 0.297 0.110 2.694 0.007
##
## Variances:
## TeamHetrgnty1 0.474 0.094
## TeamHetrgnty2 0.252 0.080
## TeamHetrgnty3 0.425 0.092
## TeamFlexblty1 0.868 0.182
## TeamFlexblty2 0.903 0.187
## TeamFlexblty3 0.824 0.195
## TeamPrefernc1 0.167 0.054
## TeamPrefernc2 0.261 0.062
## TeamPrefernc3 0.353 0.066
## TeamHetergnty 1.355 0.269
## TeamFlexiblty 0.811 0.249
## TeamPreferenc 0.747 0.142
# Path diagram of the team model with standardized estimates
semPaths(object = fit.lavaan.Team.model, what = "std", style = "lisrel")
# MFQ measurement model: one lavaan `=~` line per latent variable
lavaan.MFQ.model <- paste0(
  " MFQCareerSupport =~ MFQCareerSupport1+MFQCareerSupport2+MFQCareerSupport3\n",
  " MFQPsySupport =~ MFQPsySupport1+MFQPsySupport2+MFQPsySupport3 \n",
  " MFQRoleModeling =~ MFQRoleModeling1+MFQRoleModeling2+MFQRoleModeling3 "
)
fit.lavaan.MFQ.model <- cfa(model = lavaan.MFQ.model, data = smallData)
summary(fit.lavaan.MFQ.model, fit.measures = TRUE)
## lavaan (0.5-16) converged normally after 35 iterations
##
## Used Total
## Number of observations 46 92
##
## Estimator ML
## Minimum Function Test Statistic 30.415
## Degrees of freedom 24
## P-value (Chi-square) 0.171
##
## Model test baseline model:
##
## Minimum Function Test Statistic 364.883
## Degrees of freedom 36
## P-value 0.000
##
## User model versus baseline model:
##
## Comparative Fit Index (CFI) 0.980
## Tucker-Lewis Index (TLI) 0.971
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -532.146
## Loglikelihood unrestricted model (H1) -516.939
##
## Number of free parameters 21
## Akaike (AIC) 1106.292
## Bayesian (BIC) 1144.694
## Sample-size adjusted Bayesian (BIC) 1078.848
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.076
## 90 Percent Confidence Interval 0.000 0.150
## P-value RMSEA <= 0.05 0.291
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.060
##
## Parameter estimates:
##
## Information Expected
## Standard Errors Standard
##
## Estimate Std.err Z-value P(>|z|)
## Latent variables:
## MFQCareerSupport =~
## MFQCarrSpprt1 1.000
## MFQCarrSpprt2 1.243 0.164 7.574 0.000
## MFQCarrSpprt3 1.272 0.168 7.564 0.000
## MFQPsySupport =~
## MFQPsySupprt1 1.000
## MFQPsySupprt2 0.969 0.068 14.274 0.000
## MFQPsySupprt3 0.928 0.099 9.420 0.000
## MFQRoleModeling =~
## MFQRoleMdlng1 1.000
## MFQRoleMdlng2 1.390 0.257 5.405 0.000
## MFQRoleMdlng3 1.494 0.276 5.421 0.000
##
## Covariances:
## MFQCareerSupport ~~
## MFQPsySupport 0.325 0.214 1.519 0.129
## MFQRoleModlng 0.094 0.129 0.732 0.464
## MFQPsySupport ~~
## MFQRoleModlng 0.305 0.179 1.709 0.087
##
## Variances:
## MFQCarrSpprt1 0.482 0.122
## MFQCarrSpprt2 0.291 0.122
## MFQCarrSpprt3 0.311 0.128
## MFQPsySupprt1 0.141 0.074
## MFQPsySupprt2 0.137 0.070
## MFQPsySupprt3 0.601 0.139
## MFQRoleMdlng1 0.767 0.172
## MFQRoleMdlng2 0.096 0.117
## MFQRoleMdlng3 0.396 0.157
## MFQCarerSpprt 0.995 0.299
## MFQPsySupport 1.750 0.399
## MFQRoleModlng 0.649 0.256
# Path diagram of the MFQ model with standardized estimates
semPaths(object = fit.lavaan.MFQ.model, what = "std", style = "lisrel")
# Success model: business-planning, success and investment indicators, all
# declared ordinal for lavaan.
# dplyr::select is spelled out because MASS (attached earlier by fa.poly())
# masks select(); the unqualified call failed with "unused arguments", and each
# later step then failed because `df` still held the previous chunk's columns.
ordinal.items <- c(
  "BusinessPlanFinancial", "BusinessPlanMarket", "BusinessPlanBP",
  "BusinessPlanDoc", "SuccessMVP", "SuccessLaunch", "InvestmentAngel"
)
df <- dplyr::select(
  smallData, BusinessPlanFinancial:BusinessPlanDoc, SuccessMVP, SuccessLaunch,
  InvestmentAngel
)
# Convert every indicator to an ordered factor in one pass
df[ordinal.items] <- lapply(df[ordinal.items], ordered)
lavaan.Success.model <- " BusinessPlanning =~ BusinessPlanFinancial+BusinessPlanMarket+BusinessPlanBP+BusinessPlanDoc\n Success =~ SuccessMVP+SuccessLaunch \n Investment =~ InvestmentAngel "
fit.lavaan.Success.model <- cfa(lavaan.Success.model,
  data = df,
  ordered = ordinal.items
)
summary(fit.lavaan.Success.model, fit.measures = TRUE)
semPaths(fit.lavaan.Success.model, style = "lisrel", what = "std")
It is not easy to test this in an SEM framework with only binary variables.
# Simple structural model: measurement part for the pre-entry latents plus a
# regression of Survival on them. Lines starting with `#` inside the string are
# lavaan comments, i.e. alternative specifications that are switched off.
lavaan.Simple.model <- " # Measurement Model\n PreEntryBusiness =~ PreEntryBusinessEducation#+PreEntryBusinessWork #+ PreEntryBusinessHobbies\n PreEntryManagement =~ PreEntryManagement2 + PreEntryManagement3+ PreEntryManagement1 \n PreEntryPlanning =~ PrePlanningCompetitive+ PrePlanningBrand + PrePlanningModel #+ PrePlanningMarket\n #NetworkSupportWeak =~ NetworkSupportWeakBusiness+NetworkSupportWeakAcquaintances+NetworkSupportWeakEmployers+NetworkSupportWeakCoWorkers \n#NetworkSupportStrong =~ NetworkSupportStrongFriends #NetworkSupportStrongRelatives+NetworkSupportStrongSpouse\n # NetworkInvestorIndirect =~NetworkInvestorIndirect1+NetworkInvestorIndirect2+NetworkInvestorIndirect3\n #NetworkInvestorDirect =~ NetworkInvestorDirect1+NetworkInvestorDirect2+NetworkInvestorDirect3\n Survival =~ SurvivalFeb14Ordered\n # Regressions\nSurvival ~ PreEntryBusiness + PreEntryManagement +PreEntryPlanning#+NetworkSupportStrong+NetworkInvestorDirect\n"
# NOTE(review): the statement below sat outside the model string, so R only
# built and printed an unused formula; it never reached lavaan. It looks like a
# lavaan `~~` covariance line that fell out of the string -- confirm whether it
# belongs inside the model before re-enabling it.
# PreEntryBusinessWork ~ ~PreEntryBusinessEducation + PreEntryBusinessHobbies
# Two recodings of the survival outcome:
#   SurvivalFeb14int     - integer codes of the factor levels
#   SurvivalFeb14Ordered - ordered factor, declared ordinal to lavaan below
# `$` extracts the column as a vector whether smallData is a data.frame or a
# tbl_df; single-bracket `[, "col"]` is not guaranteed to drop to a vector.
smallData$SurvivalFeb14int <- as.integer(factor(smallData$SurvivalFeb14))
smallData$SurvivalFeb14Ordered <- as.ordered(smallData$SurvivalFeb14)
fit.lavaan.Simple.model <- sem(lavaan.Simple.model,
  data = smallData,
  ordered = "SurvivalFeb14Ordered"
)
## Warning: lavaan WARNING: could not compute standard errors!
##
## Warning: lavaan WARNING: could not compute scaled test statistic
# Fit statistics and parameter estimates of the simple structural model
summary(object = fit.lavaan.Simple.model, fit.measures = TRUE)
## lavaan (0.5-16) converged normally after 46 iterations
##
## Used Total
## Number of observations 91 92
##
## Estimator DWLS Robust
## Minimum Function Test Statistic 3.531 NA
## Degrees of freedom 15 15
## P-value (Chi-square) 0.999 NA
## Scaling correction factor NA
## Shift parameter
## for simple second-order correction (Mplus variant)
##
## Model test baseline model:
##
## Minimum Function Test Statistic 181.979 100.304
## Degrees of freedom 28 28
## P-value 0.000 0.000
##
## User model versus baseline model:
##
## Comparative Fit Index (CFI) 1.000 NA
## Tucker-Lewis Index (TLI) 1.139 NA
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.000 NA
## 90 Percent Confidence Interval 0.000 0.000 NA NA
## P-value RMSEA <= 0.05 1.000 NA
##
## Weighted Root Mean Square Residual:
##
## WRMR 0.287 0.287
##
## Parameter estimates:
##
## Information Expected
## Standard Errors Robust.sem
##
## Estimate Std.err Z-value P(>|z|)
## Latent variables:
## PreEntryBusiness =~
## PrEntryBsnssE 1.000
## PreEntryManagement =~
## PrEntryMngmn2 1.000
## PrEntryMngmn3 0.934
## PrEntryMngmn1 0.864
## PreEntryPlanning =~
## PrPlnnngCmptt 1.000
## PrePlnnngBrnd 1.108
## PrePlannngMdl 1.081
## Survival =~
## SrvvlFb14Ordr 1.000
##
## Regressions:
## Survival ~
## PreEntryBsnss 0.128
## PrEntryMngmnt 0.110
## PrEntryPlnnng 0.337
##
## Covariances:
## PreEntryBusiness ~~
## PrEntryMngmnt 0.242
## PrEntryPlnnng 0.298
## PreEntryManagement ~~
## PrEntryPlnnng 0.395
##
## Intercepts:
## PrEntryBsnssE 3.692
## PrEntryMngmn2 2.890
## PrEntryMngmn3 2.703
## PrEntryMngmn1 2.901
## PrPlnnngCmptt 3.187
## PrePlnnngBrnd 3.110
## PrePlannngMdl 3.275
## PreEntryBsnss 0.000
## PrEntryMngmnt 0.000
## PrEntryPlnnng 0.000
## Survival 0.000
##
## Thresholds:
## SrvvlFb14Or|1 -0.265
##
## Variances:
## PrEntryBsnssE 0.000
## PrEntryMngmn2 0.322
## PrEntryMngmn3 0.867
## PrEntryMngmn1 0.520
## PrPlnnngCmptt 0.406
## PrePlnnngBrnd 0.414
## PrePlannngMdl 0.629
## SrvvlFb14Ordr 0.724
## PreEntryBsnss 1.180
## PrEntryMngmnt 0.985
## PrEntryPlnnng 1.174
## Survival 0.050
# Path diagram of the simple structural model with standardized estimates
semPaths(object = fit.lavaan.Simple.model, what = "std", style = "lisrel")
Unable to compute standard errors and scaled test statistic!
# Small worked example for the survey package: 8 sampled clusters in a single
# stratum, each with weight 4 and sampling rate 0.25.
province <- read.table(
  text = paste0(
    "id str clu wt ue91 meanz hou85 diffhou85 smplrat\n",
    "\t\t1 1 1 4 4123 2867 26881 -24014 .25\n",
    "\t\t2 1 4 4 760 2867 4896 -2029 .25\n",
    "\t\t3 1 5 4 721 2867 3730 -863 .25\n",
    "\t\t4 1 15 4 142 2867 556 2311 .25\n",
    "\t\t5 1 18 4 187 2867 1463 1404 .25\n",
    "\t\t6 1 26 4 331 2867 1946 921 .25\n",
    "\t\t7 1 30 4 127 2867 834 2033 .25\n",
    "\t\t8 1 31 4 219 2867 932 1935 .25"
  ),
  header = TRUE
)
# Finite population correction: svydesign() expects the POPULATION size of the
# stratum here (or a sampling fraction), not the sample size. 8 sampled
# clusters at a rate of 0.25 correspond to 32 clusters in the population.
province$fpc <- 32
# Design
province.design <- svydesign(
  id = ~clu, strata = ~str, data = province, weights = ~wt,
  fpc = ~fpc
)
# Model (outer parentheses print the fitted object)
(mysvyglm <- svyglm(ue91 ~ hou85, province.design))
## Stratified Independent Sampling design
## svydesign(id = ~clu, strata = ~str, data = province, weights = ~wt,
## fpc = ~fpc)
##
## Call: svyglm(formula = ue91 ~ hou85, province.design)
##
## Coefficients:
## (Intercept) hou85
## 42.655 0.152
##
## Degrees of Freedom: 7 Total (i.e. Null); 6 Residual
## Null Deviance: 12900000
## Residual Deviance: 22500 AIC: 92.2
# Survival outcome together with the pre-entry items.
# dplyr::select is qualified: with MASS attached, a bare select() resolves to
# MASS::select and fails with "unused arguments".
df <- dplyr::select(smallData, SurvivalFeb14, PreEntryBusinessWork:PrePlanningModel)
# Cross-tabulate survival against the business-hobbies item
xtabs(formula = ~ SurvivalFeb14 + PreEntryBusinessHobbies, data = smallData)
## PreEntryBusinessHobbies
## SurvivalFeb14 1 2 3 4 5
## 1 2 5 10 15 4
## 2 5 7 7 20 17
Our current incubation outcomes are:
The business plan is measured with 3 variables of the form “during the incubator, did your team do a business plan?” For the other variables (MVP:SeriesA), however, we have dates for the development. We cannot attribute the release to the incubator with certainty because we do not have a control group (startups that achieved the same outcomes without coming to the incubator), but the outcomes are likely to be closely related to it because the accelerator strongly encourages finishing at least one of these outcomes.
Specifically, we want to ask how to use the date data to create more numerical incubation-outcome measures.
First, we have whether the startup had achieved the outcome at the time of survey (end of the incubator). For example, SuccessMVP=Yes/No.
If Yes, then we have the SuccessMVPDate=6/20/2013. This date can be before or during the incubation period.
If No, then we have the SuccessMVPDateAnticipated=12/1/2013. This date is typically during or after the incubation period.
Therefore, we can construct the outcomes during the incubator:
From the SuccessMVPDate, we can see whether the outcome was achieved BEFORE, DURING, or AFTER the incubator (coded 1, 2, 3).
We can then use the DURING to observe the startups that developed the MVP during the accelerator period.
Let's try it on the MVP first.
Things to do:
Create data set
# First get the incubator outcomes data: keep only the columns (single names
# and name ranges of Data3314) that the outcome analysis below works with.
df.ioutcomes <- dplyr::select(
    Data3314,
    SubjectName,
    PreEntryBusinessWork:StartupExperience,
    SurvivalFeb14,
    InvestmentAngel,
    BusinessPlanFinancial:InvestmentSeriesADate,
    IncubatorMentor,
    SatisfactionJob:SUPPresentDemoDay,
    NetworkInvestorIndirect1:NetworkInvestorDirect3
)
True= MVP released during the incubator False= NOT
# Second let's observe how MVPs is distributed
summarize(group_by(df.ioutcomes, SuccessMVP), count = n())
## Source: local data frame [2 x 2]
##
## SuccessMVP count
## 1 No 19
## 2 Yes 73
# SuccessMVP is categorical (Yes/No), so count its values with a bar geom.
# The "histogram" geom bins a continuous x and current ggplot2 releases refuse
# it for discrete data.
qplot(data = df.ioutcomes, SuccessMVP, geom = "bar")
# How many of these MVPs were developed before/during/after the incubator
# period? code dates
class(df.ioutcomes$SuccessMVPDate)
## [1] "character"
df.ioutcomes$SuccessMVPDate[1]
## [1] "6/20/2013"
df.ioutcomes$SMVPDateFormat <- as.Date(df.ioutcomes$SuccessMVPDate, format = "%m/%d/%Y")
summary(df.ioutcomes$SMVPDateFormat, na.rm = TRUE)
## Min. 1st Qu. Median Mean 3rd Qu.
## "2011-05-01" "2012-11-23" "2013-04-01" "2013-02-21" "2013-07-01"
## Max. NA's
## "2014-04-01" "20"
# Flag the MVPs whose release date falls inside the incubator window
# (2013-01-01 to 2013-09-01, both ends included).
# View(select(df.ioutcomes, SuccessMVP, SMVPDateFormat, SMVPOutcome))
# A missing date (MVP not done yet) is FALSE: the MVP was not released during
# the incubator period.
df.ioutcomes$SMVPOutcome <- with(df.ioutcomes, !is.na(SMVPDateFormat) &
    SMVPDateFormat >= as.Date("2013-01-01") &
    SMVPDateFormat <= as.Date("2013-09-01"))
# replot resummarize
summarize(group_by(df.ioutcomes, SuccessMVP), count = n())
## Source: local data frame [2 x 2]
##
## SuccessMVP count
## 1 No 19
## 2 Yes 73
# Count and mean MVP date per (SuccessMVP, SMVPOutcome) group.
# na.rm must be an argument of mean(): handed to summarize() it only creates a
# constant column named `na.rm` and leaves the group means NA. The output
# recorded below was produced by that earlier form of the call.
summarize(group_by(df.ioutcomes, SuccessMVP, SMVPOutcome), count = n(),
    meanDate = mean(SMVPDateFormat, na.rm = TRUE))
## Source: local data frame [3 x 5]
## Groups: SuccessMVP
##
## SuccessMVP SMVPOutcome count meanDate na.rm
## 1 No FALSE 19 NA TRUE
## 2 Yes FALSE 23 NA TRUE
## 3 Yes TRUE 50 15835 TRUE
# SMVPOutcome is logical, i.e. discrete: count TRUE/FALSE with a bar geom,
# stacked by SuccessMVP. The "histogram" geom needs a continuous x and is
# rejected for discrete data by current ggplot2 releases.
qplot(data = df.ioutcomes, SMVPOutcome, geom = "bar", fill = SuccessMVP)
# Relationship to:
# StartupExperience
qplot(data = df.ioutcomes, SuccessMVP, StartupExperience, geom = "boxplot")
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).
qplot(data = df.ioutcomes, SMVPDateFormat, geom = "histogram", fill = factor(StartupExperience)) +
geom_vline(xintercept = as.numeric(as.Date("2013-09-01")), colour = "red",
linetype = "longdash") + geom_vline(xintercept = as.numeric(as.Date("2013-01-01")),
colour = "green", linetype = "longdash")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
qplot(data = df.ioutcomes, SMVPDateFormat, geom = "histogram", fill = factor(StartupExperience)) +
facet_wrap(~StartupExperience, nrow = 2) + geom_vline(xintercept = as.numeric(as.Date("2013-09-01")),
colour = "red", linetype = "longdash") + geom_vline(xintercept = as.numeric(as.Date("2013-01-01")),
colour = "green", linetype = "longdash")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
# Boxie
ggplot(data = df.ioutcomes, aes(StartupExperience, SMVPDateFormat, group = factor(StartupExperience))) +
geom_boxplot()
## Warning: Removed 20 rows containing non-finite values (stat_boxplot).
## Warning: Removed 2 rows containing missing values (geom_segment).
## Warning: Removed 1 rows containing missing values (geom_segment).
# Survival
qplot(data = df.ioutcomes, SMVPDateFormat, geom = "histogram", fill = SurvivalFeb14) +
geom_vline(xintercept = as.numeric(as.Date("2013-09-01")), colour = "red",
linetype = "longdash") + geom_vline(xintercept = as.numeric(as.Date("2013-01-01")),
colour = "green", linetype = "longdash")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
# Angel
qplot(data = df.ioutcomes, SMVPDateFormat, geom = "histogram", fill = InvestmentAngel) +
geom_vline(xintercept = as.numeric(as.Date("2013-09-01")), colour = "red",
linetype = "longdash") + geom_vline(xintercept = as.numeric(as.Date("2013-01-01")),
colour = "green", linetype = "longdash")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
True= Commercial launch done during the incubator False= NOT
# Second let's observe how SuccessLaunch is distributed
summarize(group_by(df.ioutcomes, SuccessLaunch), count = n())
## Source: local data frame [2 x 2]
##
## SuccessLaunch count
## 1 No 51
## 2 Yes 41
# SuccessLaunch is categorical (Yes/No), so count its values with a bar geom.
# The "histogram" geom bins a continuous x and current ggplot2 releases refuse
# it for discrete data.
qplot(data = df.ioutcomes, SuccessLaunch, geom = "bar")
# How many of these Successful Launches were developed before/during/after
# the incubator period? code dates
class(df.ioutcomes$SuccessLaunchDate)
## [1] "character"
df.ioutcomes$SuccessLaunchDate[4]
## [1] "5/1/2013"
df.ioutcomes$SLaunchDateFormat <- as.Date(df.ioutcomes$SuccessLaunchDate, format = "%m/%d/%Y")
summary(df.ioutcomes$SLaunchDateFormat, na.rm = TRUE)
## Min. 1st Qu. Median Mean 3rd Qu.
## "2011-10-01" "2012-11-01" "2013-03-16" "2013-02-14" "2013-06-01"
## Max. NA's
## "2013-12-01" "54"
# Flag the commercial launches whose date falls inside the incubator window
# (2013-01-01 to 2013-09-01, both ends included).
# View(select(df.ioutcomes, SuccessLaunch, SLaunchDateFormat, SLaunchOutcome))
# A missing date (launch not done yet) is FALSE: the launch did not happen
# during the incubator period.
df.ioutcomes$SLaunchOutcome <- with(df.ioutcomes, !is.na(SLaunchDateFormat) &
    SLaunchDateFormat >= as.Date("2013-01-01") &
    SLaunchDateFormat <= as.Date("2013-09-01"))
# replot resummarize
summarize(group_by(df.ioutcomes, SuccessLaunch), count = n())
## Source: local data frame [2 x 2]
##
## SuccessLaunch count
## 1 No 51
## 2 Yes 41
# Count and mean launch date per (SuccessLaunch, SLaunchOutcome) group.
# na.rm must be an argument of mean(): handed to summarize() it only creates a
# constant column named `na.rm` and leaves the group means NA. The output
# recorded below was produced by that earlier form of the call.
summarize(group_by(df.ioutcomes, SuccessLaunch, SLaunchOutcome), count = n(),
    meanDate = mean(SLaunchDateFormat, na.rm = TRUE))
## Source: local data frame [3 x 5]
## Groups: SuccessLaunch
##
## SuccessLaunch SLaunchOutcome count meanDate na.rm
## 1 No FALSE 51 NA TRUE
## 2 Yes FALSE 18 NA TRUE
## 3 Yes TRUE 23 15842 TRUE
# SLaunchOutcome is logical, i.e. discrete: count TRUE/FALSE with a bar geom,
# stacked by SuccessLaunch. The "histogram" geom needs a continuous x and is
# rejected for discrete data by current ggplot2 releases.
qplot(data = df.ioutcomes, SLaunchOutcome, geom = "bar", fill = SuccessLaunch)
# Relationship to:
# StartupExperience
qplot(data = df.ioutcomes, SuccessLaunch, StartupExperience, geom = "boxplot")
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).
qplot(data = df.ioutcomes, SLaunchDateFormat, geom = "histogram", fill = factor(StartupExperience)) +
geom_vline(xintercept = as.numeric(as.Date("2013-09-01")), colour = "red",
linetype = "longdash") + geom_vline(xintercept = as.numeric(as.Date("2013-01-01")),
colour = "green", linetype = "longdash")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
qplot(data = df.ioutcomes, SLaunchDateFormat, geom = "histogram", fill = factor(StartupExperience)) +
facet_wrap(~StartupExperience, nrow = 2) + geom_vline(xintercept = as.numeric(as.Date("2013-09-01")),
colour = "red", linetype = "longdash") + geom_vline(xintercept = as.numeric(as.Date("2013-01-01")),
colour = "green", linetype = "longdash")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
# Boxie
ggplot(data = df.ioutcomes, aes(StartupExperience, SLaunchDateFormat, group = factor(StartupExperience))) +
geom_boxplot()
## Warning: Removed 54 rows containing non-finite values (stat_boxplot).
## Warning: Removed 2 rows containing missing values (geom_segment).
## Warning: Removed 1 rows containing missing values (geom_segment).
# Survival
qplot(data = df.ioutcomes, SLaunchDateFormat, geom = "histogram", fill = SurvivalFeb14) +
geom_vline(xintercept = as.numeric(as.Date("2013-09-01")), colour = "red",
linetype = "longdash") + geom_vline(xintercept = as.numeric(as.Date("2013-01-01")),
colour = "green", linetype = "longdash")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
# Angel
qplot(data = df.ioutcomes, SLaunchDateFormat, geom = "histogram", fill = InvestmentAngel) +
geom_vline(xintercept = as.numeric(as.Date("2013-09-01")), colour = "red",
linetype = "longdash") + geom_vline(xintercept = as.numeric(as.Date("2013-01-01")),
colour = "green", linetype = "longdash")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
It seems that teams with more startup experience achieve more commercial launches during the incubator.
True= First sale done during the incubator False= NOT
# Second let's observe how SuccessFirstSale is distributed
summarize(group_by(df.ioutcomes, SuccessFirstSale), count = n(), meanStartupExperience = mean(StartupExperience,
na.rm = TRUE))
## Source: local data frame [3 x 3]
##
## SuccessFirstSale count meanStartupExperience
## 1 No 57 1.727
## 2 Yes 34 1.765
## 3 NA 1 2.000
# How many of these Successful Launches were developed before/during/after
# the incubator period? code dates
class(df.ioutcomes$SuccessFirstSaleDate)
## [1] "character"
df.ioutcomes$SuccessFirstSaleDate[4]
## [1] "7/1/2013"
df.ioutcomes$SFSaleDateFormat <- as.Date(df.ioutcomes$SuccessFirstSaleDate,
format = "%m/%d/%Y")
summary(df.ioutcomes$SFSaleDateFormat, na.rm = TRUE)
## Min. 1st Qu. Median Mean 3rd Qu.
## "2011-05-01" "2012-11-01" "2013-02-01" "2013-01-19" "2013-06-01"
## Max. NA's
## "2014-11-01" "59"
# Flag the first sales whose date falls inside the incubator window
# (2013-01-01 to 2013-09-01, both ends included).
# View(select(df.ioutcomes, SuccessFirstSale, SFSaleDateFormat, SFSaleOutcome))
# A missing date (no first sale yet) is FALSE: the first sale did not happen
# during the incubator period.
df.ioutcomes$SFSaleOutcome <- with(df.ioutcomes, !is.na(SFSaleDateFormat) &
    SFSaleDateFormat >= as.Date("2013-01-01") &
    SFSaleDateFormat <= as.Date("2013-09-01"))
# replot resummarize
summarize(group_by(df.ioutcomes, SuccessFirstSale), count = n())
## Source: local data frame [3 x 2]
##
## SuccessFirstSale count
## 1 No 57
## 2 Yes 34
## 3 NA 1
summarize(group_by(df.ioutcomes, SuccessFirstSale, SFSaleOutcome), count = n(),
meanDate = mean(SFSaleDateFormat, na.rm = TRUE))
## Source: local data frame [4 x 4]
## Groups: SuccessFirstSale
##
## SuccessFirstSale SFSaleOutcome count meanDate
## 1 No FALSE 57 2014-02-01
## 2 Yes FALSE 17 2012-09-01
## 3 Yes TRUE 17 2013-04-30
## 4 NA FALSE 1 <NA>
# SFSaleOutcome is logical, i.e. discrete: count TRUE/FALSE with a bar geom,
# stacked by SuccessFirstSale. The "histogram" geom needs a continuous x and is
# rejected for discrete data by current ggplot2 releases.
qplot(data = df.ioutcomes, SFSaleOutcome, geom = "bar", fill = SuccessFirstSale)
qplot(data = summarize(group_by(df.ioutcomes, StartupExperience, SFSaleOutcome),
count = n()), StartupExperience, count, geom = "point", color = SFSaleOutcome)
## Warning: Removed 1 rows containing missing values (geom_point).
# density plot
ggplot(data = df.ioutcomes, aes(x = StartupExperience, color = SFSaleOutcome)) +
geom_density()
## Warning: Removed 2 rows containing non-finite values (stat_density).
# Relationship to:
# StartupExperience Show
qplot(data = df.ioutcomes, SuccessFirstSale, StartupExperience, geom = "boxplot")
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).
qplot(data = df.ioutcomes, SFSaleDateFormat, geom = "histogram", fill = factor(StartupExperience)) +
geom_vline(xintercept = as.numeric(as.Date("2013-09-01")), colour = "red",
linetype = "longdash") + geom_vline(xintercept = as.numeric(as.Date("2013-01-01")),
colour = "green", linetype = "longdash")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## Warning: position_stack requires constant width: output may be incorrect
qplot(data = df.ioutcomes, SFSaleDateFormat, geom = "histogram", fill = factor(StartupExperience)) +
facet_wrap(~StartupExperience, nrow = 2) + geom_vline(xintercept = as.numeric(as.Date("2013-09-01")),
colour = "red", linetype = "longdash") + geom_vline(xintercept = as.numeric(as.Date("2013-01-01")),
colour = "green", linetype = "longdash")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## Warning: position_stack requires constant width: output may be incorrect
## Warning: position_stack requires constant width: output may be incorrect
## Warning: position_stack requires constant width: output may be incorrect
## Warning: position_stack requires constant width: output may be incorrect
## Warning: position_stack requires constant width: output may be incorrect
## Warning: position_stack requires constant width: output may be incorrect
## Warning: position_stack requires constant width: output may be incorrect
# Boxie
ggplot(data = df.ioutcomes, aes(StartupExperience, SFSaleDateFormat, group = factor(StartupExperience))) +
geom_boxplot()
## Warning: Removed 59 rows containing non-finite values (stat_boxplot).
# Survival
qplot(data = df.ioutcomes, SFSaleDateFormat, geom = "histogram", fill = SurvivalFeb14) +
geom_vline(xintercept = as.numeric(as.Date("2013-09-01")), colour = "red",
linetype = "longdash") + geom_vline(xintercept = as.numeric(as.Date("2013-01-01")),
colour = "green", linetype = "longdash")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## Warning: position_stack requires constant width: output may be incorrect
# Angel
qplot(data = df.ioutcomes, SFSaleDateFormat, geom = "histogram", fill = InvestmentAngel) +
geom_vline(xintercept = as.numeric(as.Date("2013-09-01")), colour = "red",
linetype = "longdash") + geom_vline(xintercept = as.numeric(as.Date("2013-01-01")),
colour = "green", linetype = "longdash")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## Warning: position_stack requires constant width: output may be incorrect
Same.
True= Significant increase in sales during the incubator False= NOT
# Second let's observe how SuccessSalesChange is distributed
summarize(group_by(df.ioutcomes, SuccessSalesChange), count = n(), meanStartupExperience = mean(StartupExperience,
na.rm = TRUE))
## Source: local data frame [2 x 3]
##
## SuccessSalesChange count meanStartupExperience
## 1 No 69 1.642
## 2 Yes 23 2.043
# Create True/False variable: TRUE exactly when the answer is "Yes" (NA stays
# NA). Comparing against the label does not depend on factor level order,
# unlike integer-code arithmetic, which gives wrong flags when a level is
# missing or an extra level appears.
df.ioutcomes$SSalesChange <- df.ioutcomes$SuccessSalesChange == "Yes"
# Distribution of SuccessSalesChangePercent: drop the "%" sign and convert to
# integer
df.ioutcomes$SuccessSalesChangePercentNumeric <- as.integer(sub("%", "",
    df.ioutcomes$SuccessSalesChangePercent, fixed = TRUE))
qplot(data = df.ioutcomes, SuccessSalesChangePercentNumeric, geom = "histogram")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
# Relationship to:
# StartupExperience Show
qplot(data = df.ioutcomes, SuccessSalesChange, StartupExperience, geom = "boxplot")
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).
Greater startup experience is associated with a higher chance of a significant increase in sales.
True= Significant increase in employees during the incubator False= NOT
# Second let's observe how SuccessStaffChange is distributed, with the mean
# StartupExperience (NAs dropped) of each group
summarize(group_by(df.ioutcomes, SuccessStaffChange), count = n(), meanStartupExperience = mean(StartupExperience,
na.rm = TRUE))
## Source: local data frame [2 x 3]
##
## SuccessStaffChange count meanStartupExperience
## 1 No 51 1.74
## 2 Yes 41 1.75
# Create True/False variable: TRUE exactly when the answer is "Yes" (NA stays
# NA). Comparing against the label does not depend on factor level order,
# unlike integer-code arithmetic, which gives wrong flags when a level is
# missing or an extra level appears.
df.ioutcomes$SStaffChange <- df.ioutcomes$SuccessStaffChange == "Yes"
# Distribution of SuccessStaffChangePercent: drop the "%" sign and convert to
# integer
df.ioutcomes$SuccessStaffChangePercentNumeric <- as.integer(sub("%", "",
    df.ioutcomes$SuccessStaffChangePercent, fixed = TRUE))
qplot(data = df.ioutcomes, SuccessStaffChangePercentNumeric, geom = "histogram")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
# Relationship to:
# StartupExperience Show
qplot(data = df.ioutcomes, SuccessStaffChange, StartupExperience, geom = "boxplot")
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).
No relationship observed between startup experience and employees increase.
# Aggregate startup experience so that those who have more than 3 startups
# are = 4 (NA is left as NA)
df.ioutcomes$aggStartupExperience <- df.ioutcomes$StartupExperience
df.ioutcomes$aggStartupExperience[df.ioutcomes$StartupExperience > 3 & !is.na(df.ioutcomes$StartupExperience)] <- 4
# Means of the incubator outcomes over experience. Each column is the share of
# startups with the matching TRUE/FALSE flag:
#   MVPs = SMVPOutcome, Launch = SLaunchOutcome, SFSale = SFSaleOutcome,
#   SalesInc = SSalesChange, StaffInc = SStaffChange.
# Earlier versions computed SFSale from SSalesChange and SalesInc from
# SStaffChange, which is why SalesInc and StaffInc are identical in the output
# recorded below.
df <- summarize(group_by(df.ioutcomes, aggStartupExperience), count = n(),
    MVPs = mean(SMVPOutcome), Launch = mean(SLaunchOutcome), SFSale = mean(SFSaleOutcome),
    SalesInc = mean(SSalesChange), StaffInc = mean(SStaffChange))
# Same table by the raw (un-aggregated) startup experience, printed only
summarize(group_by(df.ioutcomes, StartupExperience), count = n(),
    MVPs = mean(SMVPOutcome), Launch = mean(SLaunchOutcome), SFSale = mean(SFSaleOutcome),
    SalesInc = mean(SSalesChange), StaffInc = mean(SStaffChange))
## Source: local data frame [9 x 7]
##
## StartupExperience count MVPs Launch SFSale SalesInc StaffInc
## 1 0 22 0.5455 0.2727 0.13636 0.3636 0.3636
## 2 1 26 0.3462 0.1154 0.23077 0.4615 0.4615
## 3 2 20 0.5000 0.3000 0.55000 0.5500 0.5500
## 4 3 11 0.7273 0.1818 0.09091 0.4545 0.4545
## 5 4 5 0.6000 0.4000 0.00000 0.2000 0.2000
## 6 5 2 1.0000 0.5000 0.00000 0.5000 0.5000
## 7 6 3 1.0000 0.3333 0.33333 0.6667 0.6667
## 8 10 1 1.0000 1.0000 1.00000 0.0000 0.0000
## 9 NA 2 1.0000 0.5000 0.00000 0.5000 0.5000
# Density plots of incubation outcome Vs StartupExperience
ggplot(data = df.ioutcomes, aes(aggStartupExperience, fill = SMVPOutcome)) +
geom_histogram(position = "dodge", aes(y = ..density..))
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
ggplot(data = df.ioutcomes, aes(aggStartupExperience, fill = SLaunchOutcome)) +
geom_histogram(position = "dodge", aes(y = ..density..))
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
ggplot(data = df.ioutcomes, aes(aggStartupExperience, fill = SFSaleOutcome)) +
geom_histogram(position = "dodge", aes(y = ..density..))
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
ggplot(data = df.ioutcomes, aes(aggStartupExperience, fill = SSalesChange)) +
geom_histogram(position = "dodge", aes(y = ..density..))
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
ggplot(data = df.ioutcomes, aes(aggStartupExperience, fill = SStaffChange)) +
geom_histogram(position = "dodge", aes(y = ..density..))
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
# One line per incubator outcome over aggregated startup experience:
# red = MVPs, blue = Launch, green = SFSale, black = SalesInc, yellow = StaffInc.
# The yellow line previously re-plotted SalesInc, so StaffInc was never drawn.
ggplot(data = df) +
    geom_line(aes(aggStartupExperience, MVPs), color = "red") +
    geom_line(aes(aggStartupExperience, Launch), color = "blue") +
    geom_line(aes(aggStartupExperience, SFSale), color = "green") +
    geom_line(aes(aggStartupExperience, SalesInc), color = "black") +
    geom_line(aes(aggStartupExperience, StaffInc), color = "yellow")
## Warning: Removed 2 rows containing missing values (geom_path).
## Warning: Removed 2 rows containing missing values (geom_path).
## Warning: Removed 2 rows containing missing values (geom_path).
## Warning: Removed 2 rows containing missing values (geom_path).
## Warning: Removed 2 rows containing missing values (geom_path).
# Plotting color timeline with aggregated startup experience MVP
qplot(data = df.ioutcomes, SuccessMVP, aggStartupExperience, geom = "boxplot")
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).
qplot(data = df.ioutcomes, SMVPDateFormat, geom = "density", group = factor(aggStartupExperience),
color = aggStartupExperience, adjust = 2) + geom_vline(xintercept = as.numeric(as.Date("2013-09-01")),
colour = "red", linetype = "longdash") + geom_vline(xintercept = as.numeric(as.Date("2013-01-01")),
colour = "red", linetype = "longdash") + scale_colour_gradient(low = "green",
high = "blue")
## Warning: Removed 5 rows containing non-finite values (stat_density).
## Warning: Removed 9 rows containing non-finite values (stat_density).
## Warning: Removed 3 rows containing non-finite values (stat_density).
## Warning: Removed 2 rows containing non-finite values (stat_density).
## Warning: Removed 1 rows containing non-finite values (stat_density).
# Launch
qplot(data = df.ioutcomes, SuccessLaunch, aggStartupExperience, geom = "boxplot")
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).
qplot(data = df.ioutcomes, SLaunchDateFormat, geom = "histogram", fill = factor(aggStartupExperience)) +
geom_vline(xintercept = as.numeric(as.Date("2013-09-01")), colour = "red",
linetype = "longdash") + geom_vline(xintercept = as.numeric(as.Date("2013-01-01")),
colour = "green", linetype = "longdash")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
# First Sale
qplot(data = df.ioutcomes, SFSaleDateFormat, geom = "histogram", fill = factor(aggStartupExperience)) +
geom_vline(xintercept = as.numeric(as.Date("2013-09-01")), colour = "red",
linetype = "longdash") + geom_vline(xintercept = as.numeric(as.Date("2013-01-01")),
colour = "green", linetype = "longdash")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## Warning: position_stack requires constant width: output may be incorrect
qplot(data = df.ioutcomes, SuccessFirstSale, aggStartupExperience, geom = "boxplot")
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).
# Get Pre-Entry business and management ready Pairs plot: survival together
# with the pre-entry business and management items
pairs.panels(dplyr::select(smallData, SurvivalFeb14, PreEntryBusinessWork:PreEntryManagement3),
pch = ".", main = "Survival and PreEntry Management and Business")
# Factor analysis of the pre-entry business, management and planning items
# with 3 factors. Note: this is an exploratory fit (the printed result reports
# "Factor Analysis using method = minres"), not a confirmatory (CFA) model.
fa.preentrybusiness <- fa(dplyr::select(smallData, PreEntryBusinessWork:PrePlanningModel),
3)
# Draw the loading diagram, then print the full factor-analysis summary
fa.diagram(fa.preentrybusiness)
fa.preentrybusiness
## Factor Analysis using method = minres
## Call: fa(r = dplyr::select(smallData, PreEntryBusinessWork:PrePlanningModel),
## nfactors = 3)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 MR2 MR3 h2 u2 com
## PreEntryBusinessWork -0.04 0.10 0.48 0.26 0.74 1.1
## PreEntryBusinessEducation 0.07 0.00 0.82 0.69 0.31 1.0
## PreEntryBusinessHobbies -0.13 0.05 0.56 0.32 0.68 1.1
## PreEntryManagement1 0.08 0.71 -0.07 0.52 0.48 1.1
## PreEntryManagement2 0.01 0.90 0.01 0.81 0.19 1.0
## PreEntryManagement3 -0.05 0.70 0.11 0.52 0.48 1.1
## PrePlanningMarket 0.76 0.11 -0.09 0.63 0.37 1.1
## PrePlanningCompetitive 0.85 0.09 -0.02 0.77 0.23 1.0
## PrePlanningBrand 0.82 -0.01 0.05 0.69 0.31 1.0
## PrePlanningModel 0.90 -0.11 0.06 0.77 0.23 1.0
##
## MR1 MR2 MR3
## SS loadings 2.84 1.88 1.27
## Proportion Var 0.28 0.19 0.13
## Cumulative Var 0.28 0.47 0.60
## Proportion Explained 0.47 0.31 0.21
## Cumulative Proportion 0.47 0.79 1.00
##
## With factor correlations of
## MR1 MR2 MR3
## MR1 1.00 0.35 0.17
## MR2 0.35 1.00 0.25
## MR3 0.17 0.25 1.00
##
## Mean item complexity = 1
## Test of the hypothesis that 3 factors are sufficient.
##
## The degrees of freedom for the null model are 45 and the objective function was 4.84 with Chi Square of 420.1
## The degrees of freedom for the model are 18 and the objective function was 0.41
##
## The root mean square of the residuals (RMSR) is 0.04
## The df corrected root mean square of the residuals is 0.06
##
## The harmonic number of observations is 92 with the empirical chi square 12.68 with prob < 0.81
## The total number of observations was 92 with MLE Chi Square = 34.36 with prob < 0.011
##
## Tucker Lewis Index of factoring reliability = 0.888
## RMSEA index = 0.107 and the 90 % confidence intervals are 0.046 0.149
## BIC = -47.03
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy
## MR1 MR2 MR3
## Correlation of scores with factors 0.96 0.93 0.87
## Multiple R square of scores with factors 0.91 0.87 0.76
## Minimum correlation of possible factor scores 0.83 0.74 0.51
# Reliability (alpha) of the three pre-entry business items.
# select() is namespace-qualified because the bare call failed here with
# "Error: unused argument (PreEntryBusinessWork:PreEntryBusinessHobbies)" --
# presumably dplyr::select is masked by another attached package. alpha() is
# qualified as well because ggplot2 also exports a function named alpha().
# NOTE(review): assumes psych is the intended source of alpha(), as for fa()
# and pairs.panels() -- confirm.
psych::alpha(dplyr::select(smallData, PreEntryBusinessWork:PreEntryBusinessHobbies))
# Per-startup averages of the pre-entry item groups. A row with an NA in any
# item of a group gets NA for that group's average (rowMeans keeps NAs).
# Business: work, education, hobbies
df.ioutcomes$PreEntryBusinesAve <- rowMeans(
    df.ioutcomes[, c("PreEntryBusinessWork", "PreEntryBusinessEducation", "PreEntryBusinessHobbies")]
)
# Management: items 1-3
df.ioutcomes$PreEntryMgtAve <- rowMeans(
    df.ioutcomes[, c("PreEntryManagement1", "PreEntryManagement2", "PreEntryManagement3")]
)
# Planning: market, competitive, brand, model
df.ioutcomes$PreEntryPlanAve <- rowMeans(
    df.ioutcomes[, c("PrePlanningMarket", "PrePlanningCompetitive", "PrePlanningBrand", "PrePlanningModel")]
)
# Single-factor analysis of the three averages, with its diagram
fa.preentrybusiness <- fa(
    dplyr::select(df.ioutcomes, PreEntryBusinesAve:PreEntryPlanAve),
    1
)
fa.diagram(fa.preentrybusiness)
# Pairs plot of the three averages
pairs.panels(
    dplyr::select(df.ioutcomes, PreEntryBusinesAve:PreEntryPlanAve),
    pch = ".",
    main = "Survival and PreEntry Management and Business"
)
# MVP~PreEntryExperience
preentry.vars <- "PreEntryBusinesAve + PreEntryMgtAve + PreEntryPlanAve +StartupExperience"
formula.MVP <- paste0("SMVPOutcome ~ ", preentry.vars)
summary(glm(as.formula(formula.MVP), data = df.ioutcomes, family = binomial(logit)))
##
## Call:
## glm(formula = as.formula(formula.MVP), family = binomial(logit),
## data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.523 -1.036 0.470 0.928 1.902
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.384 1.149 -0.33 0.7382
## PreEntryBusinesAve -0.441 0.296 -1.49 0.1364
## PreEntryMgtAve 0.847 0.285 2.97 0.0029 **
## PreEntryPlanAve -0.277 0.225 -1.23 0.2173
## StartupExperience 0.363 0.158 2.30 0.0215 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 123.10 on 88 degrees of freedom
## Residual deviance: 105.87 on 84 degrees of freedom
## (3 observations deleted due to missingness)
## AIC: 115.9
##
## Number of Fisher Scoring iterations: 3
# Launch~PreEntryExperience
formula.Launch <- paste0("SLaunchOutcome ~ ", preentry.vars) #nothing really significant
summary(glm(as.formula(formula.Launch), data = df.ioutcomes, family = binomial(logit)))
##
## Call:
## glm(formula = as.formula(formula.Launch), family = binomial(logit),
## data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.291 -0.787 -0.556 -0.306 2.245
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.286 1.438 -2.28 0.022 *
## PreEntryBusinesAve -0.237 0.309 -0.77 0.444
## PreEntryMgtAve 0.449 0.281 1.60 0.110
## PreEntryPlanAve 0.387 0.254 1.52 0.128
## StartupExperience 0.211 0.141 1.50 0.134
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 99.543 on 88 degrees of freedom
## Residual deviance: 89.801 on 84 degrees of freedom
## (3 observations deleted due to missingness)
## AIC: 99.8
##
## Number of Fisher Scoring iterations: 4
summary(glm(SLaunchOutcome ~ PreEntryBusinesAve + PreEntryManagement2 + PrePlanningModel +
StartupExperience, data = df.ioutcomes, family = binomial(logit)))
##
## Call:
## glm(formula = SLaunchOutcome ~ PreEntryBusinesAve + PreEntryManagement2 +
## PrePlanningModel + StartupExperience, family = binomial(logit),
## data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.412 -0.791 -0.519 -0.203 2.300
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.590 1.497 -2.40 0.016 *
## PreEntryBusinesAve -0.303 0.326 -0.93 0.352
## PreEntryManagement2 0.519 0.270 1.92 0.055 .
## PrePlanningModel 0.441 0.215 2.05 0.040 *
## StartupExperience 0.233 0.142 1.64 0.101
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 100.11 on 89 degrees of freedom
## Residual deviance: 87.86 on 85 degrees of freedom
## (2 observations deleted due to missingness)
## AIC: 97.86
##
## Number of Fisher Scoring iterations: 5
formula.Launch <- "SLaunchOutcome ~ PreEntryBusinesAve + PreEntryManagement2 + PrePlanningModel + StartupExperience"
# 1stSale~PreEntryExperience
formula.1stSale <- paste0("SFSaleOutcome ~ ", preentry.vars)
summary(glm(as.formula(formula.1stSale), data = df.ioutcomes, family = binomial(logit)))
##
## Call:
## glm(formula = as.formula(formula.1stSale), family = binomial(logit),
## data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.008 -0.697 -0.541 -0.374 2.350
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.40209 1.53648 -2.21 0.027 *
## PreEntryBusinesAve 0.04219 0.33252 0.13 0.899
## PreEntryMgtAve -0.06186 0.29293 -0.21 0.833
## PreEntryPlanAve 0.58205 0.27684 2.10 0.036 *
## StartupExperience -0.00341 0.15507 -0.02 0.982
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 86.808 on 88 degrees of freedom
## Residual deviance: 81.539 on 84 degrees of freedom
## (3 observations deleted due to missingness)
## AIC: 91.54
##
## Number of Fisher Scoring iterations: 4
# GrowthSales~PreEntryExperience
formula.Sales <- paste0("SSalesChange ~ ", preentry.vars)
summary(glm(as.formula(formula.Sales), data = df.ioutcomes, family = binomial(logit)))
##
## Call:
## glm(formula = as.formula(formula.Sales), family = binomial(logit),
## data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.365 -0.747 -0.444 0.520 2.109
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.6833 1.8219 -3.67 0.00024 ***
## PreEntryBusinesAve 0.8011 0.3729 2.15 0.03171 *
## PreEntryMgtAve -0.3072 0.3133 -0.98 0.32681
## PreEntryPlanAve 0.9827 0.2946 3.34 0.00085 ***
## StartupExperience 0.0525 0.1501 0.35 0.72647
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 101.710 on 88 degrees of freedom
## Residual deviance: 80.664 on 84 degrees of freedom
## (3 observations deleted due to missingness)
## AIC: 90.66
##
## Number of Fisher Scoring iterations: 5
# Employee growth ~ pre-entry experience (logistic regression on the shared
# pre-entry predictor string).
formula.Staff <- paste("SStaffChange ~", preentry.vars)
summary(
  glm(
    formula = as.formula(formula.Staff),
    family = binomial(logit),
    data = df.ioutcomes
  )
)
##
## Call:
## glm(formula = as.formula(formula.Staff), family = binomial(logit),
## data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.012 -0.926 -0.357 0.992 2.167
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.4254 1.5584 -3.48 0.0005 ***
## PreEntryBusinesAve 0.3224 0.3027 1.07 0.2868
## PreEntryMgtAve 0.5290 0.2696 1.96 0.0497 *
## PreEntryPlanAve 0.8086 0.2480 3.26 0.0011 **
## StartupExperience -0.0917 0.1365 -0.67 0.5017
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 122.469 on 88 degrees of freedom
## Residual deviance: 98.337 on 84 degrees of freedom
## (3 observations deleted due to missingness)
## AIC: 108.3
##
## Number of Fisher Scoring iterations: 4
Next, add these control variables:
# Resource shares: strip the first "%" from each value and convert the
# remainder to integer, for all three resource columns in one pass.
df.ioutcomes[c("ResourceInvestment", "ResourceOperations", "ResourceHuman")] <- lapply(
  df.ioutcomes[c("ResourceInvestment", "ResourceOperations", "ResourceHuman")],
  function(pct) as.integer(sub("%", "", pct))
)
# Team age bracket viewed as an ordered factor. The result is only printed;
# nothing is written back to df.ioutcomes.
# NOTE(review): the printed levels below include "15-34 years" and
# "24-34 years", which look like data-entry typos -- confirm and recode.
factor(df.ioutcomes$TeamAge, ordered = TRUE)
## [1] 25-34 years 25-34 years 25-34 years 25-34 years 16-24 years
## [6] 25-34 years 25-34 years 35-44 years 25-34 years 25-34 years
## [11] 25-34 years 35-44 years 15-34 years 25-34 years 25-34 years
## [16] 24-34 years 25-34 years 25-34 years 25-34 years 25-34 years
## [21] 25-34 years 25-34 years 35-44 years 25-34 years 25-34 years
## [26] 35-44 years 25-34 years 25-34 years 25-34 years 16-24 years
## [31] 16-24 years 16-24 years 25-34 years 16-24 years 16-24 years
## [36] 25-34 years 25-34 years 25-34 years 25-34 years 25-34 years
## [41] 25-34 years 25-34 years 25-34 years 25-34 years 25-34 years
## [46] 25-34 years 25-34 years 25-34 years 25-34 years 16-24 years
## [51] 25-34 years 25-34 years 25-34 years 25-34 years 25-34 years
## [56] 25-34 years 25-34 years 25-34 years 25-34 years 25-34 years
## [61] 35-44 years 25-34 years 25-34 years 35-44 years 35-44 years
## [66] 35-44 years 35-44 years 25-34 years 25-34 years 25-34 years
## [71] 35-44 years 25-34 years 25-34 years 25-34 years 25-34 years
## [76] 25-34 years 25-34 years 35-44 years 35-44 years 25-34 years
## [81] 35-44 years 25-34 years 35-44 years 25-34 years 25-34 years
## [86] 35-44 years 25-34 years 35-44 years 25-34 years 25-34 years
## [91] 25-34 years 25-34 years
## 5 Levels: 15-34 years < 16-24 years < 24-34 years < ... < 35-44 years
# Per-row averages of the three items behind each team construct
# (heterogeneity, flexibility, preference). A row with a missing item
# gets NA, because rowMeans() is called without na.rm.
df.ioutcomes$HeterogeneityAve <- rowMeans(df.ioutcomes[paste0("TeamHeterogeneity", 1:3)])
df.ioutcomes$FlexibilityAve <- rowMeans(df.ioutcomes[paste0("TeamFlexibility", 1:3)])
df.ioutcomes$PreferenceAve <- rowMeans(df.ioutcomes[paste0("TeamPreference", 1:3)])
# Nationality viewed as a factor. The result is only printed, not stored.
# NOTE(review): the printed values below contain both "Indian" and "India",
# and "Sir Lanka" -- these look like spelling variants; confirm and clean.
factor(df.ioutcomes[["PersonalNationality"]])
## [1] Argentina England Argentina Argentina Indian
## [6] India USA England Peru Chile
## [11] Brazil Italy USA Argentina India
## [16] Mexico USA USA Brazil Mexico
## [21] USA Mexico India USA Venezuela
## [26] Brazil USA Italy Italy USA
## [31] India Israel Canada Uruguay China
## [36] USA USA USA Pakistan USA
## [41] England USA USA USA Argentina
## [46] Argentina France Argentina Chile Argentina
## [51] Argentina Ecuador Spain Kenya France
## [56] India USA USA Mexico Indonesia
## [61] England Chile Chile Peru Venezuela
## [66] Argentina Greece Ukraine Austria USA
## [71] Chile India Spain USA Russia
## [76] Spain Chile Netherlands Uruguay New Zealand
## [81] Chile Chile Chile Chile Peru
## [86] Bolivia Chile USA Chile Chile
## [91] Sir Lanka USA
## 29 Levels: Argentina Austria Bolivia Brazil Canada Chile China ... Venezuela
And now let's add these controls too:
# Mentor flag as logical: the first factor level maps to FALSE, any later
# level to TRUE. The result is only printed, not stored.
as.integer(factor(df.ioutcomes$IncubatorMentor)) > 1L
## [1] TRUE TRUE TRUE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
## [12] FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE
## [23] TRUE TRUE TRUE FALSE FALSE TRUE FALSE TRUE FALSE FALSE TRUE
## [34] FALSE TRUE FALSE TRUE FALSE FALSE FALSE TRUE TRUE TRUE TRUE
## [45] FALSE FALSE TRUE FALSE FALSE FALSE TRUE FALSE TRUE FALSE FALSE
## [56] FALSE TRUE TRUE FALSE FALSE TRUE TRUE FALSE TRUE TRUE FALSE
## [67] FALSE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE FALSE TRUE
## [78] TRUE TRUE TRUE FALSE FALSE FALSE TRUE FALSE TRUE FALSE TRUE
## [89] FALSE FALSE FALSE FALSE
# Fix Personal Degrees: the same degree appears with a curly (’) and a
# straight (') apostrophe; unify on the straight one.
unique(df.ioutcomes$PersonalDegree)
## [1] "Master’s degree" "Bachelor’s degree" "Doctorate degree"
## [4] "Master's degree" "Bachelor's degree" "High school"
## [7] "Professional degree"
df.ioutcomes$PersonalDegree[df.ioutcomes$PersonalDegree == "Bachelor’s degree"] <- "Bachelor's degree"
df.ioutcomes$PersonalDegree[df.ioutcomes$PersonalDegree == "Master’s degree"] <- "Master's degree"
# Professional degrees are folded into the doctorate category.
df.ioutcomes$PersonalDegree[df.ioutcomes$PersonalDegree == "Professional degree"] <- "Doctorate degree"
# PersonalDegree is categorical, so plot counts per category with a bar
# geom; a histogram geom bins a continuous x and is rejected for discrete
# data by current ggplot2.
qplot(data = df.ioutcomes, PersonalDegree, geom = "bar")
# Ethnicity: list the distinct categories, then plot how often each occurs.
unique(df.ioutcomes$PersonalEthnicity)
## [1] "Latino or Hispanic" "Caucasian (non-Hispanic)"
## [3] "Asian/Pacific Islanders" "African-American (non-Hispanic)"
## [5] "Other"
# Categorical column: use a bar geom (counts per category); a histogram
# geom needs a continuous x and is rejected for discrete data by current
# ggplot2.
qplot(data = df.ioutcomes, PersonalEthnicity, geom = "bar")
# Check the storage type of StartupAge, then look at the distributions of
# StartupMBAs and StartupPhDs.
class(df.ioutcomes[["StartupAge"]])
## [1] "integer"
qplot(StartupMBAs, data = df.ioutcomes, geom = "histogram")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
qplot(StartupPhDs, data = df.ioutcomes, geom = "histogram")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
# Programme flags: treat empty strings as missing, then plot the count of
# each remaining answer. These columns are categorical, so a bar geom is
# used; a histogram geom needs a continuous x and is rejected for discrete
# data by current ggplot2.
df.ioutcomes$SUPFastTrack[df.ioutcomes$SUPFastTrack == ""] <- NA
qplot(data = df.ioutcomes, SUPFastTrack, geom = "bar")
df.ioutcomes$SUPApplyDemoDay[df.ioutcomes$SUPApplyDemoDay == ""] <- NA
qplot(data = df.ioutcomes, SUPApplyDemoDay, geom = "bar")
df.ioutcomes$SUPPresentDemoDay[df.ioutcomes$SUPPresentDemoDay == ""] <- NA
qplot(data = df.ioutcomes, SUPPresentDemoDay, geom = "bar")
Adding all the controls to the basic models:
# MVP: MGT + Startup Experience are important
# With all
# Right-hand-side fragment holding every control variable, as a single
# string to be appended to a base formula string.
# TeamAge is written as a bare column name so it is looked up in the `data`
# argument of the model call; hard-coding df.ioutcomes$TeamAge would bypass
# `data` and break predict(newdata = ) or fitting on a subset.
# NOTE: coefficient labels in output recorded before this change read
# "factor(df.ioutcomes$TeamAge)..." instead of "factor(TeamAge)...".
control.vars <- paste(
  c(
    "SatisfactionJob", "SatisfactionIncubator",
    "ResourceInvestment", "ResourceOperations", "ResourceHuman",
    "TeamNumberOwners",
    "factor(TeamAge)",
    "HeterogeneityAve", "FlexibilityAve", "PreferenceAve",
    "IncubatorMentor",
    "factor(PersonalDegree)", "factor(PersonalEthnicity)",
    "StartupAge", "StartupMBAs", "StartupPhDs",
    "SUPFastTrack", "SUPApplyDemoDay", "SUPPresentDemoDay"
  ),
  collapse = " + "
)
# Base MVP formula plus the full set of controls.
formula.MVP2 <- paste(formula.MVP, control.vars, sep = " + ")
# MgtAve still significant, but StartupExperience not. Need to remove some
# variables. Control variables SatisfactionIncubator, StartupAge,
# SUPFastTrack.
summary(
  glm(
    formula = as.formula(formula.MVP2),
    family = binomial,
    data = df.ioutcomes
  )
)
##
## Call:
## glm(formula = as.formula(formula.MVP2), family = binomial, data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0080 -0.5065 0.0507 0.6743 1.7475
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 14.8200 5594.9135
## PreEntryBusinesAve -0.3233 0.4770
## PreEntryMgtAve 1.1935 0.4906
## PreEntryPlanAve 0.1364 0.4701
## StartupExperience 0.3685 0.3272
## SatisfactionJob -0.2625 0.5773
## SatisfactionIncubator 0.8369 0.4783
## ResourceInvestment 0.0862 0.1787
## ResourceOperations 0.1091 0.1706
## ResourceHuman 0.1412 0.1725
## TeamNumberOwners 0.1675 0.4230
## factor(df.ioutcomes$TeamAge)16-24 years -17.0912 3956.1809
## factor(df.ioutcomes$TeamAge)24-34 years 1.3455 5594.8842
## factor(df.ioutcomes$TeamAge)25-34 years -14.5021 3956.1807
## factor(df.ioutcomes$TeamAge)35-44 years -13.9638 3956.1809
## HeterogeneityAve -0.0594 0.3791
## FlexibilityAve -1.0609 0.6520
## PreferenceAve 0.1765 0.4114
## IncubatorMentorYes 1.1328 1.0218
## factor(PersonalDegree)Doctorate degree 1.0779 2.1703
## factor(PersonalDegree)Master's degree 1.0045 0.8940
## factor(PersonalEthnicity)Asian/Pacific Islanders -13.1631 3956.1811
## factor(PersonalEthnicity)Caucasian (non-Hispanic) -11.9621 3956.1810
## factor(PersonalEthnicity)Latino or Hispanic -13.4540 3956.1809
## factor(PersonalEthnicity)Other -13.3596 3956.1809
## StartupAge -0.2027 0.0870
## StartupMBAs -0.0235 0.4219
## StartupPhDs -2.2087 1.4101
## SUPFastTrackYes -4.8506 2.0914
## SUPApplyDemoDayYes 1.7876 1.1971
## SUPPresentDemoDayYes 2.2418 1.5814
## z value Pr(>|z|)
## (Intercept) 0.00 0.998
## PreEntryBusinesAve -0.68 0.498
## PreEntryMgtAve 2.43 0.015 *
## PreEntryPlanAve 0.29 0.772
## StartupExperience 1.13 0.260
## SatisfactionJob -0.45 0.649
## SatisfactionIncubator 1.75 0.080 .
## ResourceInvestment 0.48 0.630
## ResourceOperations 0.64 0.522
## ResourceHuman 0.82 0.413
## TeamNumberOwners 0.40 0.692
## factor(df.ioutcomes$TeamAge)16-24 years 0.00 0.997
## factor(df.ioutcomes$TeamAge)24-34 years 0.00 1.000
## factor(df.ioutcomes$TeamAge)25-34 years 0.00 0.997
## factor(df.ioutcomes$TeamAge)35-44 years 0.00 0.997
## HeterogeneityAve -0.16 0.875
## FlexibilityAve -1.63 0.104
## PreferenceAve 0.43 0.668
## IncubatorMentorYes 1.11 0.268
## factor(PersonalDegree)Doctorate degree 0.50 0.619
## factor(PersonalDegree)Master's degree 1.12 0.261
## factor(PersonalEthnicity)Asian/Pacific Islanders 0.00 0.997
## factor(PersonalEthnicity)Caucasian (non-Hispanic) 0.00 0.998
## factor(PersonalEthnicity)Latino or Hispanic 0.00 0.997
## factor(PersonalEthnicity)Other 0.00 0.997
## StartupAge -2.33 0.020 *
## StartupMBAs -0.06 0.956
## StartupPhDs -1.57 0.117
## SUPFastTrackYes -2.32 0.020 *
## SUPApplyDemoDayYes 1.49 0.135
## SUPPresentDemoDayYes 1.42 0.156
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 115.255 on 83 degrees of freedom
## Residual deviance: 62.501 on 53 degrees of freedom
## (8 observations deleted due to missingness)
## AIC: 124.5
##
## Number of Fisher Scoring iterations: 16
# Reduced MVP model: base formula plus only the three controls retained
# from the full model (SatisfactionIncubator, StartupAge, SUPFastTrack).
formula.MVP2 <- paste(
  formula.MVP,
  "SatisfactionIncubator + StartupAge + SUPFastTrack",
  sep = " + "
)
# The fit is kept in logit.MVP because the odds ratios are computed from it
# afterwards.
logit.MVP <- glm(
  formula = as.formula(formula.MVP2),
  family = binomial,
  data = df.ioutcomes
)
summary(logit.MVP)
##
## Call:
## glm(formula = as.formula(formula.MVP2), family = binomial, data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.066 -0.860 0.415 0.827 1.795
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.2894 1.2717 -0.23 0.820
## PreEntryBusinesAve -0.4287 0.3103 -1.38 0.167
## PreEntryMgtAve 0.7079 0.3135 2.26 0.024 *
## PreEntryPlanAve -0.2525 0.2877 -0.88 0.380
## StartupExperience 0.3350 0.1750 1.91 0.056 .
## SatisfactionIncubator 0.5304 0.2370 2.24 0.025 *
## StartupAge -0.1072 0.0537 -2.00 0.046 *
## SUPFastTrackYes -1.1639 0.6772 -1.72 0.086 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 118.476 on 85 degrees of freedom
## Residual deviance: 90.803 on 78 degrees of freedom
## (6 observations deleted due to missingness)
## AIC: 106.8
##
## Number of Fisher Scoring iterations: 4
# Exponentiate the logit coefficients to read them as odds ratios.
exp(stats::coef(logit.MVP))
## (Intercept) PreEntryBusinesAve PreEntryMgtAve
## 0.7487 0.6514 2.0297
## PreEntryPlanAve StartupExperience SatisfactionIncubator
## 0.7768 1.3980 1.6996
## StartupAge SUPFastTrackYes
## 0.8983 0.3123
### CONTINUE UPDATING FORMULAS BELOW
### CONSIDER USING A VARIABLE SELECTION PROCESS FOR THE CONTROL VARIABLES
## LAUNCH
# Launch: MGT is important
# Full model: the launch formula plus every control variable.
formula.Launch2 <- paste(formula.Launch, control.vars, sep = " + ")
summary(
  glm(
    formula = as.formula(formula.Launch2),
    family = binomial,
    data = df.ioutcomes
  )
)
##
## Call:
## glm(formula = as.formula(formula.Launch2), family = binomial,
## data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.5918 -0.4071 -0.0048 0.0000 1.7288
##
## Coefficients:
## Estimate Std. Error
## (Intercept) -5.31e+01 1.52e+04
## PreEntryBusinesAve -1.08e+00 9.28e-01
## PreEntryManagement2 1.06e+00 5.98e-01
## PrePlanningModel 7.92e-01 4.99e-01
## StartupExperience 7.43e-02 2.89e-01
## SatisfactionJob 5.69e-01 9.39e-01
## SatisfactionIncubator 6.87e-01 5.71e-01
## ResourceInvestment 6.10e-01 3.67e-01
## ResourceOperations 5.53e-01 3.62e-01
## ResourceHuman 5.00e-01 3.52e-01
## TeamNumberOwners 2.60e-01 6.33e-01
## factor(df.ioutcomes$TeamAge)16-24 years 1.99e+01 1.08e+04
## factor(df.ioutcomes$TeamAge)24-34 years 6.70e-01 1.52e+04
## factor(df.ioutcomes$TeamAge)25-34 years 1.94e+01 1.08e+04
## factor(df.ioutcomes$TeamAge)35-44 years 1.81e+01 1.08e+04
## HeterogeneityAve -2.40e-01 5.79e-01
## FlexibilityAve 2.76e-01 5.67e-01
## PreferenceAve -2.18e+00 8.90e-01
## IncubatorMentorYes 7.06e-01 1.44e+00
## factor(PersonalDegree)Doctorate degree 1.60e-01 2.67e+00
## factor(PersonalDegree)High school -2.23e+01 1.08e+04
## factor(PersonalDegree)Master's degree -2.06e-01 1.22e+00
## factor(PersonalEthnicity)Asian/Pacific Islanders -4.22e+01 1.10e+04
## factor(PersonalEthnicity)Caucasian (non-Hispanic) -1.86e+01 1.08e+04
## factor(PersonalEthnicity)Latino or Hispanic -1.94e+01 1.08e+04
## factor(PersonalEthnicity)Other -4.07e+01 1.15e+04
## StartupAge -6.52e-02 9.61e-02
## StartupMBAs -9.42e-01 7.58e-01
## StartupPhDs -9.97e-01 1.47e+00
## SUPFastTrackYes -2.13e+00 1.86e+00
## SUPApplyDemoDayYes 1.82e+00 1.56e+00
## SUPPresentDemoDayYes 1.97e+00 1.76e+00
## z value Pr(>|z|)
## (Intercept) 0.00 0.997
## PreEntryBusinesAve -1.16 0.245
## PreEntryManagement2 1.77 0.077 .
## PrePlanningModel 1.59 0.112
## StartupExperience 0.26 0.797
## SatisfactionJob 0.61 0.544
## SatisfactionIncubator 1.20 0.229
## ResourceInvestment 1.66 0.097 .
## ResourceOperations 1.53 0.126
## ResourceHuman 1.42 0.155
## TeamNumberOwners 0.41 0.682
## factor(df.ioutcomes$TeamAge)16-24 years 0.00 0.999
## factor(df.ioutcomes$TeamAge)24-34 years 0.00 1.000
## factor(df.ioutcomes$TeamAge)25-34 years 0.00 0.999
## factor(df.ioutcomes$TeamAge)35-44 years 0.00 0.999
## HeterogeneityAve -0.41 0.679
## FlexibilityAve 0.49 0.627
## PreferenceAve -2.45 0.014 *
## IncubatorMentorYes 0.49 0.624
## factor(PersonalDegree)Doctorate degree 0.06 0.952
## factor(PersonalDegree)High school 0.00 0.998
## factor(PersonalDegree)Master's degree -0.17 0.866
## factor(PersonalEthnicity)Asian/Pacific Islanders 0.00 0.997
## factor(PersonalEthnicity)Caucasian (non-Hispanic) 0.00 0.999
## factor(PersonalEthnicity)Latino or Hispanic 0.00 0.999
## factor(PersonalEthnicity)Other 0.00 0.997
## StartupAge -0.68 0.497
## StartupMBAs -1.24 0.214
## StartupPhDs -0.68 0.499
## SUPFastTrackYes -1.15 0.251
## SUPApplyDemoDayYes 1.17 0.243
## SUPPresentDemoDayYes 1.12 0.264
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 95.044 on 84 degrees of freedom
## Residual deviance: 45.959 on 53 degrees of freedom
## (7 observations deleted due to missingness)
## AIC: 110
##
## Number of Fisher Scoring iterations: 18
# Reduced launch model. Controls kept from the full model:
# ResourceInvestment + ResourceOperations + ResourceHuman + PreferenceAve,
# and SUPFastTrack as well.
formula.Launch2 <- paste(
  formula.Launch,
  "ResourceInvestment + ResourceOperations + ResourceHuman + PreferenceAve + SUPFastTrack",
  sep = " + "
)
summary(
  glm(
    formula = as.formula(formula.Launch2),
    family = binomial,
    data = df.ioutcomes
  )
)
##
## Call:
## glm(formula = as.formula(formula.Launch2), family = binomial,
## data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.367 -0.735 -0.454 -0.128 2.336
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -21.686 16.129 -1.34 0.179
## PreEntryBusinesAve -0.330 0.361 -0.92 0.360
## PreEntryManagement2 0.632 0.285 2.22 0.027 *
## PrePlanningModel 0.598 0.270 2.22 0.026 *
## StartupExperience 0.223 0.154 1.44 0.148
## ResourceInvestment 0.203 0.164 1.24 0.215
## ResourceOperations 0.204 0.159 1.29 0.198
## ResourceHuman 0.192 0.157 1.23 0.220
## PreferenceAve -0.608 0.359 -1.69 0.090 .
## SUPFastTrackYes -0.197 0.661 -0.30 0.766
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 95.044 on 84 degrees of freedom
## Residual deviance: 77.650 on 75 degrees of freedom
## (7 observations deleted due to missingness)
## AIC: 97.65
##
## Number of Fisher Scoring iterations: 5
# 1st Sale: StartupExperience is important
# With all controls (logistic regression).
# TeamAge is referenced as a bare column so it is resolved through `data`
# instead of reaching around it with df.ioutcomes$TeamAge; coefficient
# labels in output recorded before this change read
# "factor(df.ioutcomes$TeamAge)...".
summary(glm(
  SFSaleOutcome ~ PreEntryBusinesAve + PreEntryMgtAve + PrePlanningModel + StartupExperience
    + SatisfactionJob + SatisfactionIncubator
    + ResourceInvestment + ResourceOperations + ResourceHuman
    + TeamNumberOwners
    + factor(TeamAge) # sparse levels: standard errors in the thousands in the recorded output
    + HeterogeneityAve + FlexibilityAve + PreferenceAve
    + IncubatorMentor
    + factor(PersonalDegree)
    + factor(PersonalEthnicity)
    + StartupAge
    + StartupMBAs + StartupPhDs,
  family = binomial,
  data = df.ioutcomes
))
##
## Call:
## glm(formula = SFSaleOutcome ~ PreEntryBusinesAve + PreEntryMgtAve +
## PrePlanningModel + StartupExperience + SatisfactionJob +
## SatisfactionIncubator + ResourceInvestment + ResourceOperations +
## ResourceHuman + TeamNumberOwners + factor(df.ioutcomes$TeamAge) +
## HeterogeneityAve + FlexibilityAve + PreferenceAve + IncubatorMentor +
## factor(PersonalDegree) + factor(PersonalEthnicity) + StartupAge +
## StartupMBAs + StartupPhDs, family = binomial, data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8383 -0.4009 -0.0484 -0.0001 2.1919
##
## Coefficients:
## Estimate Std. Error
## (Intercept) -89.0488 9224.4474
## PreEntryBusinesAve 0.3939 0.6482
## PreEntryMgtAve 0.1943 0.5487
## PrePlanningModel 0.8657 0.5928
## StartupExperience -0.2721 0.2715
## SatisfactionJob 2.8589 1.2273
## SatisfactionIncubator 0.7420 0.6589
## ResourceInvestment 0.4664 0.2725
## ResourceOperations 0.4480 0.2624
## ResourceHuman 0.4928 0.2690
## TeamNumberOwners -0.1681 0.4228
## factor(df.ioutcomes$TeamAge)16-24 years 8.9663 6522.6400
## factor(df.ioutcomes$TeamAge)24-34 years -6.1646 9224.4044
## factor(df.ioutcomes$TeamAge)25-34 years 8.6734 6522.6399
## factor(df.ioutcomes$TeamAge)35-44 years 8.0779 6522.6402
## HeterogeneityAve -0.8847 0.6994
## FlexibilityAve -0.0141 0.6136
## PreferenceAve -0.3062 0.6865
## IncubatorMentorYes -0.1867 1.1177
## factor(PersonalDegree)Doctorate degree -0.5782 2.0910
## factor(PersonalDegree)High school -12.7711 6522.6395
## factor(PersonalDegree)Master's degree 0.2856 1.1815
## factor(PersonalEthnicity)Asian/Pacific Islanders 14.4698 6522.6392
## factor(PersonalEthnicity)Caucasian (non-Hispanic) 15.2282 6522.6390
## factor(PersonalEthnicity)Latino or Hispanic 16.0578 6522.6390
## factor(PersonalEthnicity)Other -3.6802 6893.6640
## StartupAge 0.0383 0.0770
## StartupMBAs 1.5377 0.6441
## StartupPhDs -4.1610 1.9897
## z value Pr(>|z|)
## (Intercept) -0.01 0.992
## PreEntryBusinesAve 0.61 0.543
## PreEntryMgtAve 0.35 0.723
## PrePlanningModel 1.46 0.144
## StartupExperience -1.00 0.316
## SatisfactionJob 2.33 0.020 *
## SatisfactionIncubator 1.13 0.260
## ResourceInvestment 1.71 0.087 .
## ResourceOperations 1.71 0.088 .
## ResourceHuman 1.83 0.067 .
## TeamNumberOwners -0.40 0.691
## factor(df.ioutcomes$TeamAge)16-24 years 0.00 0.999
## factor(df.ioutcomes$TeamAge)24-34 years 0.00 0.999
## factor(df.ioutcomes$TeamAge)25-34 years 0.00 0.999
## factor(df.ioutcomes$TeamAge)35-44 years 0.00 0.999
## HeterogeneityAve -1.26 0.206
## FlexibilityAve -0.02 0.982
## PreferenceAve -0.45 0.656
## IncubatorMentorYes -0.17 0.867
## factor(PersonalDegree)Doctorate degree -0.28 0.782
## factor(PersonalDegree)High school 0.00 0.998
## factor(PersonalDegree)Master's degree 0.24 0.809
## factor(PersonalEthnicity)Asian/Pacific Islanders 0.00 0.998
## factor(PersonalEthnicity)Caucasian (non-Hispanic) 0.00 0.998
## factor(PersonalEthnicity)Latino or Hispanic 0.00 0.998
## factor(PersonalEthnicity)Other 0.00 1.000
## StartupAge 0.50 0.618
## StartupMBAs 2.39 0.017 *
## StartupPhDs -2.09 0.037 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 83.449 on 87 degrees of freedom
## Residual deviance: 46.407 on 59 degrees of freedom
## (4 observations deleted due to missingness)
## AIC: 104.4
##
## Number of Fisher Scoring iterations: 17
# Controls carried over from the full first-sale model:
# SatisfactionJob + ResourceInvestment + ResourceOperations + ResourceHuman + StartupMBAs + StartupPhDs
# Reduced first-sale model with only those controls.
summary(
  glm(
    formula = SFSaleOutcome ~ PreEntryBusinesAve + PreEntryMgtAve +
      PrePlanningModel + StartupExperience +
      SatisfactionJob +
      ResourceInvestment + ResourceOperations + ResourceHuman +
      StartupMBAs + StartupPhDs,
    data = df.ioutcomes,
    family = binomial
  )
)
##
## Call:
## glm(formula = SFSaleOutcome ~ PreEntryBusinesAve + PreEntryMgtAve +
## PrePlanningModel + StartupExperience + SatisfactionJob +
## ResourceInvestment + ResourceOperations + ResourceHuman +
## StartupMBAs + StartupPhDs, family = binomial, data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6888 -0.5597 -0.1990 -0.0485 2.4843
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -58.1289 26.3481 -2.21 0.0274 *
## PreEntryBusinesAve 0.3146 0.4345 0.72 0.4690
## PreEntryMgtAve 0.1385 0.3513 0.39 0.6934
## PrePlanningModel 0.6593 0.3805 1.73 0.0831 .
## StartupExperience -0.0405 0.1678 -0.24 0.8091
## SatisfactionJob 1.4562 0.6150 2.37 0.0179 *
## ResourceInvestment 0.4615 0.2521 1.83 0.0672 .
## ResourceOperations 0.4554 0.2444 1.86 0.0625 .
## ResourceHuman 0.4614 0.2447 1.89 0.0594 .
## StartupMBAs 1.0906 0.3779 2.89 0.0039 **
## StartupPhDs -3.0535 1.4627 -2.09 0.0368 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 86.808 on 88 degrees of freedom
## Residual deviance: 58.671 on 78 degrees of freedom
## (3 observations deleted due to missingness)
## AIC: 80.67
##
## Number of Fisher Scoring iterations: 7
# Sales Change:
# With all controls (logistic regression).
# TeamAge is referenced as a bare column so it is resolved through `data`
# instead of reaching around it with df.ioutcomes$TeamAge; coefficient
# labels in output recorded before this change read
# "factor(df.ioutcomes$TeamAge)...".
summary(glm(
  SSalesChange ~ PreEntryBusinesAve + PreEntryMgtAve + PrePlanningModel + StartupExperience
    + SatisfactionJob + SatisfactionIncubator
    + ResourceInvestment + ResourceOperations + ResourceHuman
    + TeamNumberOwners
    + factor(TeamAge) # sparse levels: standard errors in the thousands in the recorded output
    + HeterogeneityAve + FlexibilityAve + PreferenceAve
    + IncubatorMentor
    + factor(PersonalDegree)
    + factor(PersonalEthnicity)
    + StartupAge
    + StartupMBAs + StartupPhDs,
  family = binomial,
  data = df.ioutcomes
))
##
## Call:
## glm(formula = SSalesChange ~ PreEntryBusinesAve + PreEntryMgtAve +
## PrePlanningModel + StartupExperience + SatisfactionJob +
## SatisfactionIncubator + ResourceInvestment + ResourceOperations +
## ResourceHuman + TeamNumberOwners + factor(df.ioutcomes$TeamAge) +
## HeterogeneityAve + FlexibilityAve + PreferenceAve + IncubatorMentor +
## factor(PersonalDegree) + factor(PersonalEthnicity) + StartupAge +
## StartupMBAs + StartupPhDs, family = binomial, data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.1490 -0.3942 -0.0818 0.0000 2.4587
##
## Coefficients:
## Estimate Std. Error
## (Intercept) -4.14e+01 9.22e+03
## PreEntryBusinesAve 2.98e-01 7.02e-01
## PreEntryMgtAve 2.88e-01 5.58e-01
## PrePlanningModel 7.22e-01 6.10e-01
## StartupExperience 5.43e-01 4.65e-01
## SatisfactionJob 2.21e+00 1.04e+00
## SatisfactionIncubator -1.00e+00 5.95e-01
## ResourceInvestment 4.85e-03 1.59e-01
## ResourceOperations 2.84e-02 1.51e-01
## ResourceHuman -1.09e-03 1.53e-01
## TeamNumberOwners 2.85e-01 5.20e-01
## factor(df.ioutcomes$TeamAge)16-24 years 1.27e+01 6.52e+03
## factor(df.ioutcomes$TeamAge)24-34 years -1.04e+00 9.22e+03
## factor(df.ioutcomes$TeamAge)25-34 years 1.31e+01 6.52e+03
## factor(df.ioutcomes$TeamAge)35-44 years 7.99e+00 6.52e+03
## HeterogeneityAve 1.96e-01 5.36e-01
## FlexibilityAve -1.32e+00 8.47e-01
## PreferenceAve 3.31e-01 5.73e-01
## IncubatorMentorYes 5.46e-01 1.15e+00
## factor(PersonalDegree)Doctorate degree 7.03e-01 2.13e+00
## factor(PersonalDegree)High school -1.37e+01 6.52e+03
## factor(PersonalDegree)Master's degree 3.36e+00 1.68e+00
## factor(PersonalEthnicity)Asian/Pacific Islanders 1.10e+01 6.52e+03
## factor(PersonalEthnicity)Caucasian (non-Hispanic) 1.27e+01 6.52e+03
## factor(PersonalEthnicity)Latino or Hispanic 1.10e+01 6.52e+03
## factor(PersonalEthnicity)Other -5.20e+00 7.02e+03
## StartupAge 2.08e-01 1.02e-01
## StartupMBAs -5.86e-01 4.82e-01
## StartupPhDs 1.91e+00 1.33e+00
## z value Pr(>|z|)
## (Intercept) 0.00 0.996
## PreEntryBusinesAve 0.42 0.671
## PreEntryMgtAve 0.52 0.606
## PrePlanningModel 1.18 0.236
## StartupExperience 1.17 0.242
## SatisfactionJob 2.11 0.035 *
## SatisfactionIncubator -1.69 0.092 .
## ResourceInvestment 0.03 0.976
## ResourceOperations 0.19 0.851
## ResourceHuman -0.01 0.994
## TeamNumberOwners 0.55 0.584
## factor(df.ioutcomes$TeamAge)16-24 years 0.00 0.998
## factor(df.ioutcomes$TeamAge)24-34 years 0.00 1.000
## factor(df.ioutcomes$TeamAge)25-34 years 0.00 0.998
## factor(df.ioutcomes$TeamAge)35-44 years 0.00 0.999
## HeterogeneityAve 0.36 0.715
## FlexibilityAve -1.56 0.118
## PreferenceAve 0.58 0.563
## IncubatorMentorYes 0.47 0.635
## factor(PersonalDegree)Doctorate degree 0.33 0.742
## factor(PersonalDegree)High school 0.00 0.998
## factor(PersonalDegree)Master's degree 2.00 0.046 *
## factor(PersonalEthnicity)Asian/Pacific Islanders 0.00 0.999
## factor(PersonalEthnicity)Caucasian (non-Hispanic) 0.00 0.998
## factor(PersonalEthnicity)Latino or Hispanic 0.00 0.999
## factor(PersonalEthnicity)Other 0.00 0.999
## StartupAge 2.04 0.041 *
## StartupMBAs -1.22 0.224
## StartupPhDs 1.44 0.150
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 96.713 on 87 degrees of freedom
## Residual deviance: 45.085 on 59 degrees of freedom
## (4 observations deleted due to missingness)
## AIC: 103.1
##
## Number of Fisher Scoring iterations: 17
# With Select: Planning + business + are important
# Reduced sales-change model (logistic regression).
# TeamAge is referenced as a bare column so it is resolved through `data`
# instead of reaching around it with df.ioutcomes$TeamAge; coefficient
# labels in output recorded before this change read
# "factor(df.ioutcomes$TeamAge)...".
summary(glm(
  SSalesChange ~ PreEntryBusinesAve + PreEntryMgtAve + PreEntryPlanAve + StartupExperience
    + SatisfactionJob + SatisfactionIncubator
    + factor(TeamAge) # sparse levels: standard errors in the thousands in the recorded output
    + HeterogeneityAve + FlexibilityAve + PreferenceAve,
  family = binomial,
  data = df.ioutcomes
))
##
## Call:
## glm(formula = SSalesChange ~ PreEntryBusinesAve + PreEntryMgtAve +
## PreEntryPlanAve + StartupExperience + SatisfactionJob + SatisfactionIncubator +
## factor(df.ioutcomes$TeamAge) + HeterogeneityAve + FlexibilityAve +
## PreferenceAve, family = binomial, data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.584 -0.721 -0.287 0.089 2.018
##
## Coefficients:
## Estimate Std. Error z value
## (Intercept) -24.331 2399.546 -0.01
## PreEntryBusinesAve 0.924 0.429 2.15
## PreEntryMgtAve -0.160 0.360 -0.44
## PreEntryPlanAve 1.040 0.391 2.66
## StartupExperience 0.113 0.192 0.59
## SatisfactionJob 0.996 0.551 1.81
## SatisfactionIncubator -0.483 0.316 -1.53
## factor(df.ioutcomes$TeamAge)16-24 years 11.983 2399.546 0.00
## factor(df.ioutcomes$TeamAge)24-34 years -2.831 3393.469 0.00
## factor(df.ioutcomes$TeamAge)25-34 years 13.536 2399.545 0.01
## factor(df.ioutcomes$TeamAge)35-44 years 11.977 2399.545 0.00
## HeterogeneityAve 0.064 0.344 0.19
## FlexibilityAve -0.305 0.423 -0.72
## PreferenceAve 0.306 0.430 0.71
## Pr(>|z|)
## (Intercept) 0.9919
## PreEntryBusinesAve 0.0312 *
## PreEntryMgtAve 0.6564
## PreEntryPlanAve 0.0078 **
## StartupExperience 0.5576
## SatisfactionJob 0.0708 .
## SatisfactionIncubator 0.1255
## factor(df.ioutcomes$TeamAge)16-24 years 0.9960
## factor(df.ioutcomes$TeamAge)24-34 years 0.9993
## factor(df.ioutcomes$TeamAge)25-34 years 0.9955
## factor(df.ioutcomes$TeamAge)35-44 years 0.9960
## HeterogeneityAve 0.8523
## FlexibilityAve 0.4708
## PreferenceAve 0.4770
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 98.971 on 87 degrees of freedom
## Residual deviance: 69.997 on 74 degrees of freedom
## (4 observations deleted due to missingness)
## AIC: 98
##
## Number of Fisher Scoring iterations: 15
# Employee Change: Planning + MGT + are important
# With all controls (logistic regression).
# TeamAge is referenced as a bare column so it is resolved through `data`
# instead of reaching around it with df.ioutcomes$TeamAge; coefficient
# labels in output recorded before this change read
# "factor(df.ioutcomes$TeamAge)...".
summary(glm(
  SStaffChange ~ PreEntryBusinesAve + PreEntryMgtAve + PrePlanningModel + StartupExperience
    + SatisfactionJob + SatisfactionIncubator
    + ResourceInvestment + ResourceOperations + ResourceHuman
    + TeamNumberOwners
    + factor(TeamAge) # sparse levels: standard errors in the thousands in the recorded output
    + HeterogeneityAve + FlexibilityAve + PreferenceAve
    + IncubatorMentor
    + factor(PersonalDegree)
    + factor(PersonalEthnicity)
    + StartupAge
    + StartupMBAs + StartupPhDs,
  family = binomial,
  data = df.ioutcomes
))
##
## Call:
## glm(formula = SStaffChange ~ PreEntryBusinesAve + PreEntryMgtAve +
## PrePlanningModel + StartupExperience + SatisfactionJob +
## SatisfactionIncubator + ResourceInvestment + ResourceOperations +
## ResourceHuman + TeamNumberOwners + factor(df.ioutcomes$TeamAge) +
## HeterogeneityAve + FlexibilityAve + PreferenceAve + IncubatorMentor +
## factor(PersonalDegree) + factor(PersonalEthnicity) + StartupAge +
## StartupMBAs + StartupPhDs, family = binomial, data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.333 -0.440 -0.113 0.378 2.085
##
## Coefficients:
## Estimate Std. Error
## (Intercept) -16.2898 5594.9305
## PreEntryBusinesAve -0.3528 0.4519
## PreEntryMgtAve 1.4770 0.6484
## PrePlanningModel 0.2835 0.4090
## StartupExperience 0.1314 0.2667
## SatisfactionJob 1.1527 0.6330
## SatisfactionIncubator -0.4240 0.3866
## ResourceInvestment 0.1176 0.2270
## ResourceOperations 0.0850 0.2129
## ResourceHuman 0.0858 0.2148
## TeamNumberOwners -0.1748 0.4597
## factor(df.ioutcomes$TeamAge)16-24 years -15.0980 3956.1816
## factor(df.ioutcomes$TeamAge)24-34 years 0.7288 5594.8842
## factor(df.ioutcomes$TeamAge)25-34 years -19.6115 3956.1806
## factor(df.ioutcomes$TeamAge)35-44 years -20.1721 3956.1809
## HeterogeneityAve -0.2077 0.4486
## FlexibilityAve 0.0769 0.6218
## PreferenceAve -0.3733 0.5568
## IncubatorMentorYes -0.2395 0.8928
## factor(PersonalDegree)Doctorate degree 0.5435 1.5481
## factor(PersonalDegree)High school -17.9487 3956.1816
## factor(PersonalDegree)Master's degree -1.0434 1.0045
## factor(PersonalEthnicity)Asian/Pacific Islanders 18.2716 3956.1810
## factor(PersonalEthnicity)Caucasian (non-Hispanic) 18.7923 3956.1809
## factor(PersonalEthnicity)Latino or Hispanic 17.7442 3956.1808
## factor(PersonalEthnicity)Other 17.7172 3956.1810
## StartupAge 0.2457 0.0822
## StartupMBAs 1.9590 0.7954
## StartupPhDs 2.1658 1.3824
## z value Pr(>|z|)
## (Intercept) 0.00 0.9977
## PreEntryBusinesAve -0.78 0.4349
## PreEntryMgtAve 2.28 0.0227 *
## PrePlanningModel 0.69 0.4882
## StartupExperience 0.49 0.6222
## SatisfactionJob 1.82 0.0686 .
## SatisfactionIncubator -1.10 0.2728
## ResourceInvestment 0.52 0.6044
## ResourceOperations 0.40 0.6897
## ResourceHuman 0.40 0.6894
## TeamNumberOwners -0.38 0.7038
## factor(df.ioutcomes$TeamAge)16-24 years 0.00 0.9970
## factor(df.ioutcomes$TeamAge)24-34 years 0.00 0.9999
## factor(df.ioutcomes$TeamAge)25-34 years 0.00 0.9960
## factor(df.ioutcomes$TeamAge)35-44 years -0.01 0.9959
## HeterogeneityAve -0.46 0.6434
## FlexibilityAve 0.12 0.9016
## PreferenceAve -0.67 0.5026
## IncubatorMentorYes -0.27 0.7885
## factor(PersonalDegree)Doctorate degree 0.35 0.7255
## factor(PersonalDegree)High school 0.00 0.9964
## factor(PersonalDegree)Master's degree -1.04 0.2989
## factor(PersonalEthnicity)Asian/Pacific Islanders 0.00 0.9963
## factor(PersonalEthnicity)Caucasian (non-Hispanic) 0.00 0.9962
## factor(PersonalEthnicity)Latino or Hispanic 0.00 0.9964
## factor(PersonalEthnicity)Other 0.00 0.9964
## StartupAge 2.99 0.0028 **
## StartupMBAs 2.46 0.0138 *
## StartupPhDs 1.57 0.1172
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 120.352 on 87 degrees of freedom
## Residual deviance: 59.973 on 59 degrees of freedom
## (4 observations deleted due to missingness)
## AIC: 118
##
## Number of Fisher Scoring iterations: 16
# With Select: reduced set of predictors for SStaffChange.
# NOTE(review): no `family` is supplied, so glm() falls back to its gaussian
# default (the recorded summary reports a gaussian dispersion parameter), while
# the other SStaffChange models in this file pass family = binomial -- confirm
# which one is intended.
# NOTE(review): TeamAge is referenced as df.ioutcomes$TeamAge instead of being
# resolved through `data` (originally flagged "#bad"); kept as is so the term
# labels in the recorded output still match.
summary(
  glm(
    formula = SStaffChange ~ PreEntryBusinesAve + PreEntryMgtAve +
      PreEntryPlanAve + StartupExperience +
      SatisfactionJob + SatisfactionIncubator +
      factor(df.ioutcomes$TeamAge) +
      HeterogeneityAve + FlexibilityAve + PreferenceAve,
    data = df.ioutcomes
  )
)
##
## Call:
## glm(formula = SStaffChange ~ PreEntryBusinesAve + PreEntryMgtAve +
## PreEntryPlanAve + StartupExperience + SatisfactionJob + SatisfactionIncubator +
## factor(df.ioutcomes$TeamAge) + HeterogeneityAve + FlexibilityAve +
## PreferenceAve, data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.847 -0.379 0.000 0.343 0.853
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 0.0781 0.5490 0.14
## PreEntryBusinesAve 0.0622 0.0569 1.09
## PreEntryMgtAve 0.1182 0.0570 2.07
## PreEntryPlanAve 0.1651 0.0551 3.00
## StartupExperience -0.0167 0.0298 -0.56
## SatisfactionJob 0.1455 0.0701 2.08
## SatisfactionIncubator -0.0619 0.0477 -1.30
## factor(df.ioutcomes$TeamAge)16-24 years -0.6025 0.5186 -1.16
## factor(df.ioutcomes$TeamAge)24-34 years -0.2462 0.6455 -0.38
## factor(df.ioutcomes$TeamAge)25-34 years -0.6963 0.4677 -1.49
## factor(df.ioutcomes$TeamAge)35-44 years -0.7392 0.4838 -1.53
## HeterogeneityAve -0.0217 0.0505 -0.43
## FlexibilityAve -0.0399 0.0623 -0.64
## PreferenceAve -0.0492 0.0595 -0.83
## Pr(>|t|)
## (Intercept) 0.8873
## PreEntryBusinesAve 0.2778
## PreEntryMgtAve 0.0415 *
## PreEntryPlanAve 0.0037 **
## StartupExperience 0.5767
## SatisfactionJob 0.0413 *
## SatisfactionIncubator 0.1983
## factor(df.ioutcomes$TeamAge)16-24 years 0.2490
## factor(df.ioutcomes$TeamAge)24-34 years 0.7040
## factor(df.ioutcomes$TeamAge)25-34 years 0.1409
## factor(df.ioutcomes$TeamAge)35-44 years 0.1308
## HeterogeneityAve 0.6687
## FlexibilityAve 0.5244
## PreferenceAve 0.4102
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.1985)
##
## Null deviance: 21.716 on 87 degrees of freedom
## Residual deviance: 14.688 on 74 degrees of freedom
## (4 observations deleted due to missingness)
## AIC: 122.2
##
## Number of Fisher Scoring iterations: 2
It appears that management (MGT) and business (Bus) experience are more relevant in the early stages, whereas planning becomes important for growth and the later stages.
Explore basic model with only Network Investor Indirect/direct
# Pairs plot: the three stage outcomes against the NetworkInvestor item
# columns (NetworkInvestorIndirect1 through NetworkInvestorDirect3).
pairs.panels(
  dplyr::select(
    df.ioutcomes,
    SMVPOutcome, SLaunchOutcome, SFSaleOutcome,
    NetworkInvestorIndirect1:NetworkInvestorDirect3
  ),
  pch = ".",
  main = "Investment Indirect/Direct"
)
# Two-factor factor analysis of the NetworkInvestor items (Indirect1 through
# Direct3); the diagram and the printed fit follow.
# NOTE(review): this reads from smallData while the surrounding models use
# df.ioutcomes -- confirm both hold the same item columns.
fa.NetworkInvestor <- fa(
  dplyr::select(smallData, NetworkInvestorIndirect1:NetworkInvestorDirect3),
  nfactors = 2
)
fa.diagram(fa.NetworkInvestor)
fa.NetworkInvestor
## Factor Analysis using method = minres
## Call: fa(r = dplyr::select(smallData, NetworkInvestorIndirect1:NetworkInvestorDirect3),
## nfactors = 2)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 MR2 h2 u2 com
## NetworkInvestorIndirect1 0.95 -0.02 0.87 0.13 1.0
## NetworkInvestorIndirect2 0.91 0.01 0.84 0.16 1.0
## NetworkInvestorIndirect3 0.83 0.07 0.78 0.22 1.0
## NetworkInvestorDirect1 0.20 0.70 0.74 0.26 1.2
## NetworkInvestorDirect2 0.16 0.68 0.65 0.35 1.1
## NetworkInvestorDirect3 -0.10 0.95 0.77 0.23 1.0
##
## MR1 MR2
## SS loadings 2.64 2.01
## Proportion Var 0.44 0.33
## Cumulative Var 0.44 0.77
## Proportion Explained 0.57 0.43
## Cumulative Proportion 0.57 1.00
##
## With factor correlations of
## MR1 MR2
## MR1 1.00 0.75
## MR2 0.75 1.00
##
## Mean item complexity = 1.1
## Test of the hypothesis that 2 factors are sufficient.
##
## The degrees of freedom for the null model are 15 and the objective function was 5.05 with Chi Square of 445.5
## The degrees of freedom for the model are 4 and the objective function was 0.12
##
## The root mean square of the residuals (RMSR) is 0.02
## The df corrected root mean square of the residuals is 0.04
##
## The harmonic number of observations is 89 with the empirical chi square 1.04 with prob < 0.9
## The total number of observations was 92 with MLE Chi Square = 10.62 with prob < 0.031
##
## Tucker Lewis Index of factoring reliability = 0.941
## RMSEA index = 0.14 and the 90 % confidence intervals are 0.037 0.235
## BIC = -7.47
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## MR1 MR2
## Correlation of scores with factors 0.97 0.94
## Multiple R square of scores with factors 0.94 0.89
## Minimum correlation of possible factor scores 0.89 0.78
# Reliability (alpha) of the NetworkInvestor items.
# Both calls are namespace-qualified. The unqualified form
#   alpha(select(smallData, NetworkInvestorIndirect1:NetworkInvestorDirect3))
# failed with "unused argument (NetworkInvestorIndirect1:NetworkInvestorDirect3)",
# i.e. the bare select() was not resolving to dplyr's select (presumably it is
# masked by another attached package -- TODO confirm which one).
# alpha() is qualified too because ggplot2 also exports a function of that name.
psych::alpha(
  dplyr::select(smallData, NetworkInvestorIndirect1:NetworkInvestorDirect3)
)
# Row-wise averages of the Indirect and the Direct investor-network items,
# followed by a pairs plot of the stage outcomes against the two averages.
# NOTE(review): each average is built from items 1 and 2 only; item 3 is left
# out of both -- confirm this is intentional.
df.ioutcomes$NetInvInd <- rowMeans(
  df.ioutcomes[c("NetworkInvestorIndirect1", "NetworkInvestorIndirect2")]
)
df.ioutcomes$NetInvDir <- rowMeans(
  df.ioutcomes[c("NetworkInvestorDirect1", "NetworkInvestorDirect2")]
)
pairs.panels(
  dplyr::select(
    df.ioutcomes,
    SMVPOutcome, SLaunchOutcome, SFSaleOutcome,
    NetInvDir, NetInvInd
  ),
  pch = ".",
  main = "Investment Indirect/Direct"
)
## MVP
# Summary of the existing logit.MVP fit (built from formula.MVP2, per the
# recorded call below).
summary(logit.MVP) # PreEntryMgtAve is significant here (p = 0.024)
##
## Call:
## glm(formula = as.formula(formula.MVP2), family = binomial, data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.066 -0.860 0.415 0.827 1.795
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.2894 1.2717 -0.23 0.820
## PreEntryBusinesAve -0.4287 0.3103 -1.38 0.167
## PreEntryMgtAve 0.7079 0.3135 2.26 0.024 *
## PreEntryPlanAve -0.2525 0.2877 -0.88 0.380
## StartupExperience 0.3350 0.1750 1.91 0.056 .
## SatisfactionIncubator 0.5304 0.2370 2.24 0.025 *
## StartupAge -0.1072 0.0537 -2.00 0.046 *
## SUPFastTrackYes -1.1639 0.6772 -1.72 0.086 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 118.476 on 85 degrees of freedom
## Residual deviance: 90.803 on 78 degrees of freedom
## (6 observations deleted due to missingness)
## AIC: 106.8
##
## Number of Fisher Scoring iterations: 4
# Add the PreEntryMgtAve x IncubatorMentor interaction to the MVP formula and
# refit; logit.MVP is overwritten with the new model before it is summarised.
# Reading of the result: MgtAve still significant, but StartupExperience not.
# Need to remove some variables. Control variables: SatisfactionIncubator,
# StartupAge, SUPFastTrackYes.
formula.MVP3 <- paste0(formula.MVP2, " + PreEntryMgtAve*IncubatorMentor")
logit.MVP <- glm(
  formula = as.formula(formula.MVP3),
  family = binomial,
  data = df.ioutcomes
)
summary(logit.MVP)
##
## Call:
## glm(formula = as.formula(formula.MVP3), family = binomial, data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.281 -0.735 0.302 0.768 2.062
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.5300 1.7951 -1.41 0.1587
## PreEntryBusinesAve -0.4621 0.3482 -1.33 0.1844
## PreEntryMgtAve 1.6614 0.5678 2.93 0.0034 **
## PreEntryPlanAve -0.5949 0.3376 -1.76 0.0780 .
## StartupExperience 0.3310 0.1862 1.78 0.0754 .
## SatisfactionIncubator 0.5796 0.2720 2.13 0.0331 *
## StartupAge -0.0761 0.0519 -1.47 0.1427
## SUPFastTrackYes -1.2246 0.7101 -1.72 0.0846 .
## IncubatorMentorYes 5.0674 2.0302 2.50 0.0126 *
## PreEntryMgtAve:IncubatorMentorYes -1.5407 0.6951 -2.22 0.0267 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 118.476 on 85 degrees of freedom
## Residual deviance: 83.123 on 76 degrees of freedom
## (6 observations deleted due to missingness)
## AIC: 103.1
##
## Number of Fisher Scoring iterations: 5
# Exponentiate the logit.MVP coefficients (log-odds scale) to read them as
# odds ratios.
exp(coef(logit.MVP))
## (Intercept) PreEntryBusinesAve
## 0.07966 0.62996
## PreEntryMgtAve PreEntryPlanAve
## 5.26684 0.55160
## StartupExperience SatisfactionIncubator
## 1.39243 1.78525
## StartupAge SUPFastTrackYes
## 0.92674 0.29387
## IncubatorMentorYes PreEntryMgtAve:IncubatorMentorYes
## 158.75989 0.21424
# Slope of IncubatorMentorYes at each level of PreEntryMgtAve, using the
# rounded coefficients 5 (IncubatorMentorYes) and -1.5 (interaction):
# PreEntryMgtAve = 1 => slope = 5 + 1 * (-1.5) =  3.5 => odds ratio = exp(3.5)  = 33.1
# PreEntryMgtAve = 2 => slope = 5 + 2 * (-1.5) =  2.0 => odds ratio = exp(2)    = 7.4
# PreEntryMgtAve = 3 => slope = 5 + 3 * (-1.5) =  0.5 => odds ratio = exp(0.5)  = 1.65
# PreEntryMgtAve = 4 => slope = 5 + 4 * (-1.5) = -1.0 => odds ratio = exp(-1)   = 0.37
# PreEntryMgtAve = 5 => slope = 5 + 5 * (-1.5) = -2.5 => odds ratio = exp(-2.5) = 0.08
## MVP
# MVP ~ investor ties (direct/indirect) + pre-entry experience.
# First model: the three NetworkInvestorIndirect items on their own.
summary(
  glm(
    formula = SMVPOutcome ~ NetworkInvestorIndirect1 +
      NetworkInvestorIndirect2 + NetworkInvestorIndirect3,
    family = binomial,
    data = df.ioutcomes
  )
)
##
## Call:
## glm(formula = SMVPOutcome ~ NetworkInvestorIndirect1 + NetworkInvestorIndirect2 +
## NetworkInvestorIndirect3, family = binomial, data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.958 -1.103 0.765 1.013 1.821
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.512 0.539 -0.95 0.343
## NetworkInvestorIndirect1 0.364 0.302 1.21 0.227
## NetworkInvestorIndirect2 0.495 0.319 1.55 0.120
## NetworkInvestorIndirect3 -0.677 0.304 -2.23 0.026 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 124.37 on 89 degrees of freedom
## Residual deviance: 115.88 on 86 degrees of freedom
## (2 observations deleted due to missingness)
## AIC: 123.9
##
## Number of Fisher Scoring iterations: 4
# Indirect3 is better
# Logit of SMVPOutcome on the three NetworkInvestorDirect items.
summary(
  glm(
    formula = SMVPOutcome ~ NetworkInvestorDirect1 +
      NetworkInvestorDirect2 + NetworkInvestorDirect3,
    family = binomial,
    data = df.ioutcomes
  )
)
##
## Call:
## glm(formula = SMVPOutcome ~ NetworkInvestorDirect1 + NetworkInvestorDirect2 +
## NetworkInvestorDirect3, family = binomial, data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.39 -1.19 1.01 1.10 1.25
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.1182 0.4976 -0.24 0.81
## NetworkInvestorDirect1 0.1162 0.2182 0.53 0.59
## NetworkInvestorDirect2 0.0734 0.2144 0.34 0.73
## NetworkInvestorDirect3 -0.1152 0.2337 -0.49 0.62
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 122.83 on 88 degrees of freedom
## Residual deviance: 122.15 on 85 degrees of freedom
## (3 observations deleted due to missingness)
## AIC: 130.1
##
## Number of Fisher Scoring iterations: 4
# None of the Direct items is significant
# Logit of SMVPOutcome on the averaged indirect and direct investor ties.
summary(
  glm(
    formula = SMVPOutcome ~ NetInvInd + NetInvDir,
    family = binomial,
    data = df.ioutcomes
  )
)
##
## Call:
## glm(formula = SMVPOutcome ~ NetInvInd + NetInvDir, family = binomial,
## data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.635 -1.197 0.875 1.083 1.393
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.493 0.546 -0.90 0.37
## NetInvInd 0.359 0.219 1.64 0.10
## NetInvDir -0.179 0.230 -0.78 0.44
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 122.83 on 88 degrees of freedom
## Residual deviance: 119.56 on 86 degrees of freedom
## (3 observations deleted due to missingness)
## AIC: 125.6
##
## Number of Fisher Scoring iterations: 4
# Indirect is better but still not significant
# Logit of SMVPOutcome with NetworkInvestorIndirect3 interacted with
# StartupExperience and with PreEntryMgtAve.
# Result: PreEntryMgtAve is significant (p = 0.019); the
# StartupExperience:NetworkInvestorIndirect3 interaction is only marginal
# (p = 0.073) and NetworkInvestorIndirect3:PreEntryMgtAve is not significant
# (p = 0.198).
summary(
  glm(
    formula = SMVPOutcome ~ StartupExperience * NetworkInvestorIndirect3 +
      PreEntryMgtAve * NetworkInvestorIndirect3,
    family = binomial(logit),
    data = df.ioutcomes
  )
)
##
## Call:
## glm(formula = SMVPOutcome ~ StartupExperience * NetworkInvestorIndirect3 +
## PreEntryMgtAve * NetworkInvestorIndirect3, family = binomial(logit),
## data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.623 -0.945 0.370 0.913 1.928
##
## Coefficients:
## Estimate Std. Error z value
## (Intercept) -2.47525 1.54731 -1.60
## StartupExperience -0.39264 0.43741 -0.90
## NetworkInvestorIndirect3 0.00162 0.48152 0.00
## PreEntryMgtAve 1.42302 0.60517 2.35
## StartupExperience:NetworkInvestorIndirect3 0.23000 0.12825 1.79
## NetworkInvestorIndirect3:PreEntryMgtAve -0.20945 0.16271 -1.29
## Pr(>|z|)
## (Intercept) 0.110
## StartupExperience 0.369
## NetworkInvestorIndirect3 0.997
## PreEntryMgtAve 0.019 *
## StartupExperience:NetworkInvestorIndirect3 0.073 .
## NetworkInvestorIndirect3:PreEntryMgtAve 0.198
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 121.81 on 87 degrees of freedom
## Residual deviance: 101.34 on 82 degrees of freedom
## (4 observations deleted due to missingness)
## AIC: 113.3
##
## Number of Fisher Scoring iterations: 4
# Same interaction model with the controls SatisfactionIncubator, StartupAge
# and SUPFastTrack added.
summary(
  glm(
    formula = SMVPOutcome ~ StartupExperience * NetworkInvestorIndirect3 +
      PreEntryMgtAve * NetworkInvestorIndirect3 +
      SatisfactionIncubator + StartupAge + SUPFastTrack,
    family = binomial(logit),
    data = df.ioutcomes
  )
)
##
## Call:
## glm(formula = SMVPOutcome ~ StartupExperience * NetworkInvestorIndirect3 +
## PreEntryMgtAve * NetworkInvestorIndirect3 + SatisfactionIncubator +
## StartupAge + SUPFastTrack, family = binomial(logit), data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.852 -0.892 0.449 0.767 1.927
##
## Coefficients:
## Estimate Std. Error z value
## (Intercept) -1.6863 1.7475 -0.97
## StartupExperience -0.3186 0.4625 -0.69
## NetworkInvestorIndirect3 -0.0552 0.5322 -0.10
## PreEntryMgtAve 1.1154 0.6128 1.82
## SatisfactionIncubator 0.3872 0.2370 1.63
## StartupAge -0.1132 0.0525 -2.16
## SUPFastTrackYes -0.9357 0.6739 -1.39
## StartupExperience:NetworkInvestorIndirect3 0.2031 0.1376 1.48
## NetworkInvestorIndirect3:PreEntryMgtAve -0.1551 0.1736 -0.89
## Pr(>|z|)
## (Intercept) 0.335
## StartupExperience 0.491
## NetworkInvestorIndirect3 0.917
## PreEntryMgtAve 0.069 .
## SatisfactionIncubator 0.102
## StartupAge 0.031 *
## SUPFastTrackYes 0.165
## StartupExperience:NetworkInvestorIndirect3 0.140
## NetworkInvestorIndirect3:PreEntryMgtAve 0.372
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 117.26 on 84 degrees of freedom
## Residual deviance: 88.62 on 76 degrees of freedom
## (7 observations deleted due to missingness)
## AIC: 106.6
##
## Number of Fisher Scoring iterations: 4
## LAUNCH
# Launch ~ pre-entry experience: refit of the model described by the
# formula.Launch2 string.
summary(
  glm(
    formula = as.formula(formula.Launch2),
    family = binomial,
    data = df.ioutcomes
  )
)
##
## Call:
## glm(formula = as.formula(formula.Launch2), family = binomial,
## data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.367 -0.735 -0.454 -0.128 2.336
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -21.686 16.129 -1.34 0.179
## PreEntryBusinesAve -0.330 0.361 -0.92 0.360
## PreEntryManagement2 0.632 0.285 2.22 0.027 *
## PrePlanningModel 0.598 0.270 2.22 0.026 *
## StartupExperience 0.223 0.154 1.44 0.148
## ResourceInvestment 0.203 0.164 1.24 0.215
## ResourceOperations 0.204 0.159 1.29 0.198
## ResourceHuman 0.192 0.157 1.23 0.220
## PreferenceAve -0.608 0.359 -1.69 0.090 .
## SUPFastTrackYes -0.197 0.661 -0.30 0.766
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 95.044 on 84 degrees of freedom
## Residual deviance: 77.650 on 75 degrees of freedom
## (7 observations deleted due to missingness)
## AIC: 97.65
##
## Number of Fisher Scoring iterations: 5
# PreEntryManagement2 and PrePlanningModel are significant
# Logit of SLaunchOutcome on the three NetworkInvestorIndirect items.
summary(
  glm(
    formula = SLaunchOutcome ~ NetworkInvestorIndirect1 +
      NetworkInvestorIndirect2 + NetworkInvestorIndirect3,
    family = binomial,
    data = df.ioutcomes
  )
)
##
## Call:
## glm(formula = SLaunchOutcome ~ NetworkInvestorIndirect1 + NetworkInvestorIndirect2 +
## NetworkInvestorIndirect3, family = binomial, data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.224 -0.831 -0.599 0.664 1.987
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.1160 0.7060 -3.00 0.0027 **
## NetworkInvestorIndirect1 0.0902 0.3510 0.26 0.7971
## NetworkInvestorIndirect2 0.5856 0.3556 1.65 0.0996 .
## NetworkInvestorIndirect3 -0.3851 0.3179 -1.21 0.2258
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 102.304 on 89 degrees of freedom
## Residual deviance: 96.477 on 86 degrees of freedom
## (2 observations deleted due to missingness)
## AIC: 104.5
##
## Number of Fisher Scoring iterations: 4
# Indirect 2 is better
# Logit of SLaunchOutcome on the three NetworkInvestorDirect items.
summary(
  glm(
    formula = SLaunchOutcome ~ NetworkInvestorDirect1 +
      NetworkInvestorDirect2 + NetworkInvestorDirect3,
    family = binomial,
    data = df.ioutcomes
  )
)
##
## Call:
## glm(formula = SLaunchOutcome ~ NetworkInvestorDirect1 + NetworkInvestorDirect2 +
## NetworkInvestorDirect3, family = binomial, data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.175 -0.700 -0.635 -0.432 1.877
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.646 0.624 -2.64 0.0084 **
## NetworkInvestorDirect1 0.392 0.247 1.58 0.1134
## NetworkInvestorDirect2 0.189 0.246 0.77 0.4433
## NetworkInvestorDirect3 -0.507 0.273 -1.86 0.0629 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 99.543 on 88 degrees of freedom
## Residual deviance: 94.578 on 85 degrees of freedom
## (3 observations deleted due to missingness)
## AIC: 102.6
##
## Number of Fisher Scoring iterations: 4
# Direct3 is only marginally significant (p = 0.063)
# Logit of SLaunchOutcome on the averaged indirect and direct investor ties.
summary(
  glm(
    formula = SLaunchOutcome ~ NetInvInd + NetInvDir,
    family = binomial,
    data = df.ioutcomes
  )
)
##
## Call:
## glm(formula = SLaunchOutcome ~ NetInvInd + NetInvDir, family = binomial,
## data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.048 -0.827 -0.626 -0.479 2.038
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.255 0.752 -3.00 0.0027 **
## NetInvInd 0.422 0.250 1.69 0.0914 .
## NetInvDir -0.110 0.243 -0.45 0.6511
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 99.543 on 88 degrees of freedom
## Residual deviance: 95.494 on 86 degrees of freedom
## (3 observations deleted due to missingness)
## AIC: 101.5
##
## Number of Fisher Scoring iterations: 4
# Indirect better
# Launch model extended with interactions between pre-entry experience
# (PreEntryMgtAve, PrePlanningModel) and the investor-tie items
# NetworkInvestorIndirect2 and NetworkInvestorDirect3.
# NOTE(review): the resulting model contains both PreEntryManagement2 (already
# in formula.Launch2) and PreEntryMgtAve (added here); presumably the two
# overlap -- confirm whether the interaction should use PreEntryManagement2.
formula.Launch3 <- paste0(
  formula.Launch2,
  "+ (PreEntryMgtAve+PrePlanningModel)*(NetworkInvestorIndirect2+NetworkInvestorDirect3)"
)
summary(
  glm(
    formula = as.formula(formula.Launch3),
    family = binomial,
    data = df.ioutcomes
  )
)
##
## Call:
## glm(formula = as.formula(formula.Launch3), family = binomial,
## data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6002 -0.6044 -0.2336 -0.0155 2.4496
##
## Coefficients:
## Estimate Std. Error z value
## (Intercept) -25.3980 16.4688 -1.54
## PreEntryBusinesAve -0.1807 0.4230 -0.43
## PreEntryManagement2 1.3096 0.8196 1.60
## PrePlanningModel -0.5237 0.8243 -0.64
## StartupExperience 0.5503 0.2202 2.50
## ResourceInvestment 0.2772 0.1662 1.67
## ResourceOperations 0.2861 0.1586 1.80
## ResourceHuman 0.2599 0.1563 1.66
## PreferenceAve -0.9159 0.4965 -1.84
## SUPFastTrackYes 0.7827 0.9751 0.80
## PreEntryMgtAve -0.9706 1.2935 -0.75
## NetworkInvestorIndirect2 -1.3316 1.8256 -0.73
## NetworkInvestorDirect3 -1.3119 2.2225 -0.59
## PreEntryMgtAve:NetworkInvestorIndirect2 0.6229 0.5261 1.18
## PreEntryMgtAve:NetworkInvestorDirect3 -0.5921 0.5328 -1.11
## PrePlanningModel:NetworkInvestorIndirect2 0.0711 0.2357 0.30
## PrePlanningModel:NetworkInvestorDirect3 0.5517 0.3258 1.69
## Pr(>|z|)
## (Intercept) 0.123
## PreEntryBusinesAve 0.669
## PreEntryManagement2 0.110
## PrePlanningModel 0.525
## StartupExperience 0.012 *
## ResourceInvestment 0.095 .
## ResourceOperations 0.071 .
## ResourceHuman 0.096 .
## PreferenceAve 0.065 .
## SUPFastTrackYes 0.422
## PreEntryMgtAve 0.453
## NetworkInvestorIndirect2 0.466
## NetworkInvestorDirect3 0.555
## PreEntryMgtAve:NetworkInvestorIndirect2 0.236
## PreEntryMgtAve:NetworkInvestorDirect3 0.266
## PrePlanningModel:NetworkInvestorIndirect2 0.763
## PrePlanningModel:NetworkInvestorDirect3 0.090 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 91.108 on 81 degrees of freedom
## Residual deviance: 58.429 on 65 degrees of freedom
## (10 observations deleted due to missingness)
## AIC: 92.43
##
## Number of Fisher Scoring iterations: 6
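# Only StartupExperience is significant at 0.05 (p = 0.012). The
# PrePlanningModel:NetworkInvestorDirect3 interaction is marginal (p = 0.090);
# the NetworkInvestorDirect3 main effect is not significant (p = 0.555).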
## FIRST SALE
# 1stSale ~ investor ties + pre-entry experience.
# First model: the pre-entry experience measures on their own.
summary(
  glm(
    formula = SFSaleOutcome ~ PreEntryBusinesAve + PreEntryMgtAve +
      PrePlanningModel + StartupExperience,
    family = binomial,
    data = df.ioutcomes
  )
)
##
## Call:
## glm(formula = SFSaleOutcome ~ PreEntryBusinesAve + PreEntryMgtAve +
## PrePlanningModel + StartupExperience, family = binomial,
## data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.915 -0.714 -0.554 -0.346 2.415
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.4795 1.5814 -2.20 0.028 *
## PreEntryBusinesAve 0.0338 0.3424 0.10 0.921
## PreEntryMgtAve 0.0146 0.2915 0.05 0.960
## PrePlanningModel 0.5136 0.2273 2.26 0.024 *
## StartupExperience 0.0131 0.1512 0.09 0.931
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 87.229 on 89 degrees of freedom
## Residual deviance: 81.023 on 85 degrees of freedom
## (2 observations deleted due to missingness)
## AIC: 91.02
##
## Number of Fisher Scoring iterations: 5
# PlanningModel is significant
# Logit of SFSaleOutcome on the three NetworkInvestorIndirect items.
summary(
  glm(
    formula = SFSaleOutcome ~ NetworkInvestorIndirect1 +
      NetworkInvestorIndirect2 + NetworkInvestorIndirect3,
    family = binomial,
    data = df.ioutcomes
  )
)
##
## Call:
## glm(formula = SFSaleOutcome ~ NetworkInvestorIndirect1 + NetworkInvestorIndirect2 +
## NetworkInvestorIndirect3, family = binomial, data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.917 -0.723 -0.552 -0.415 2.234
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.799 0.856 -3.27 0.0011 **
## NetworkInvestorIndirect1 -0.197 0.393 -0.50 0.6160
## NetworkInvestorIndirect2 0.235 0.389 0.60 0.5462
## NetworkInvestorIndirect3 0.353 0.337 1.05 0.2960
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 87.229 on 89 degrees of freedom
## Residual deviance: 82.788 on 86 degrees of freedom
## (2 observations deleted due to missingness)
## AIC: 90.79
##
## Number of Fisher Scoring iterations: 4
# None significant
# Logit of SFSaleOutcome on the three NetworkInvestorDirect items.
summary(
  glm(
    formula = SFSaleOutcome ~ NetworkInvestorDirect1 +
      NetworkInvestorDirect2 + NetworkInvestorDirect3,
    family = binomial,
    data = df.ioutcomes
  )
)
##
## Call:
## glm(formula = SFSaleOutcome ~ NetworkInvestorDirect1 + NetworkInvestorDirect2 +
## NetworkInvestorDirect3, family = binomial, data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.926 -0.701 -0.525 -0.430 2.231
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.520 0.742 -3.39 0.00069 ***
## NetworkInvestorDirect1 0.259 0.302 0.86 0.39051
## NetworkInvestorDirect2 -0.164 0.306 -0.54 0.59145
## NetworkInvestorDirect3 0.256 0.299 0.86 0.39235
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 83.847 on 88 degrees of freedom
## Residual deviance: 79.946 on 85 degrees of freedom
## (3 observations deleted due to missingness)
## AIC: 87.95
##
## Number of Fisher Scoring iterations: 4
# None significant
# Logit of SFSaleOutcome on the averaged indirect and direct investor ties.
summary(
  glm(
    formula = SFSaleOutcome ~ NetInvInd + NetInvDir,
    family = binomial,
    data = df.ioutcomes
  )
)
##
## Call:
## glm(formula = SFSaleOutcome ~ NetInvInd + NetInvDir, family = binomial,
## data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.825 -0.706 -0.554 -0.396 2.251
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.906 0.894 -3.25 0.0012 **
## NetInvInd 0.294 0.290 1.01 0.3119
## NetInvDir 0.107 0.276 0.39 0.6983
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 83.847 on 88 degrees of freedom
## Residual deviance: 80.397 on 86 degrees of freedom
## (3 observations deleted due to missingness)
## AIC: 86.4
##
## Number of Fisher Scoring iterations: 4
# Neither of the averages is significant
# Logit of SFSaleOutcome on PrePlanningModel, NetInvInd and their interaction.
summary(
  glm(
    formula = SFSaleOutcome ~ PrePlanningModel * NetInvInd,
    family = binomial,
    data = df.ioutcomes
  )
)
##
## Call:
## glm(formula = SFSaleOutcome ~ PrePlanningModel * NetInvInd, family = binomial,
## data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.002 -0.719 -0.518 -0.356 2.362
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.9386 1.8205 -1.61 0.11
## PrePlanningModel 0.2120 0.5204 0.41 0.68
## NetInvInd -0.0726 0.5762 -0.13 0.90
## PrePlanningModel:NetInvInd 0.0726 0.1494 0.49 0.63
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 87.229 on 89 degrees of freedom
## Residual deviance: 80.479 on 86 degrees of freedom
## (2 observations deleted due to missingness)
## AIC: 88.48
##
## Number of Fisher Scoring iterations: 5
# None significant in SFSaleOutcome
# GrowthSales ~ investor ties + pre-entry experience.
# First model: the pre-entry experience measures on their own.
summary(
  glm(
    formula = SSalesChange ~ PreEntryBusinesAve + PreEntryMgtAve +
      PrePlanningModel,
    family = binomial,
    data = df.ioutcomes
  )
)
##
## Call:
## glm(formula = SSalesChange ~ PreEntryBusinesAve + PreEntryMgtAve +
## PrePlanningModel, family = binomial, data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.3191 -0.7585 -0.4316 0.0792 2.2848
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.647 1.845 -3.60 0.00032 ***
## PreEntryBusinesAve 0.880 0.383 2.30 0.02147 *
## PreEntryMgtAve -0.130 0.296 -0.44 0.66015
## PrePlanningModel 0.719 0.228 3.15 0.00163 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 103.470 on 91 degrees of freedom
## Residual deviance: 84.582 on 88 degrees of freedom
## AIC: 92.58
##
## Number of Fisher Scoring iterations: 5
# Business and PlanningModel are significant
# Logit of SSalesChange on the three NetworkInvestorIndirect items.
summary(
  glm(
    formula = SSalesChange ~ NetworkInvestorIndirect1 +
      NetworkInvestorIndirect2 + NetworkInvestorIndirect3,
    family = binomial,
    data = df.ioutcomes
  )
)
##
## Call:
## glm(formula = SSalesChange ~ NetworkInvestorIndirect1 + NetworkInvestorIndirect2 +
## NetworkInvestorIndirect3, family = binomial, data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.364 -0.850 -0.463 0.816 2.150
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.787 0.809 -3.44 0.00058 ***
## NetworkInvestorIndirect1 -0.305 0.364 -0.84 0.40240
## NetworkInvestorIndirect2 0.085 0.356 0.24 0.81155
## NetworkInvestorIndirect3 0.714 0.323 2.21 0.02699 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 102.304 on 89 degrees of freedom
## Residual deviance: 92.057 on 86 degrees of freedom
## (2 observations deleted due to missingness)
## AIC: 100.1
##
## Number of Fisher Scoring iterations: 4
# Indirect3 is significant
# Logit of SSalesChange on the three NetworkInvestorDirect items.
summary(
  glm(
    formula = SSalesChange ~ NetworkInvestorDirect1 +
      NetworkInvestorDirect2 + NetworkInvestorDirect3,
    family = binomial,
    data = df.ioutcomes
  )
)
##
## Call:
## glm(formula = SSalesChange ~ NetworkInvestorDirect1 + NetworkInvestorDirect2 +
## NetworkInvestorDirect3, family = binomial, data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.225 -0.658 -0.502 -0.477 2.100
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.6732 0.7036 -3.80 0.00015 ***
## NetworkInvestorDirect1 0.0277 0.2855 0.10 0.92283
## NetworkInvestorDirect2 0.0420 0.2801 0.15 0.88084
## NetworkInvestorDirect3 0.4872 0.2816 1.73 0.08360 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 99.543 on 88 degrees of freedom
## Residual deviance: 89.683 on 85 degrees of freedom
## (3 observations deleted due to missingness)
## AIC: 97.68
##
## Number of Fisher Scoring iterations: 4
# Direct3 is only marginally significant (p = 0.084)
# Logit of SSalesChange on the averaged indirect and direct investor ties.
summary(
  glm(
    formula = SSalesChange ~ NetInvInd + NetInvDir,
    family = binomial,
    data = df.ioutcomes
  )
)
##
## Call:
## glm(formula = SSalesChange ~ NetInvInd + NetInvDir, family = binomial,
## data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.073 -0.823 -0.521 -0.440 2.108
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.796 0.798 -3.50 0.00046 ***
## NetInvInd 0.151 0.270 0.56 0.57527
## NetInvDir 0.358 0.265 1.35 0.17609
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 99.543 on 88 degrees of freedom
## Residual deviance: 92.619 on 86 degrees of freedom
## (3 observations deleted due to missingness)
## AIC: 98.62
##
## Number of Fisher Scoring iterations: 4
# Neither of the averages is significant, but Dir does better than Ind
# Logit of SSalesChange on PreEntryBusinesAve plus PrePlanningModel,
# NetworkInvestorDirect3 and the interaction of the latter two.
summary(
  glm(
    formula = SSalesChange ~ PreEntryBusinesAve +
      PrePlanningModel * NetworkInvestorDirect3,
    family = binomial,
    data = df.ioutcomes
  )
)
##
## Call:
## glm(formula = SSalesChange ~ PreEntryBusinesAve + PrePlanningModel *
## NetworkInvestorDirect3, family = binomial, data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6160 -0.7884 -0.3038 -0.0495 2.3608
##
## Coefficients:
## Estimate Std. Error z value
## (Intercept) -11.011 3.177 -3.47
## PreEntryBusinesAve 0.830 0.384 2.16
## PrePlanningModel 1.510 0.626 2.41
## NetworkInvestorDirect3 1.761 0.833 2.11
## PrePlanningModel:NetworkInvestorDirect3 -0.350 0.190 -1.84
## Pr(>|z|)
## (Intercept) 0.00053 ***
## PreEntryBusinesAve 0.03055 *
## PrePlanningModel 0.01579 *
## NetworkInvestorDirect3 0.03453 *
## PrePlanningModel:NetworkInvestorDirect3 0.06516 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 99.543 on 88 degrees of freedom
## Residual deviance: 75.332 on 84 degrees of freedom
## (3 observations deleted due to missingness)
## AIC: 85.33
##
## Number of Fisher Scoring iterations: 6
# PreBus, PreModel and Direct3 are significant; their interaction is only
# marginally significant (p = 0.065)
# Growth Employees ~ investor ties + pre-entry experience.
# First model: the pre-entry experience measures on their own.
summary(
  glm(
    formula = SStaffChange ~ PreEntryBusinesAve + PreEntryMgtAve +
      PrePlanningModel + StartupExperience,
    family = binomial,
    data = df.ioutcomes
  )
)
##
## Call:
## glm(formula = SStaffChange ~ PreEntryBusinesAve + PreEntryMgtAve +
## PrePlanningModel + StartupExperience, family = binomial,
## data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.967 -0.950 -0.411 1.018 2.032
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.589 1.429 -3.21 0.0013 **
## PreEntryBusinesAve 0.305 0.296 1.03 0.3033
## PreEntryMgtAve 0.626 0.252 2.48 0.0131 *
## PrePlanningModel 0.456 0.181 2.52 0.0118 *
## StartupExperience -0.055 0.132 -0.42 0.6780
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 123.65 on 89 degrees of freedom
## Residual deviance: 104.68 on 85 degrees of freedom
## (2 observations deleted due to missingness)
## AIC: 114.7
##
## Number of Fisher Scoring iterations: 4
# MGT and PlanningModel are significant
# Binomial GLM: SStaffChange modelled on the three NetworkInvestorIndirect
# items entered as separate predictors.
summary(
  glm(
    formula = SStaffChange ~ NetworkInvestorIndirect1 +
      NetworkInvestorIndirect2 + NetworkInvestorIndirect3,
    family = binomial,
    data = df.ioutcomes
  )
)
##
## Call:
## glm(formula = SStaffChange ~ NetworkInvestorIndirect1 + NetworkInvestorIndirect2 +
## NetworkInvestorIndirect3, family = binomial, data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.60 -1.00 -0.65 1.02 1.95
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.960 0.630 -3.11 0.0019 **
## NetworkInvestorIndirect1 0.280 0.299 0.94 0.3483
## NetworkInvestorIndirect2 -0.302 0.317 -0.95 0.3406
## NetworkInvestorIndirect3 0.546 0.278 1.97 0.0493 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 124.05 on 89 degrees of freedom
## Residual deviance: 109.86 on 86 degrees of freedom
## (2 observations deleted due to missingness)
## AIC: 117.9
##
## Number of Fisher Scoring iterations: 4
# Indirect3 is significant
# Binomial GLM: SStaffChange modelled on the three NetworkInvestorDirect
# items entered as separate predictors.
summary(
  glm(
    formula = SStaffChange ~ NetworkInvestorDirect1 +
      NetworkInvestorDirect2 + NetworkInvestorDirect3,
    family = binomial,
    data = df.ioutcomes
  )
)
##
## Call:
## glm(formula = SStaffChange ~ NetworkInvestorDirect1 + NetworkInvestorDirect2 +
## NetworkInvestorDirect3, family = binomial, data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.545 -0.974 -0.735 1.068 1.698
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.6715 0.5546 -3.01 0.0026 **
## NetworkInvestorDirect1 0.0680 0.2251 0.30 0.7625
## NetworkInvestorDirect2 0.3359 0.2228 1.51 0.1316
## NetworkInvestorDirect3 0.0967 0.2374 0.41 0.6838
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 122.47 on 88 degrees of freedom
## Residual deviance: 112.82 on 85 degrees of freedom
## (3 observations deleted due to missingness)
## AIC: 120.8
##
## Number of Fisher Scoring iterations: 4
# None are significant
# Binomial GLM: SStaffChange modelled on the NetInvInd and NetInvDir predictors.
summary(
  glm(
    formula = SStaffChange ~ NetInvInd + NetInvDir,
    family = binomial,
    data = df.ioutcomes
  )
)
##
## Call:
## glm(formula = SStaffChange ~ NetInvInd + NetInvDir, family = binomial,
## data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.52 -1.05 -0.66 1.09 1.81
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.962 0.630 -3.12 0.0018 **
## NetInvInd 0.282 0.219 1.28 0.1994
## NetInvDir 0.266 0.227 1.17 0.2412
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 122.47 on 88 degrees of freedom
## Residual deviance: 111.81 on 86 degrees of freedom
## (3 observations deleted due to missingness)
## AIC: 117.8
##
## Number of Fisher Scoring iterations: 4
# Neither average is significant; NetInvInd (p = 0.20) is slightly stronger than NetInvDir (p = 0.24)
# Binomial GLM: SStaffChange modelled on the main effects and interaction of
# PrePlanningModel and NetworkInvestorIndirect3.
summary(
  glm(
    formula = SStaffChange ~ PrePlanningModel * NetworkInvestorIndirect3,
    family = binomial,
    data = df.ioutcomes
  )
)
##
## Call:
## glm(formula = SStaffChange ~ PrePlanningModel * NetworkInvestorIndirect3,
## family = binomial, data = df.ioutcomes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.456 -1.114 -0.427 0.995 2.210
##
## Coefficients:
## Estimate Std. Error z value
## (Intercept) -3.848 1.400 -2.75
## PrePlanningModel 0.751 0.426 1.76
## NetworkInvestorIndirect3 0.899 0.437 2.06
## PrePlanningModel:NetworkInvestorIndirect3 -0.153 0.120 -1.28
## Pr(>|z|)
## (Intercept) 0.006 **
## PrePlanningModel 0.078 .
## NetworkInvestorIndirect3 0.040 *
## PrePlanningModel:NetworkInvestorIndirect3 0.201
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 124.05 on 89 degrees of freedom
## Residual deviance: 107.26 on 86 degrees of freedom
## (2 observations deleted due to missingness)
## AIC: 115.3
##
## Number of Fisher Scoring iterations: 4
# NetworkInvestorIndirect3 is significant (p = 0.040); PrePlanningModel is only marginal (p = 0.078); the interaction term is not significant
Can we observe this data in a better way?
http://jason.bryer.org/timeline/ http://www.r-bloggers.com/visualising-twitter-user-timeline-activity-in-r/ http://blog.revolutionanalytics.com/2009/11/charting-time-series-as-calendar-heat-maps-in-r.html Google Search https://www.google.com/search?client=safari&rls=en&q=timeline+in+r&ie=UTF-8&oe=UTF-8