Incubator Project R Journal

In general this project aims to understand the relationships between entrepreneurs profile, their experiences in a startup incubator, and their incubator outcomes.

Exploring the Data

Load the data

Explore the data using the dPLYR and GGPlot2 packages

Relationship between Survival and PreEntry Experience (Work)

Graphs will have this format: Y-Axis= Variable of interest, X-Axis=Survival, Right-Axis=Angel Investment plot of chunk unnamed-chunk-3 plot of chunk unnamed-chunk-3 plot of chunk unnamed-chunk-3

More work experience -> More Survival -> More Angel Investment

Relationship between Survival and PreEntry Experience(Management)


gSmallData <- group_by(smallData, SurvivalFeb14, InvestmentAngel)
summarize(gSmallData, count = n(), PreEntryManagement1Mean = mean(PreEntryManagement1), 
    PreEntryManagement2 = mean(PreEntryManagement2), PreEntryManagement3 = mean(PreEntryManagement3))
## Source: local data frame [4 x 6]
## Groups: SurvivalFeb14
## 
##   SurvivalFeb14 InvestmentAngel count PreEntryManagement1Mean
## 1            No              No    32                   2.594
## 2            No             Yes     4                   3.500
## 3           Yes              No    25                   2.840
## 4           Yes             Yes    31                   3.129
## Variables not shown: PreEntryManagement2 (dbl), PreEntryManagement3 (dbl)

qplot(data = smallData, factor(SurvivalFeb14), PreEntryManagement1, geom = "boxplot") + 
    facet_grid(InvestmentAngel ~ .)

plot of chunk unnamed-chunk-4

qplot(data = smallData, factor(SurvivalFeb14), PreEntryManagement2, geom = "boxplot") + 
    facet_grid(InvestmentAngel ~ .)

plot of chunk unnamed-chunk-4

qplot(data = smallData, factor(SurvivalFeb14), PreEntryManagement3, geom = "boxplot") + 
    facet_grid(InvestmentAngel ~ .)

plot of chunk unnamed-chunk-4

More management experience -> More survival -> More Angel Investment

Relationship between Survival and Pre-Planning

gSmallData <- group_by(smallData, SurvivalFeb14, InvestmentAngel)
summarize(gSmallData, count = n(), PrePlanningMarketMean = mean(PrePlanningMarket), 
    PrePlanningCompetitiveMean = mean(PrePlanningCompetitive, na.rm = TRUE), 
    PrePlanningBrandMean = mean(PrePlanningBrand), PrePlanningModelMean = mean(PrePlanningModel))
## Source: local data frame [4 x 7]
## Groups: SurvivalFeb14
## 
##   SurvivalFeb14 InvestmentAngel count PrePlanningMarketMean
## 1            No              No    32                 2.750
## 2            No             Yes     4                 2.750
## 3           Yes              No    25                 3.560
## 4           Yes             Yes    31                 3.516
## Variables not shown: PrePlanningCompetitiveMean (dbl),
##   PrePlanningBrandMean (dbl), PrePlanningModelMean (dbl)

qplot(data = gSmallData, factor(SurvivalFeb14), PrePlanningMarket, geom = "boxplot") + 
    facet_grid(InvestmentAngel ~ .)

plot of chunk unnamed-chunk-5

qplot(data = gSmallData, factor(SurvivalFeb14), PrePlanningCompetitive, geom = "boxplot") + 
    facet_grid(InvestmentAngel ~ .)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-5

qplot(data = gSmallData, factor(SurvivalFeb14), PrePlanningBrand, geom = "boxplot") + 
    facet_grid(InvestmentAngel ~ .)

plot of chunk unnamed-chunk-5

qplot(data = gSmallData, factor(SurvivalFeb14), PrePlanningModel, geom = "boxplot") + 
    facet_grid(InvestmentAngel ~ .)

plot of chunk unnamed-chunk-5

More pre-planning -> More Survival -> More Angel Investment

Relationship between Survival and Start-up Experience

gSmallData <- group_by(smallData, SurvivalFeb14, InvestmentAngel)
summarize(gSmallData, count = n(), StartupExperienceMean = mean(StartupExperience, 
    na.rm = TRUE), StartupExperienceMedian = median(StartupExperience, na.rm = TRUE), 
    StartupExperienceSD = sd(StartupExperience, na.rm = TRUE))
## Source: local data frame [4 x 6]
## Groups: SurvivalFeb14
## 
##   SurvivalFeb14 InvestmentAngel count StartupExperienceMean
## 1            No              No    32                 1.806
## 2            No             Yes     4                 1.750
## 3           Yes              No    25                 1.500
## 4           Yes             Yes    31                 1.871
## Variables not shown: StartupExperienceMedian (int), StartupExperienceSD
##   (dbl)

qplot(data = gSmallData, factor(SurvivalFeb14), StartupExperience, geom = "boxplot") + 
    facet_grid(InvestmentAngel ~ .)
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-6

More start-up experience ~-> More Survival and Angel Investment

Relationship between Survival and Mentorship

gSmallData$Survival <- ifelse(gSmallData$SurvivalFeb14 == 1, "No", "Yes")
gSmallData <- group_by(smallData, Survival, InvestmentAngel, IncubatorMentor)
## Error: index out of bounds
summarize(gSmallData, count = n())
## Source: local data frame [4 x 3]
## Groups: SurvivalFeb14
## 
##   SurvivalFeb14 InvestmentAngel count
## 1            No              No    32
## 2            No             Yes     4
## 3           Yes              No    25
## 4           Yes             Yes    31
qplot(data = gSmallData, Survival, geom = "histogram", fill = IncubatorMentor)

plot of chunk unnamed-chunk-7

More mentorship -> More survival and Angel Investment

Relationship between Survival/Angel and Networking (Strong Ties)

# needed for pairs.panel
smallData$SurvivalFeb14 <- as.integer(as.factor(smallData$SurvivalFeb14))
smallData$InvestmentAngel <- as.integer(as.factor(smallData$InvestmentAngel))

gSmallData <- group_by(smallData, SurvivalFeb14, InvestmentAngel)
summarize(gSmallData, count = n(), NSSpouse = mean(NetworkSupportStrongSpouse, 
    na.rm = TRUE), NSRelatives = mean(NetworkSupportStrongRelatives), NSFriends = mean(NetworkSupportStrongFriends))
## Source: local data frame [4 x 6]
## Groups: SurvivalFeb14
## 
##   SurvivalFeb14 InvestmentAngel count NSSpouse NSRelatives NSFriends
## 1             1               1    32    3.870       3.656     3.688
## 2             1               2     4    3.333       3.750     4.000
## 3             2               1    25    3.818       3.800     4.000
## 4             2               2    31    3.840       3.968     4.000

qplot(data = gSmallData, factor(SurvivalFeb14), NetworkSupportStrongSpouse, 
    geom = "boxplot") + facet_grid(InvestmentAngel ~ .)
## Warning: Removed 23 rows containing non-finite values (stat_boxplot).
## Warning: Removed 7 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-8

qplot(data = gSmallData, factor(SurvivalFeb14), NetworkSupportStrongRelatives, 
    geom = "boxplot") + facet_grid(InvestmentAngel ~ .)

plot of chunk unnamed-chunk-8

qplot(data = gSmallData, factor(SurvivalFeb14), NetworkSupportStrongFriends, 
    geom = "boxplot") + facet_grid(InvestmentAngel ~ .)

plot of chunk unnamed-chunk-8


pairs.panels(dplyr::select(smallData, SurvivalFeb14, InvestmentAngel, NetworkSupportStrongSpouse:NetworkSupportStrongFriends), 
    pch = ".", main = "Survival and NetworkSupport (Strong ties)")

plot of chunk unnamed-chunk-8

Small evidence that Network Support Affects survival or investment

Relationship between Survival/Angel and Networking (Weak Ties)

gSmallData <- group_by(smallData, SurvivalFeb14)
summarize(gSmallData, count = n(), NSBus = mean(NetworkSupportWeakBusiness, 
    na.rm = TRUE), NSAcq = mean(NetworkSupportWeakAcquaintances, na.rm = TRUE), 
    NSEmpl = mean(NetworkSupportWeakEmployers, na.rm = TRUE), NSCoW = mean(NetworkSupportWeakCoWorkers, 
        na.rm = TRUE))
## Source: local data frame [2 x 6]
## 
##   SurvivalFeb14 count NSBus NSAcq NSEmpl NSCoW
## 1             1    36 3.571 3.286  2.719 3.061
## 2             2    56 3.852 3.455  3.056 3.707

qplot(data = gSmallData, factor(SurvivalFeb14), NetworkSupportWeakBusiness, 
    geom = "boxplot") + facet_grid(InvestmentAngel ~ .)
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-9

qplot(data = gSmallData, factor(SurvivalFeb14), NetworkSupportWeakAcquaintances, 
    geom = "boxplot") + facet_grid(InvestmentAngel ~ .)
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-9

qplot(data = gSmallData, factor(SurvivalFeb14), NetworkSupportWeakEmployers, 
    geom = "boxplot") + facet_grid(InvestmentAngel ~ .)
## Warning: Removed 12 rows containing non-finite values (stat_boxplot).
## Warning: Removed 12 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-9

qplot(data = gSmallData, factor(SurvivalFeb14), NetworkSupportWeakCoWorkers, 
    geom = "boxplot") + facet_grid(InvestmentAngel ~ .)
## Warning: Removed 12 rows containing non-finite values (stat_boxplot).
## Warning: Removed 6 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-9



pairs.panels(dplyr::select(smallData, SurvivalFeb14, InvestmentAngel, NetworkSupportWeakBusiness:NetworkSupportWeakCoWorkers), 
    pch = ".", main = "Survival and NetworkSupport (Weak ties)")

plot of chunk unnamed-chunk-9

Relationshiop seems to positive between network support (weak ties) but not very strong.

Relationship between Survival/Angel and Investor Network (Indirect)

gSmallData <- group_by(smallData, SurvivalFeb14, InvestmentAngel)
summarize(gSmallData, count = n(), IInv1 = mean(NetworkInvestorIndirect1, na.rm = TRUE), 
    IInv2 = mean(NetworkInvestorIndirect2, na.rm = TRUE), IInv3 = mean(NetworkInvestorIndirect3, 
        na.rm = TRUE))
## Source: local data frame [4 x 6]
## Groups: SurvivalFeb14
## 
##   SurvivalFeb14 InvestmentAngel count IInv1 IInv2 IInv3
## 1             1               1    32 2.750 2.438 2.656
## 2             1               2     4 3.500 2.750 2.500
## 3             2               1    25 3.708 3.500 3.250
## 4             2               2    31 3.800 3.933 3.900

gSmallData$Survival <- ifelse(gSmallData$SurvivalFeb14 == 1, "No", "Yes")

qplot(data = gSmallData, Survival, NetworkInvestorIndirect1, geom = "boxplot") + 
    facet_grid(InvestmentAngel ~ .)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-10

qplot(data = gSmallData, factor(SurvivalFeb14), NetworkInvestorIndirect2, geom = "boxplot") + 
    facet_grid(InvestmentAngel ~ .)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-10

qplot(data = gSmallData, factor(SurvivalFeb14), NetworkInvestorIndirect3, geom = "boxplot") + 
    facet_grid(InvestmentAngel ~ .)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-10



pairs.panels(dplyr::select(smallData, SurvivalFeb14, InvestmentAngel, NetworkInvestorIndirect1:NetworkInvestorIndirect3), 
    pch = ".", main = "Survival and NetworkSupport (Indirect Investor ties)")

plot of chunk unnamed-chunk-10

Strong effect Seems that indirect network of investor -> higher survival and angel investment

Relationship between Survival/Angel and Investor Network (Direct)

gSmallData <- group_by(smallData, SurvivalFeb14, InvestmentAngel)
summarize(gSmallData, count = n(), DInv1 = mean(NetworkInvestorDirect1, na.rm = TRUE), 
    DInv2 = mean(NetworkInvestorDirect2, na.rm = TRUE), DInv3 = mean(NetworkInvestorDirect3, 
        na.rm = TRUE))
## Source: local data frame [4 x 6]
## Groups: SurvivalFeb14
## 
##   SurvivalFeb14 InvestmentAngel count DInv1 DInv2 DInv3
## 1             1               1    32 2.219 2.531 1.844
## 2             1               2     4 3.000 2.750 3.000
## 3             2               1    25 2.708 2.875 2.333
## 4             2               2    31 3.933 3.655 3.207

qplot(data = gSmallData, factor(SurvivalFeb14), NetworkInvestorDirect1, geom = "boxplot") + 
    facet_grid(InvestmentAngel ~ .)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-11

qplot(data = gSmallData, factor(SurvivalFeb14), NetworkInvestorDirect2, geom = "boxplot") + 
    facet_grid(InvestmentAngel ~ .)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-11

qplot(data = gSmallData, factor(SurvivalFeb14), NetworkInvestorDirect3, geom = "boxplot") + 
    facet_grid(InvestmentAngel ~ .)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-11



pairs.panels(dplyr::select(smallData, SurvivalFeb14, InvestmentAngel, NetworkInvestorDirect1:NetworkInvestorDirect3), 
    pch = ".", main = "Survival and NetworkSupport (Direct Investor ties)")

plot of chunk unnamed-chunk-11

Strong effect Seems that direct network of investor -> higher survival and angel investment

Relationship between Survival/Angel and Satisfaction

gSmallData <- group_by(smallData, SurvivalFeb14)
summarize(gSmallData, count = n(), JobSatisfactionM = mean(SatisfactionJob, 
    na.rm = TRUE), IncubatorSatisfactionM = mean(SatisfactionIncubator, na.rm = TRUE))
## Source: local data frame [2 x 4]
## 
##   SurvivalFeb14 count JobSatisfactionM IncubatorSatisfactionM
## 1             1    36            3.639                  2.639
## 2             2    56            4.357                  3.643

qplot(data = gSmallData, factor(SurvivalFeb14), SatisfactionJob, geom = "boxplot") + 
    facet_grid(InvestmentAngel ~ .)

plot of chunk unnamed-chunk-12

qplot(data = gSmallData, factor(SurvivalFeb14), SatisfactionIncubator, geom = "boxplot") + 
    facet_grid(InvestmentAngel ~ .)

plot of chunk unnamed-chunk-12


pairs.panels(dplyr::select(smallData, SurvivalFeb14, InvestmentAngel, SatisfactionJob:SatisfactionIncubator), 
    pch = ".", main = "Survival and Satisfaction (Job and Incubator")

plot of chunk unnamed-chunk-12

More satisfaction -> More survival/angel investment

Relationship between Survival/Angel and Resource Allocation

smallData$ResourceInvestment <- as.integer(sub(smallData$ResourceInvestment, 
    pattern = "%", replacement = ""))
smallData$ResourceOperations <- as.integer(sub(smallData$ResourceOperations, 
    pattern = "%", replacement = ""))
smallData$ResourceHuman <- as.integer(sub(smallData$ResourceHuman, pattern = "%", 
    replacement = ""))

gSmallData <- group_by(smallData, SurvivalFeb14, InvestmentAngel)
dplyr::summarize(gSmallData, count = n(), MResInv = mean(ResourceInvestment, 
    na.rm = TRUE), MResOpe = mean(ResourceOperations, na.rm = TRUE), MResHum = mean(ResourceHuman, 
    na.rm = TRUE))
## Source: local data frame [4 x 6]
## Groups: SurvivalFeb14
## 
##   SurvivalFeb14 InvestmentAngel count MResInv MResOpe MResHum
## 1             1               1    32   16.29   39.19   44.84
## 2             1               2     4   18.75   42.50   36.25
## 3             2               1    25   18.54   44.58   36.67
## 4             2               2    31   11.13   40.16   48.39

qplot(data = gSmallData, factor(SurvivalFeb14), ResourceInvestment, geom = "boxplot") + 
    facet_grid(InvestmentAngel ~ .)
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-13

qplot(data = gSmallData, factor(SurvivalFeb14), ResourceOperations, geom = "boxplot") + 
    facet_grid(InvestmentAngel ~ .)
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-13

qplot(data = gSmallData, factor(SurvivalFeb14), ResourceHuman, geom = "boxplot") + 
    facet_grid(InvestmentAngel ~ .)
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-13


qplot(data = summarize(gSmallData, count = n(), MResInv = mean(ResourceInvestment, 
    na.rm = TRUE), MResOpe = mean(ResourceOperations, na.rm = TRUE), MResHum = mean(ResourceHuman, 
    na.rm = TRUE)), x = SurvivalFeb14, y = MResInv)

plot of chunk unnamed-chunk-13


df <- dplyr::summarize(gSmallData, count = n(), MResInv = mean(ResourceInvestment, 
    na.rm = TRUE), MResOpe = mean(ResourceOperations, na.rm = TRUE), MResHum = mean(ResourceHuman, 
    na.rm = TRUE))
df <- melt(data = df, id = c(df$SurvivalFeb14, df$InvestmentAngel))
## Error: could not find function "melt"

ggplot(data = df, aes(x = factor(SurvivalFeb14), y = value)) + geom_bar(aes(fill = factor(variable)), 
    position = "dodge", stat = "identity") + facet_grid(InvestmentAngel ~ .) + 
    ggtitle(label = "Investment by teams, RightFacet=InvestmentAngel (True=2)") + 
    xlab(label = "Survival (Yes=2)") + ylab(label = "Investment (Percent)")
## Error: object 'variable' not found

On investment, there seems to be a weak negative relationship with Survival (maybe). On operations, no effect. On human resources, it's a weak effect as well. No conclusions.

Upon closer inspection, it might be that higest percentage of investment on human resources at this stage might predict better survival and better investmnt. Maybe.

Relationship between Survival/AngelInvestment and basic Team Variables

Basic Team Variables: TeamNumberOwners, TeamNumberDecisions, TeamAge

# Fix Nathan and Ivette's ages. Wrong format
smallData$TeamAge[smallData$TeamAge == "15-34 years"] <- "25-34 years"
smallData$TeamAge[smallData$TeamAge == "24-34 years"] <- "25-34 years"
smallData$TeamAgeNum <- as.integer(factor(smallData$TeamAge))
gSmallData <- group_by(smallData, SurvivalFeb14, InvestmentAngel)

dplyr::summarize(gSmallData, count = n(), TeamOwnersM = mean(TeamNumberOwners, 
    na.rm = TRUE), TeamDecisionsM = mean(TeamNumberDecisions, na.rm = TRUE), 
    TeamAgeM = mean(TeamAgeNum, na.rm = TRUE))
## Source: local data frame [4 x 6]
## Groups: SurvivalFeb14
## 
##   SurvivalFeb14 InvestmentAngel count TeamOwnersM TeamDecisionsM TeamAgeM
## 1             1               1    32       2.062          2.033    2.156
## 2             1               2     4       2.000          2.250    2.000
## 3             2               1    25       2.208          2.250    2.120
## 4             2               2    31       2.645          2.400    2.032

qplot(data = gSmallData, factor(TeamAge), geom = "histogram", group = SurvivalFeb14, 
    fill = SurvivalFeb14, main = ("Histogram of Team Age groups and Survival"))

plot of chunk unnamed-chunk-14

qplot(data = gSmallData, factor(TeamNumberOwners), geom = "histogram", group = SurvivalFeb14, 
    fill = SurvivalFeb14, main = "Histogram of the Number of team owners and survival")

plot of chunk unnamed-chunk-14

qplot(data = gSmallData, factor(TeamNumberDecisions), geom = "histogram", group = SurvivalFeb14, 
    fill = SurvivalFeb14, main = "histogram of the number of team members who make decisions")

plot of chunk unnamed-chunk-14


qplot(data = gSmallData, factor(SurvivalFeb14), TeamNumberOwners, geom = "boxplot") + 
    facet_grid(InvestmentAngel ~ .)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-14

qplot(data = gSmallData, factor(SurvivalFeb14), TeamNumberDecisions, geom = "boxplot") + 
    facet_grid(InvestmentAngel ~ .)
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-14

It's possible a higher number of team numbers (and members who make decisions) increases survival chances. It's not a strong relationship. More interesting to see this data in the stacked histograms. TeamAge seems to be fully concentrated in the middle layer.

Relationship between Survival/Investment and Team Heterogeneity

gSmallData <- group_by(smallData, SurvivalFeb14)
dplyr::summarize(gSmallData, count = n(), THeterM1 = mean(TeamHeterogeneity1, 
    na.rm = TRUE), THeterM2 = mean(TeamHeterogeneity2, na.rm = TRUE), THeterM3 = mean(TeamHeterogeneity3, 
    na.rm = TRUE))
## Source: local data frame [2 x 5]
## 
##   SurvivalFeb14 count THeterM1 THeterM2 THeterM3
## 1             1    36    3.343    3.400    3.429
## 2             2    56    3.696    3.821    3.964

qplot(data = gSmallData, factor(SurvivalFeb14), TeamHeterogeneity1, geom = "boxplot") + 
    facet_grid(InvestmentAngel ~ .)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-15

qplot(data = gSmallData, factor(SurvivalFeb14), TeamHeterogeneity2, geom = "boxplot") + 
    facet_grid(InvestmentAngel ~ .)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-15

qplot(data = gSmallData, factor(SurvivalFeb14), TeamHeterogeneity3, geom = "boxplot") + 
    facet_grid(InvestmentAngel ~ .)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-15

Small positive relationship team heterogenity and survival and angel investment

Relationship between Survival/Angel and team flexibility

gSmallData <- group_by(smallData, SurvivalFeb14)

dplyr::summarize(gSmallData, count = n(), TFlexM1 = mean(TeamFlexibility1, na.rm = TRUE), 
    TFlexM2 = mean(TeamFlexibility2, na.rm = TRUE), TFlexM3 = mean(TeamFlexibility3, 
        na.rm = TRUE))
## Source: local data frame [2 x 5]
## 
##   SurvivalFeb14 count TFlexM1 TFlexM2 TFlexM3
## 1             1    36   3.086   2.257   2.514
## 2             2    56   3.768   3.196   2.982

qplot(data = gSmallData, factor(SurvivalFeb14), TeamFlexibility1, geom = "boxplot")
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-16

qplot(data = gSmallData, factor(SurvivalFeb14), TeamFlexibility2, geom = "boxplot")
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-16

qplot(data = gSmallData, factor(SurvivalFeb14), TeamFlexibility3, geom = "boxplot")
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-16

Team flexibliity -> Higher survival

Relationship Survival/investment and Team Preference

gSmallData <- group_by(smallData, SurvivalFeb14)

dplyr::summarize(gSmallData, count = n(), TeamRelativeSizeReverseM = mean(TeamRelativeSizeInverseScored, 
    na.rm = TRUE))
## Source: local data frame [2 x 3]
## 
##   SurvivalFeb14 count TeamRelativeSizeReverseM
## 1             1    36                    3.533
## 2             2    56                    2.680
dplyr::summarize(gSmallData, count = n(), TPrefM1 = mean(TeamPreference1, na.rm = TRUE), 
    TPrefM2 = mean(TeamPreference2, na.rm = TRUE), TPrefM3 = mean(TeamPreference3, 
        na.rm = TRUE))
## Source: local data frame [2 x 5]
## 
##   SurvivalFeb14 count TPrefM1 TPrefM2 TPrefM3
## 1             1    36   3.971   3.971   4.029
## 2             2    56   4.304   4.107   4.143

qplot(data = gSmallData, factor(SurvivalFeb14), TeamRelativeSizeInverseScored, 
    geom = "boxplot", main = "Team relative size (Inverse Scored) higher means team too small")
## Warning: Removed 12 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-17

qplot(data = gSmallData, factor(SurvivalFeb14), TeamPreference1, geom = "boxplot")
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-17

qplot(data = gSmallData, factor(SurvivalFeb14), TeamPreference2, geom = "boxplot")
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-17

qplot(data = gSmallData, factor(SurvivalFeb14), TeamPreference3, geom = "boxplot")
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-17

No obvious effect. Only the inversely scored team size variable seems to be strong.

Relationshipo between Survival/Investment and Nationality

gSmallData <- group_by(smallData, SurvivalFeb14, InvestmentAngel, PersonalNationality)

dplyr::summarize(gSmallData, count = n())
## Source: local data frame [48 x 4]
## Groups: SurvivalFeb14, InvestmentAngel
## 
##    SurvivalFeb14 InvestmentAngel PersonalNationality count
## 1              1               1           Argentina     3
## 2              1               1              Brazil     2
## 3              1               1              Canada     1
## 4              1               1               Chile     7
## 5              1               1             England     1
## 6              1               1              Greece     1
## 7              1               1               India     2
## 8              1               1              Indian     1
## 9              1               1              Mexico     1
## 10             1               1            Pakistan     1
## 11             1               1                Peru     1
## 12             1               1           Sir Lanka     1
## 13             1               1               Spain     1
## 14             1               1                 USA     9
## 15             1               2             Ecuador     1
## 16             1               2              Mexico     1
## 17             1               2              Russia     1
## 18             1               2                 USA     1
## 19             2               1           Argentina     1
## 20             2               1             Austria     1
## 21             2               1               Chile     4
## 22             2               1               China     1
## 23             2               1             England     2
## 24             2               1              France     1
## 25             2               1               India     3
## 26             2               1           Indonesia     1
## 27             2               1              Israel     1
## 28             2               1               Italy     1
## 29             2               1               Kenya     1
## 30             2               1              Mexico     2
## 31             2               1                Peru     1
## 32             2               1                 USA     5
## 33             2               2           Argentina     6
## 34             2               2             Bolivia     1
## 35             2               2              Brazil     1
## 36             2               2               Chile     2
## 37             2               2             England     1
## 38             2               2              France     1
## 39             2               2               India     1
## 40             2               2               Italy     2
## 41             2               2         Netherlands     1
## 42             2               2         New Zealand     1
## 43             2               2                Peru     1
## 44             2               2               Spain     2
## 45             2               2                 USA     6
## 46             2               2             Ukraine     1
## 47             2               2             Uruguay     2
## 48             2               2           Venezuela     2

ggplot(data = dplyr::summarize(gSmallData, count = n()), aes(x = factor(PersonalNationality), 
    y = count)) + geom_bar(aes(fill = factor(SurvivalFeb14)), position = "dodge", 
    stat = "identity") + facet_grid(InvestmentAngel ~ .) + ggtitle(label = "Histogram of survival and AngelInv by country, RightFacet=InvestmentAngel (True=Yes)") + 
    xlab(label = "Country") + ylab(label = "Count")

plot of chunk unnamed-chunk-18



qplot(data = smallData, x = PersonalNationality, group = SurvivalFeb14, fill = factor(SurvivalFeb14), 
    geom = "histogram", main = "Histogram of countries by survival")

plot of chunk unnamed-chunk-18

qplot(data = smallData, x = PersonalNationality, group = InvestmentAngel, fill = factor(InvestmentAngel), 
    geom = "histogram", main = "Histogram of countries by InvestmentAngel")

plot of chunk unnamed-chunk-18

Explore with Psych Package

# explore pairs panel

pairs.panels(dplyr::select(smallData, SurvivalFeb14, PreEntryBusinessWork:PreEntryBusinessHobbies), 
    pch = ".", main = "Survival and PreEntry Experience")

plot of chunk unnamed-chunk-19

pairs.panels(dplyr::select(smallData, SurvivalFeb14, PreEntryManagement1:PrePlanningModel), 
    pch = ".", main = "Survival and PreEntry Management and PrePlanning")

plot of chunk unnamed-chunk-19


pairs.panels(dplyr::select(smallData, SurvivalFeb14, InvestmentAngel, PreEntryManagement1:PrePlanningModel), 
    pch = ".", main = "Survival/AngelInvestment and PreEntry Management and PrePlanning")

plot of chunk unnamed-chunk-19


# explore error bars
error.bars.by(dplyr::select(smallData, PreEntryManagement1:PrePlanningModel), 
    group = smallData$SurveySource)

plot of chunk unnamed-chunk-19

# Observe the effect of the survey source = Online, Online2, and Paper

# explore correlation tests
corr.test(dplyr::select(smallData, PreEntryBusinessWork:PreEntryBusinessHobbies))
## Error: arguments imply differing number of rows: 0, 3

# significance
r.test(92, 0.44)
## Correlation tests 
## Call:r.test(n = 92, r12 = 0.44)
## Test of significance of a  correlation 
##  t value 4.65    with probability < 1.1e-05
##  and confidence interval  0.26 0.59

cortest(dplyr::select(smallData, PreEntryBusinessWork:PreEntryBusinessHobbies))
## R1 was not square, finding R from data
## Tests of correlation matrices 
## Call:cortest(R1 = dplyr::select(smallData, PreEntryBusinessWork:PreEntryBusinessHobbies))
##  Chi Square value 46.83  with df =  3   with probability < 3.8e-10

Factor Analysis and Reliability of each latent variable:

# Exploratory Factor Analysis


# Pre-entry
fa.preentry <- fa(dplyr::select(smallData, PreEntryBusinessWork:StartupExperience), 
    4)
## Loading required package: GPArotation
## Warning: convergence not obtained in GPFoblq. 1000 iterations used.
plot(fa.preentry)

plot of chunk unnamed-chunk-20

fa.diagram(fa.preentry)

plot of chunk unnamed-chunk-20

fa.preentry
## Factor Analysis using method =  minres
## Call: fa(r = dplyr::select(smallData, PreEntryBusinessWork:StartupExperience), 
##     nfactors = 4)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                             MR1   MR2   MR3   MR4   h2   u2 com
## PreEntryBusinessWork      -0.05  0.04  0.55  0.15 0.32 0.68 1.2
## PreEntryBusinessEducation  0.17  0.03  0.68 -0.11 0.54 0.46 1.2
## PreEntryBusinessHobbies   -0.11 -0.01  0.65  0.05 0.41 0.59 1.1
## PreEntryManagement1        0.08  0.75 -0.11 -0.05 0.56 0.44 1.1
## PreEntryManagement2       -0.04  0.87  0.04  0.07 0.79 0.21 1.0
## PreEntryManagement3       -0.03  0.74  0.08 -0.11 0.55 0.45 1.1
## PrePlanningMarket          0.62  0.07 -0.03  0.41 0.73 0.27 1.8
## PrePlanningCompetitive     0.74  0.09 -0.01  0.29 0.81 0.19 1.3
## PrePlanningBrand           0.81  0.04  0.05 -0.02 0.69 0.31 1.0
## PrePlanningModel           0.98 -0.04  0.01 -0.15 0.89 0.11 1.1
## StartupExperience          0.01 -0.08  0.17  0.29 0.10 0.90 1.8
## 
##                        MR1  MR2  MR3  MR4
## SS loadings           2.72 1.94 1.25 0.49
## Proportion Var        0.25 0.18 0.11 0.04
## Cumulative Var        0.25 0.42 0.54 0.58
## Proportion Explained  0.43 0.30 0.20 0.08
## Cumulative Proportion 0.43 0.73 0.92 1.00
## 
##  With factor correlations of 
##      MR1  MR2   MR3   MR4
## MR1 1.00 0.32  0.11  0.28
## MR2 0.32 1.00  0.25  0.21
## MR3 0.11 0.25  1.00 -0.02
## MR4 0.28 0.21 -0.02  1.00
## 
## Mean item complexity =  1.2
## Test of the hypothesis that 4 factors are sufficient.
## 
## The degrees of freedom for the null model are  55  and the objective function was  5.02 with Chi Square of  434.2
## The degrees of freedom for the model are 17  and the objective function was  0.36 
## 
## The root mean square of the residuals (RMSR) is  0.04 
## The df corrected root mean square of the residuals is  0.07 
## 
## The harmonic number of observations is  91 with the empirical chi square  15.31  with prob <  0.57 
## The total number of observations was  92  with MLE Chi Square =  30.55  with prob <  0.023 
## 
## Tucker Lewis Index of factoring reliability =  0.88
## RMSEA index =  0.102  and the 90 % confidence intervals are  0.035 0.145
## BIC =  -46.32
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy             
##                                                 MR1  MR2  MR3  MR4
## Correlation of scores with factors             0.97 0.93 0.83 0.79
## Multiple R square of scores with factors       0.94 0.87 0.70 0.62
## Minimum correlation of possible factor scores  0.88 0.73 0.39 0.23

# Pre-entry business
fa.preentrybusiness <- fa(dplyr::select(smallData, PreEntryBusinessWork:PreEntryBusinessHobbies), 
    1)
fa.diagram(fa.preentrybusiness)

plot of chunk unnamed-chunk-20

fa.preentrybusiness
## Factor Analysis using method =  minres
## Call: fa(r = dplyr::select(smallData, PreEntryBusinessWork:PreEntryBusinessHobbies), 
##     nfactors = 1)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                            MR1   h2   u2 com
## PreEntryBusinessWork      0.53 0.28 0.72   1
## PreEntryBusinessEducation 0.82 0.67 0.33   1
## PreEntryBusinessHobbies   0.54 0.29 0.71   1
## 
##                 MR1
## SS loadings    1.24
## Proportion Var 0.41
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  3  and the objective function was  0.44 with Chi Square of  39.08
## The degrees of freedom for the model are 0  and the objective function was  0 
## 
## The root mean square of the residuals (RMSR) is  0 
## The df corrected root mean square of the residuals is  NA 
## 
## The harmonic number of observations is  92 with the empirical chi square  0  with prob <  NA 
## The total number of observations was  92  with MLE Chi Square =  0  with prob <  NA 
## 
## Tucker Lewis Index of factoring reliability =  -Inf
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                 MR1
## Correlation of scores with factors             0.86
## Multiple R square of scores with factors       0.74
## Minimum correlation of possible factor scores  0.48
alpha(select(smallData, PreEntryBusinessWork:PreEntryBusinessHobbies))
## 
## Reliability analysis   
## Call: alpha(x = select(smallData, PreEntryBusinessWork:PreEntryBusinessHobbies))
## 
##   raw_alpha std.alpha G6(smc) average_r S/N  ase mean   sd
##       0.65      0.65    0.57      0.39 1.9 0.11  3.6 0.88
## 
##  lower alpha upper     95% confidence boundaries
## 0.43 0.65 0.88 
## 
##  Reliability if an item is dropped:
##                           raw_alpha std.alpha G6(smc) average_r S/N
## PreEntryBusinessWork           0.61      0.61    0.44      0.44 1.6
## PreEntryBusinessEducation      0.44      0.44    0.29      0.29 0.8
## PreEntryBusinessHobbies        0.61      0.61    0.44      0.44 1.5
##                           alpha se
## PreEntryBusinessWork          0.17
## PreEntryBusinessEducation     0.19
## PreEntryBusinessHobbies       0.17
## 
##  Item statistics 
##                            n    r r.cor r.drop mean  sd
## PreEntryBusinessWork      92 0.75  0.53   0.42  3.7 1.2
## PreEntryBusinessEducation 92 0.81  0.67   0.55  3.7 1.1
## PreEntryBusinessHobbies   92 0.75  0.53   0.43  3.6 1.2
## 
## Non missing response frequency for each item
##                              1    2    3    4    5 miss
## PreEntryBusinessWork      0.04 0.13 0.22 0.32 0.29    0
## PreEntryBusinessEducation 0.01 0.17 0.23 0.30 0.28    0
## PreEntryBusinessHobbies   0.08 0.13 0.18 0.38 0.23    0

# Pre-entry Management
fa.preentrymgt <- fa(dplyr::select(smallData, PreEntryManagement1:PreEntryManagement3), 
    1)
fa.diagram(fa.preentrymgt)

plot of chunk unnamed-chunk-20

fa.preentrymgt
## Factor Analysis using method =  minres
## Call: fa(r = dplyr::select(smallData, PreEntryManagement1:PreEntryManagement3), 
##     nfactors = 1)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                      MR1   h2   u2 com
## PreEntryManagement1 0.73 0.54 0.46   1
## PreEntryManagement2 0.87 0.76 0.24   1
## PreEntryManagement3 0.73 0.53 0.47   1
## 
##                 MR1
## SS loadings    1.83
## Proportion Var 0.61
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  3  and the objective function was  1.1 with Chi Square of  97.84
## The degrees of freedom for the model are 0  and the objective function was  0 
## 
## The root mean square of the residuals (RMSR) is  0 
## The df corrected root mean square of the residuals is  NA 
## 
## The harmonic number of observations is  92 with the empirical chi square  0  with prob <  NA 
## The total number of observations was  92  with MLE Chi Square =  0  with prob <  NA 
## 
## Tucker Lewis Index of factoring reliability =  -Inf
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                 MR1
## Correlation of scores with factors             0.92
## Multiple R square of scores with factors       0.85
## Minimum correlation of possible factor scores  0.69
alpha(select(smallData, PreEntryManagement1:PreEntryManagement3))
## 
## Reliability analysis   
## Call: alpha(x = select(smallData, PreEntryManagement1:PreEntryManagement3))
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean sd
##       0.82      0.82    0.76       0.6 4.6 0.091  2.8  1
## 
##  lower alpha upper     95% confidence boundaries
## 0.64 0.82 0.99 
## 
##  Reliability if an item is dropped:
##                     raw_alpha std.alpha G6(smc) average_r S/N alpha se
## PreEntryManagement1      0.77      0.78    0.64      0.64 3.5     0.14
## PreEntryManagement2      0.69      0.70    0.54      0.54 2.3     0.16
## PreEntryManagement3      0.78      0.78    0.64      0.64 3.6     0.14
## 
##  Item statistics 
##                      n    r r.cor r.drop mean  sd
## PreEntryManagement1 92 0.85  0.72   0.65  2.9 1.1
## PreEntryManagement2 92 0.88  0.81   0.73  2.9 1.1
## PreEntryManagement3 92 0.84  0.72   0.65  2.7 1.3
## 
## Non missing response frequency for each item
##                        1    2    3    4    5 miss
## PreEntryManagement1 0.13 0.25 0.29 0.26 0.07    0
## PreEntryManagement2 0.18 0.11 0.38 0.28 0.04    0
## PreEntryManagement3 0.25 0.22 0.24 0.18 0.11    0

# Pre-entry Bus Planning
fa.preentrypln <- fa(dplyr::select(smallData, PrePlanningMarket:PrePlanningModel), 
    1)
fa.diagram(fa.preentrypln)

plot of chunk unnamed-chunk-20

fa.preentrypln
## Factor Analysis using method =  minres
## Call: fa(r = dplyr::select(smallData, PrePlanningMarket:PrePlanningModel), 
##     nfactors = 1)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                         MR1   h2   u2 com
## PrePlanningMarket      0.79 0.63 0.37   1
## PrePlanningCompetitive 0.88 0.77 0.23   1
## PrePlanningBrand       0.83 0.68 0.32   1
## PrePlanningModel       0.86 0.74 0.26   1
## 
##                 MR1
## SS loadings    2.83
## Proportion Var 0.71
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  6  and the objective function was  2.71 with Chi Square of  240.8
## The degrees of freedom for the model are 2  and the objective function was  0.15 
## 
## The root mean square of the residuals (RMSR) is  0.05 
## The df corrected root mean square of the residuals is  0.08 
## 
## The harmonic number of observations is  92 with the empirical chi square  2.57  with prob <  0.28 
## The total number of observations was  92  with MLE Chi Square =  13.16  with prob <  0.0014 
## 
## Tucker Lewis Index of factoring reliability =  0.856
## RMSEA index =  0.252  and the 90 % confidence intervals are  0.132 0.38
## BIC =  4.12
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                 MR1
## Correlation of scores with factors             0.95
## Multiple R square of scores with factors       0.91
## Minimum correlation of possible factor scores  0.82
alpha(select(smallData, PrePlanningMarket:PrePlanningModel))
## 
## Reliability analysis   
## Call: alpha(x = select(smallData, PrePlanningMarket:PrePlanningModel))
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd
##        0.9       0.9    0.89       0.7 9.5 0.058  3.2 1.2
## 
##  lower alpha upper     95% confidence boundaries
## 0.79 0.9 1.02 
## 
##  Reliability if an item is dropped:
##                        raw_alpha std.alpha G6(smc) average_r S/N alpha se
## PrePlanningMarket           0.89      0.89    0.85      0.74 8.4    0.079
## PrePlanningCompetitive      0.86      0.86    0.82      0.68 6.3    0.083
## PrePlanningBrand            0.88      0.88    0.84      0.71 7.4    0.081
## PrePlanningModel            0.87      0.87    0.82      0.69 6.6    0.083
## 
##  Item statistics 
##                         n    r r.cor r.drop mean  sd
## PrePlanningMarket      92 0.85  0.78   0.73  3.2 1.2
## PrePlanningCompetitive 91 0.90  0.87   0.82  3.2 1.3
## PrePlanningBrand       92 0.87  0.82   0.78  3.1 1.4
## PrePlanningModel       92 0.89  0.85   0.81  3.3 1.4
## 
## Non missing response frequency for each item
##                           1    2    3    4    5 miss
## PrePlanningMarket      0.09 0.22 0.21 0.36 0.13 0.00
## PrePlanningCompetitive 0.12 0.19 0.24 0.29 0.16 0.01
## PrePlanningBrand       0.16 0.18 0.22 0.23 0.21 0.00
## PrePlanningModel       0.15 0.16 0.22 0.20 0.27 0.00

# Pre-entry Org Capital Only one measurement don't work!
fa.preentryorgcap <- fa(dplyr::select(smallData, StartupExperience), 1)
## In fa, too many factors requested for this number of variables to use SMC for communality estimates, 1s are used instead
fa.diagram(fa.preentryorgcap)

plot of chunk unnamed-chunk-20

fa.preentryorgcap
## Factor Analysis using method =  minres
## Call: fa(r = dplyr::select(smallData, StartupExperience), nfactors = 1)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                   MR1    h2 u2 com
## StartupExperience   0 1e-30  1   1
## 
##                MR1
## SS loadings      0
## Proportion Var   0
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  0  and the objective function was  0 with Chi Square of  0
## The degrees of freedom for the model are -1  and the objective function was  0 
## 
## The root mean square of the residuals (RMSR) is  NaN 
## The df corrected root mean square of the residuals is  NA 
## 
## The harmonic number of observations is  90 with the empirical chi square  0  with prob <  NA 
## The total number of observations was  92  with MLE Chi Square =  0  with prob <  NA 
## 
## Tucker Lewis Index of factoring reliability =  NaN
## Fit based upon off diagonal values = NaN
## Measures of factor score adequacy             
##                                                MR1
## Correlation of scores with factors               0
## Multiple R square of scores with factors         0
## Minimum correlation of possible factor scores   -1
alpha(data.frame(1, 1, 1))
## Warning: Item = X1 had no variance and was deleted
## Warning: Item = X1.1 had no variance and was deleted
## Warning: Item = X1.2 had no variance and was deleted
## Error: only defined on a data frame with all numeric variables
df <- select(smallData, StartupExperience)

# Mentorship: Together
fa.mentorship <- fa(dplyr::select(smallData, MFQCareerSupport1:MFQRoleModeling3), 
    3)
fa.diagram(fa.mentorship)

plot of chunk unnamed-chunk-20

fa.mentorship
## Factor Analysis using method =  minres
## Call: fa(r = dplyr::select(smallData, MFQCareerSupport1:MFQRoleModeling3), 
##     nfactors = 3)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                     MR1   MR3   MR2   h2    u2 com
## MFQCareerSupport1  0.06  0.82 -0.09 0.68 0.317 1.0
## MFQCareerSupport2  0.02  0.90  0.01 0.83 0.174 1.0
## MFQCareerSupport3 -0.05  0.93  0.06 0.86 0.139 1.0
## MFQPsySupport1     0.97 -0.01 -0.04 0.92 0.075 1.0
## MFQPsySupport2     0.95  0.05 -0.01 0.93 0.072 1.0
## MFQPsySupport3     0.83 -0.04  0.09 0.73 0.272 1.0
## MFQRoleModeling1   0.15  0.06  0.63 0.48 0.516 1.1
## MFQRoleModeling2   0.05 -0.06  0.94 0.90 0.098 1.0
## MFQRoleModeling3  -0.08  0.05  0.93 0.83 0.166 1.0
## 
##                        MR1  MR3  MR2
## SS loadings           2.62 2.37 2.18
## Proportion Var        0.29 0.26 0.24
## Cumulative Var        0.29 0.55 0.80
## Proportion Explained  0.37 0.33 0.30
## Cumulative Proportion 0.37 0.70 1.00
## 
##  With factor correlations of 
##      MR1  MR3  MR2
## MR1 1.00 0.23 0.27
## MR3 0.23 1.00 0.13
## MR2 0.27 0.13 1.00
## 
## Mean item complexity =  1
## Test of the hypothesis that 3 factors are sufficient.
## 
## The degrees of freedom for the null model are  36  and the objective function was  7.93 with Chi Square of  691.4
## The degrees of freedom for the model are 12  and the objective function was  0.42 
## 
## The root mean square of the residuals (RMSR) is  0.02 
## The df corrected root mean square of the residuals is  0.04 
## 
## The harmonic number of observations is  46 with the empirical chi square  1.91  with prob <  1 
## The total number of observations was  92  with MLE Chi Square =  35.77  with prob <  0.00035 
## 
## Tucker Lewis Index of factoring reliability =  0.889
## RMSEA index =  0.155  and the 90 % confidence intervals are  0.093 0.203
## BIC =  -18.49
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                 MR1  MR3  MR2
## Correlation of scores with factors             0.98 0.96 0.97
## Multiple R square of scores with factors       0.97 0.93 0.94
## Minimum correlation of possible factor scores  0.93 0.86 0.88

# Mentorship: Career Support
fa.mencareer <- fa(dplyr::select(smallData, MFQCareerSupport1:MFQCareerSupport3), 
    1)
fa.diagram(fa.mencareer)

plot of chunk unnamed-chunk-20

fa.mencareer
## Factor Analysis using method =  minres
## Call: fa(r = dplyr::select(smallData, MFQCareerSupport1:MFQCareerSupport3), 
##     nfactors = 1)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                    MR1   h2   u2 com
## MFQCareerSupport1 0.82 0.67 0.33   1
## MFQCareerSupport2 0.91 0.84 0.16   1
## MFQCareerSupport3 0.92 0.84 0.16   1
## 
##                 MR1
## SS loadings    2.35
## Proportion Var 0.78
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  3  and the objective function was  2.17 with Chi Square of  193.9
## The degrees of freedom for the model are 0  and the objective function was  0 
## 
## The root mean square of the residuals (RMSR) is  0 
## The df corrected root mean square of the residuals is  NA 
## 
## The harmonic number of observations is  46 with the empirical chi square  0  with prob <  NA 
## The total number of observations was  92  with MLE Chi Square =  0  with prob <  NA 
## 
## Tucker Lewis Index of factoring reliability =  -Inf
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                 MR1
## Correlation of scores with factors             0.96
## Multiple R square of scores with factors       0.93
## Minimum correlation of possible factor scores  0.85
alpha(select(smallData, MFQCareerSupport1:MFQCareerSupport3))
## 
## Reliability analysis   
## Call: alpha(x = select(smallData, MFQCareerSupport1:MFQCareerSupport3))
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd
##       0.91      0.91    0.88      0.78  11 0.075  3.8 1.2
## 
##  lower alpha upper     95% confidence boundaries
## 0.77 0.91 1.06 
## 
##  Reliability if an item is dropped:
##                   raw_alpha std.alpha G6(smc) average_r  S/N alpha se
## MFQCareerSupport1      0.91      0.91    0.84      0.84 10.5     0.12
## MFQCareerSupport2      0.85      0.86    0.75      0.75  6.1     0.13
## MFQCareerSupport3      0.85      0.86    0.75      0.75  6.0     0.13
## 
##  Item statistics 
##                    n    r r.cor r.drop mean  sd
## MFQCareerSupport1 46 0.90  0.81   0.78  3.8 1.2
## MFQCareerSupport2 46 0.93  0.89   0.85  3.7 1.4
## MFQCareerSupport3 46 0.94  0.90   0.85  3.8 1.4
## 
## Non missing response frequency for each item
##                      1    2    3    4    5 miss
## MFQCareerSupport1 0.02 0.20 0.11 0.26 0.41  0.5
## MFQCareerSupport2 0.11 0.11 0.15 0.26 0.37  0.5
## MFQCareerSupport3 0.13 0.09 0.07 0.33 0.39  0.5

# Mentorship: psychosocial support
fa.menpsy <- fa(dplyr::select(smallData, MFQPsySupport1:MFQPsySupport3), 1)
fa.diagram(fa.menpsy)

plot of chunk unnamed-chunk-20

fa.menpsy
## Factor Analysis using method =  minres
## Call: fa(r = dplyr::select(smallData, MFQPsySupport1:MFQPsySupport3), 
##     nfactors = 1)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                 MR1   h2    u2 com
## MFQPsySupport1 0.97 0.94 0.065   1
## MFQPsySupport2 0.96 0.91 0.086   1
## MFQPsySupport3 0.84 0.71 0.287   1
## 
##                 MR1
## SS loadings    2.56
## Proportion Var 0.85
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  3  and the objective function was  3.09 with Chi Square of  275.4
## The degrees of freedom for the model are 0  and the objective function was  0 
## 
## The root mean square of the residuals (RMSR) is  0 
## The df corrected root mean square of the residuals is  NA 
## 
## The harmonic number of observations is  46 with the empirical chi square  0  with prob <  NA 
## The total number of observations was  92  with MLE Chi Square =  0  with prob <  NA 
## 
## Tucker Lewis Index of factoring reliability =  -Inf
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                 MR1
## Correlation of scores with factors             0.98
## Multiple R square of scores with factors       0.97
## Minimum correlation of possible factor scores  0.93
alpha(select(smallData, MFQPsySupport1:MFQPsySupport3))
## 
## Reliability analysis   
## Call: alpha(x = select(smallData, MFQPsySupport1:MFQPsySupport3))
## 
##   raw_alpha std.alpha G6(smc) average_r S/N  ase mean  sd
##       0.94      0.94    0.93      0.85  17 0.07    3 1.3
## 
##  lower alpha upper     95% confidence boundaries
## 0.81 0.94 1.08 
## 
##  Reliability if an item is dropped:
##                raw_alpha std.alpha G6(smc) average_r  S/N alpha se
## MFQPsySupport1      0.89      0.89    0.81      0.81  8.4     0.13
## MFQPsySupport2      0.90      0.90    0.82      0.82  8.9     0.12
## MFQPsySupport3      0.96      0.96    0.92      0.92 24.5     0.11
## 
##  Item statistics 
##                 n    r r.cor r.drop mean  sd
## MFQPsySupport1 46 0.96  0.95   0.91    3 1.4
## MFQPsySupport2 46 0.96  0.95   0.91    3 1.3
## MFQPsySupport3 46 0.92  0.84   0.83    3 1.5
## 
## Non missing response frequency for each item
##                   1    2    3    4    5 miss
## MFQPsySupport1 0.17 0.26 0.15 0.24 0.17  0.5
## MFQPsySupport2 0.15 0.24 0.20 0.24 0.17  0.5
## MFQPsySupport3 0.22 0.17 0.20 0.20 0.22  0.5

# Mentorship: RoleModeling
fa.menrole <- fa(dplyr::select(smallData, MFQRoleModeling1:MFQRoleModeling3), 
    1)
fa.diagram(fa.menrole)

plot of chunk unnamed-chunk-20

fa.menrole
## Factor Analysis using method =  minres
## Call: fa(r = dplyr::select(smallData, MFQRoleModeling1:MFQRoleModeling3), 
##     nfactors = 1)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                   MR1   h2    u2 com
## MFQRoleModeling1 0.68 0.46 0.542   1
## MFQRoleModeling2 0.96 0.92 0.084   1
## MFQRoleModeling3 0.89 0.80 0.202   1
## 
##                 MR1
## SS loadings    2.17
## Proportion Var 0.72
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  3  and the objective function was  1.87 with Chi Square of  167.1
## The degrees of freedom for the model are 0  and the objective function was  0 
## 
## The root mean square of the residuals (RMSR) is  0 
## The df corrected root mean square of the residuals is  NA 
## 
## The harmonic number of observations is  46 with the empirical chi square  0  with prob <  NA 
## The total number of observations was  92  with MLE Chi Square =  0  with prob <  NA 
## 
## Tucker Lewis Index of factoring reliability =  -Inf
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                 MR1
## Correlation of scores with factors             0.97
## Multiple R square of scores with factors       0.94
## Minimum correlation of possible factor scores  0.88
alpha(select(smallData, MFQRoleModeling1:MFQRoleModeling3))
## 
## Reliability analysis   
## Call: alpha(x = select(smallData, MFQRoleModeling1:MFQRoleModeling3))
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd
##       0.87      0.88    0.85       0.7 7.1 0.081  3.6 1.1
## 
##  lower alpha upper     95% confidence boundaries
## 0.72 0.87 1.03 
## 
##  Reliability if an item is dropped:
##                  raw_alpha std.alpha G6(smc) average_r  S/N alpha se
## MFQRoleModeling1      0.92      0.92    0.86      0.86 11.8     0.12
## MFQRoleModeling2      0.75      0.75    0.60      0.60  3.1     0.15
## MFQRoleModeling3      0.79      0.79    0.65      0.65  3.7     0.14
## 
##  Item statistics 
##                   n    r r.cor r.drop mean  sd
## MFQRoleModeling1 46 0.84  0.68   0.65  3.4 1.2
## MFQRoleModeling2 46 0.93  0.91   0.85  3.7 1.2
## MFQRoleModeling3 46 0.92  0.89   0.80  3.7 1.4
## 
## Non missing response frequency for each item
##                     1    2    3    4    5 miss
## MFQRoleModeling1 0.07 0.20 0.20 0.35 0.20  0.5
## MFQRoleModeling2 0.04 0.15 0.17 0.35 0.28  0.5
## MFQRoleModeling3 0.13 0.07 0.11 0.33 0.37  0.5

# Networking: Together
fa.net <- fa(dplyr::select(smallData, NetworkSupportStrongSpouse:NetworkInvestorDirect3), 
    4)
fa.diagram(fa.net)

plot of chunk unnamed-chunk-20

fa.net
## Factor Analysis using method =  minres
## Call: fa(r = dplyr::select(smallData, NetworkSupportStrongSpouse:NetworkInvestorDirect3), 
##     nfactors = 4)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                                   MR1   MR3   MR4   MR2   h2    u2 com
## NetworkSupportStrongSpouse      -0.07  0.02 -0.06  0.53 0.27 0.730 1.1
## NetworkSupportStrongRelatives   -0.09  0.02  0.04  0.97 0.94 0.063 1.0
## NetworkSupportStrongFriends      0.27 -0.05 -0.04  0.66 0.53 0.465 1.3
## NetworkSupportWeakBusiness       0.30  0.49 -0.22  0.10 0.39 0.613 2.2
## NetworkSupportWeakAcquaintances  0.28  0.55  0.02 -0.13 0.45 0.546 1.6
## NetworkSupportWeakEmployers     -0.17  0.97  0.11 -0.02 0.89 0.108 1.1
## NetworkSupportWeakCoWorkers      0.23  0.76 -0.13  0.11 0.73 0.272 1.3
## NetworkInvestorIndirect1         0.95  0.00  0.05 -0.05 0.95 0.046 1.0
## NetworkInvestorIndirect2         0.76  0.02  0.18  0.03 0.79 0.214 1.1
## NetworkInvestorIndirect3         0.69  0.05  0.20  0.12 0.76 0.243 1.3
## NetworkInvestorDirect1           0.26  0.02  0.66  0.08 0.73 0.267 1.3
## NetworkInvestorDirect2           0.29  0.06  0.59 -0.04 0.64 0.357 1.5
## NetworkInvestorDirect3           0.00  0.01  0.90  0.02 0.81 0.193 1.0
## 
##                        MR1  MR3  MR4  MR2
## SS loadings           2.89 2.18 2.06 1.75
## Proportion Var        0.22 0.17 0.16 0.13
## Cumulative Var        0.22 0.39 0.55 0.68
## Proportion Explained  0.33 0.25 0.23 0.20
## Cumulative Proportion 0.33 0.57 0.80 1.00
## 
##  With factor correlations of 
##      MR1  MR3  MR4  MR2
## MR1 1.00 0.27 0.58 0.18
## MR3 0.27 1.00 0.12 0.15
## MR4 0.58 0.12 1.00 0.14
## MR2 0.18 0.15 0.14 1.00
## 
## Mean item complexity =  1.3
## Test of the hypothesis that 4 factors are sufficient.
## 
## The degrees of freedom for the null model are  78  and the objective function was  9 with Chi Square of  772.2
## The degrees of freedom for the model are 32  and the objective function was  0.85 
## 
## The root mean square of the residuals (RMSR) is  0.04 
## The df corrected root mean square of the residuals is  0.07 
## 
## The harmonic number of observations is  78 with the empirical chi square  21.4  with prob <  0.92 
## The total number of observations was  92  with MLE Chi Square =  70.73  with prob <  9.6e-05 
## 
## Tucker Lewis Index of factoring reliability =  0.859
## RMSEA index =  0.125  and the 90 % confidence intervals are  0.079 0.151
## BIC =  -73.97
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy             
##                                                 MR1  MR3  MR4  MR2
## Correlation of scores with factors             0.98 0.96 0.94 0.97
## Multiple R square of scores with factors       0.96 0.92 0.89 0.94
## Minimum correlation of possible factor scores  0.93 0.84 0.77 0.88
alpha(select(smallData, NetworkSupportStrongSpouse:NetworkInvestorDirect3))
## 
## Reliability analysis   
## Call: alpha(x = select(smallData, NetworkSupportStrongSpouse:NetworkInvestorDirect3))
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd
##       0.86      0.85    0.93      0.31 5.8 0.032  3.3 0.86
## 
##  lower alpha upper     95% confidence boundaries
## 0.8 0.86 0.92 
## 
##  Reliability if an item is dropped:
##                                 raw_alpha std.alpha G6(smc) average_r S/N
## NetworkSupportStrongSpouse           0.87      0.87    0.93      0.35 6.5
## NetworkSupportStrongRelatives        0.86      0.85    0.92      0.33 5.8
## NetworkSupportStrongFriends          0.86      0.85    0.92      0.32 5.6
## NetworkSupportWeakBusiness           0.86      0.85    0.92      0.32 5.7
## NetworkSupportWeakAcquaintances      0.85      0.84    0.92      0.31 5.4
## NetworkSupportWeakEmployers          0.86      0.85    0.92      0.32 5.7
## NetworkSupportWeakCoWorkers          0.85      0.84    0.91      0.31 5.3
## NetworkInvestorIndirect1             0.84      0.83    0.91      0.29 4.8
## NetworkInvestorIndirect2             0.84      0.83    0.91      0.29 4.9
## NetworkInvestorIndirect3             0.83      0.83    0.91      0.28 4.8
## NetworkInvestorDirect1               0.84      0.83    0.92      0.29 5.0
## NetworkInvestorDirect2               0.84      0.84    0.92      0.30 5.1
## NetworkInvestorDirect3               0.85      0.84    0.92      0.31 5.3
##                                 alpha se
## NetworkSupportStrongSpouse         0.032
## NetworkSupportStrongRelatives      0.033
## NetworkSupportStrongFriends        0.034
## NetworkSupportWeakBusiness         0.034
## NetworkSupportWeakAcquaintances    0.035
## NetworkSupportWeakEmployers        0.034
## NetworkSupportWeakCoWorkers        0.035
## NetworkInvestorIndirect1           0.037
## NetworkInvestorIndirect2           0.037
## NetworkInvestorIndirect3           0.037
## NetworkInvestorDirect1             0.036
## NetworkInvestorDirect2             0.036
## NetworkInvestorDirect3             0.035
## 
##  Item statistics 
##                                  n    r r.cor r.drop mean  sd
## NetworkSupportStrongSpouse      62 0.25  0.17   0.11  3.8 1.3
## NetworkSupportStrongRelatives   92 0.45  0.42   0.31  3.8 1.2
## NetworkSupportStrongFriends     92 0.53  0.50   0.42  3.9 1.1
## NetworkSupportWeakBusiness      89 0.50  0.46   0.39  3.7 1.2
## NetworkSupportWeakAcquaintances 90 0.59  0.55   0.49  3.4 1.2
## NetworkSupportWeakEmployers     68 0.50  0.48   0.38  2.9 1.4
## NetworkSupportWeakCoWorkers     74 0.61  0.59   0.50  3.4 1.4
## NetworkInvestorIndirect1        90 0.78  0.79   0.75  3.4 1.6
## NetworkInvestorIndirect2        90 0.78  0.78   0.74  3.2 1.5
## NetworkInvestorIndirect3        90 0.81  0.81   0.78  3.2 1.6
## NetworkInvestorDirect1          90 0.72  0.71   0.68  3.0 1.6
## NetworkInvestorDirect2          89 0.69  0.67   0.64  3.0 1.5
## NetworkInvestorDirect3          89 0.62  0.60   0.56  2.5 1.5
## 
## Non missing response frequency for each item
##                                    1    2    3    4    5 miss
## NetworkSupportStrongSpouse      0.08 0.06 0.23 0.21 0.42 0.33
## NetworkSupportStrongRelatives   0.04 0.13 0.15 0.33 0.35 0.00
## NetworkSupportStrongFriends     0.02 0.13 0.17 0.28 0.39 0.00
## NetworkSupportWeakBusiness      0.04 0.15 0.17 0.30 0.34 0.03
## NetworkSupportWeakAcquaintances 0.11 0.12 0.20 0.40 0.17 0.02
## NetworkSupportWeakEmployers     0.26 0.09 0.28 0.22 0.15 0.26
## NetworkSupportWeakCoWorkers     0.16 0.07 0.23 0.27 0.27 0.20
## NetworkInvestorIndirect1        0.23 0.08 0.09 0.27 0.33 0.02
## NetworkInvestorIndirect2        0.22 0.11 0.17 0.21 0.29 0.02
## NetworkInvestorIndirect3        0.26 0.07 0.17 0.22 0.29 0.02
## NetworkInvestorDirect1          0.29 0.14 0.12 0.21 0.23 0.02
## NetworkInvestorDirect2          0.24 0.18 0.17 0.18 0.24 0.03
## NetworkInvestorDirect3          0.38 0.19 0.12 0.18 0.12 0.03

# Networking: Strong Ties
fa.netstrong <- fa(dplyr::select(smallData, NetworkSupportStrongSpouse:NetworkSupportStrongFriends), 
    1)
fa.diagram(fa.netstrong)

plot of chunk unnamed-chunk-20

fa.netstrong
## Factor Analysis using method =  minres
## Call: fa(r = dplyr::select(smallData, NetworkSupportStrongSpouse:NetworkSupportStrongFriends), 
##     nfactors = 1)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                                MR1   h2   u2 com
## NetworkSupportStrongSpouse    0.51 0.26 0.74   1
## NetworkSupportStrongRelatives 0.96 0.92 0.08   1
## NetworkSupportStrongFriends   0.69 0.47 0.53   1
## 
##                 MR1
## SS loadings    1.65
## Proportion Var 0.55
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  3  and the objective function was  0.85 with Chi Square of  75.39
## The degrees of freedom for the model are 0  and the objective function was  0 
## 
## The root mean square of the residuals (RMSR) is  0 
## The df corrected root mean square of the residuals is  NA 
## 
## The harmonic number of observations is  73 with the empirical chi square  0  with prob <  NA 
## The total number of observations was  92  with MLE Chi Square =  0  with prob <  NA 
## 
## Tucker Lewis Index of factoring reliability =  NaN
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                 MR1
## Correlation of scores with factors             0.96
## Multiple R square of scores with factors       0.93
## Minimum correlation of possible factor scores  0.85
alpha(select(smallData, NetworkSupportStrongSpouse:NetworkSupportStrongFriends))
## 
## Reliability analysis   
## Call: alpha(x = select(smallData, NetworkSupportStrongSpouse:NetworkSupportStrongFriends))
## 
##   raw_alpha std.alpha G6(smc) average_r S/N ase mean   sd
##       0.75      0.75     0.7       0.5 3.1 0.1  3.8 0.99
## 
##  lower alpha upper     95% confidence boundaries
## 0.55 0.75 0.95 
## 
##  Reliability if an item is dropped:
##                               raw_alpha std.alpha G6(smc) average_r S/N
## NetworkSupportStrongSpouse         0.79      0.79    0.66      0.66 3.9
## NetworkSupportStrongRelatives      0.54      0.55    0.38      0.38 1.2
## NetworkSupportStrongFriends        0.65      0.65    0.48      0.48 1.8
##                               alpha se
## NetworkSupportStrongSpouse        0.14
## NetworkSupportStrongRelatives     0.18
## NetworkSupportStrongFriends       0.16
## 
##  Item statistics 
##                                n    r r.cor r.drop mean  sd
## NetworkSupportStrongSpouse    62 0.76  0.53   0.47  3.8 1.3
## NetworkSupportStrongRelatives 92 0.87  0.80   0.68  3.8 1.2
## NetworkSupportStrongFriends   92 0.83  0.72   0.60  3.9 1.1
## 
## Non missing response frequency for each item
##                                  1    2    3    4    5 miss
## NetworkSupportStrongSpouse    0.08 0.06 0.23 0.21 0.42 0.33
## NetworkSupportStrongRelatives 0.04 0.13 0.15 0.33 0.35 0.00
## NetworkSupportStrongFriends   0.02 0.13 0.17 0.28 0.39 0.00

# Networking: Weak Ties
fa.netweak <- fa(dplyr::select(smallData, NetworkSupportWeakBusiness:NetworkSupportWeakCoWorkers), 
    1)
fa.diagram(fa.netweak)

plot of chunk unnamed-chunk-20

fa.netweak
## Factor Analysis using method =  minres
## Call: fa(r = dplyr::select(smallData, NetworkSupportWeakBusiness:NetworkSupportWeakCoWorkers), 
##     nfactors = 1)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                                  MR1   h2   u2 com
## NetworkSupportWeakBusiness      0.63 0.40 0.60   1
## NetworkSupportWeakAcquaintances 0.63 0.40 0.60   1
## NetworkSupportWeakEmployers     0.84 0.71 0.29   1
## NetworkSupportWeakCoWorkers     0.83 0.70 0.30   1
## 
##                 MR1
## SS loadings    2.20
## Proportion Var 0.55
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  6  and the objective function was  1.72 with Chi Square of  153
## The degrees of freedom for the model are 2  and the objective function was  0.18 
## 
## The root mean square of the residuals (RMSR) is  0.08 
## The df corrected root mean square of the residuals is  0.14 
## 
## The harmonic number of observations is  73 with the empirical chi square  5.85  with prob <  0.054 
## The total number of observations was  92  with MLE Chi Square =  16.1  with prob <  0.00032 
## 
## Tucker Lewis Index of factoring reliability =  0.71
## RMSEA index =  0.283  and the 90 % confidence intervals are  0.162 0.409
## BIC =  7.06
## Fit based upon off diagonal values = 0.98
## Measures of factor score adequacy             
##                                                 MR1
## Correlation of scores with factors             0.92
## Multiple R square of scores with factors       0.85
## Minimum correlation of possible factor scores  0.70
alpha(select(smallData, NetworkSupportWeakBusiness:NetworkSupportWeakCoWorkers))
## 
## Reliability analysis   
## Call: alpha(x = select(smallData, NetworkSupportWeakBusiness:NetworkSupportWeakCoWorkers))
## 
##   raw_alpha std.alpha G6(smc) average_r S/N  ase mean  sd
##       0.83      0.83    0.81      0.55 4.8 0.07  3.4 1.1
## 
##  lower alpha upper     95% confidence boundaries
## 0.69 0.83 0.97 
## 
##  Reliability if an item is dropped:
##                                 raw_alpha std.alpha G6(smc) average_r S/N
## NetworkSupportWeakBusiness           0.81      0.81    0.77      0.58 4.2
## NetworkSupportWeakAcquaintances      0.81      0.80    0.76      0.58 4.1
## NetworkSupportWeakEmployers          0.75      0.75    0.67      0.50 3.0
## NetworkSupportWeakCoWorkers          0.76      0.77    0.69      0.52 3.3
##                                 alpha se
## NetworkSupportWeakBusiness         0.091
## NetworkSupportWeakAcquaintances    0.092
## NetworkSupportWeakEmployers        0.101
## NetworkSupportWeakCoWorkers        0.099
## 
##  Item statistics 
##                                  n    r r.cor r.drop mean  sd
## NetworkSupportWeakBusiness      89 0.78  0.66   0.60  3.7 1.2
## NetworkSupportWeakAcquaintances 90 0.79  0.67   0.61  3.4 1.2
## NetworkSupportWeakEmployers     68 0.85  0.81   0.73  2.9 1.4
## NetworkSupportWeakCoWorkers     74 0.83  0.78   0.70  3.4 1.4
## 
## Non missing response frequency for each item
##                                    1    2    3    4    5 miss
## NetworkSupportWeakBusiness      0.04 0.15 0.17 0.30 0.34 0.03
## NetworkSupportWeakAcquaintances 0.11 0.12 0.20 0.40 0.17 0.02
## NetworkSupportWeakEmployers     0.26 0.09 0.28 0.22 0.15 0.26
## NetworkSupportWeakCoWorkers     0.16 0.07 0.23 0.27 0.27 0.20

# Investor: Indirect Ties
fa.invindirect <- fa(dplyr::select(smallData, NetworkInvestorIndirect1:NetworkInvestorIndirect3), 
    1)
fa.diagram(fa.invindirect)

plot of chunk unnamed-chunk-20

fa.invindirect
## Factor Analysis using method =  minres
## Call: fa(r = dplyr::select(smallData, NetworkInvestorIndirect1:NetworkInvestorIndirect3), 
##     nfactors = 1)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                           MR1   h2   u2 com
## NetworkInvestorIndirect1 0.92 0.85 0.15   1
## NetworkInvestorIndirect2 0.92 0.85 0.15   1
## NetworkInvestorIndirect3 0.88 0.78 0.22   1
## 
##                 MR1
## SS loadings    2.48
## Proportion Var 0.83
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  3  and the objective function was  2.56 with Chi Square of  228.3
## The degrees of freedom for the model are 0  and the objective function was  0 
## 
## The root mean square of the residuals (RMSR) is  0 
## The df corrected root mean square of the residuals is  NA 
## 
## The harmonic number of observations is  90 with the empirical chi square  0  with prob <  NA 
## The total number of observations was  92  with MLE Chi Square =  0  with prob <  NA 
## 
## Tucker Lewis Index of factoring reliability =  -Inf
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                 MR1
## Correlation of scores with factors             0.97
## Multiple R square of scores with factors       0.94
## Minimum correlation of possible factor scores  0.88
alpha(select(smallData, NetworkInvestorIndirect1:NetworkInvestorIndirect3))
## 
## Reliability analysis   
## Call: alpha(x = select(smallData, NetworkInvestorIndirect1:NetworkInvestorIndirect3))
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd
##       0.94      0.94    0.91      0.83  14 0.072  3.3 1.5
## 
##  lower alpha upper     95% confidence boundaries
## 0.79 0.94 1.08 
## 
##  Reliability if an item is dropped:
##                          raw_alpha std.alpha G6(smc) average_r  S/N
## NetworkInvestorIndirect1      0.90      0.90    0.81      0.81  8.8
## NetworkInvestorIndirect2      0.90      0.90    0.82      0.82  8.9
## NetworkInvestorIndirect3      0.92      0.92    0.85      0.85 11.5
##                          alpha se
## NetworkInvestorIndirect1     0.12
## NetworkInvestorIndirect2     0.12
## NetworkInvestorIndirect3     0.12
## 
##  Item statistics 
##                           n    r r.cor r.drop mean  sd
## NetworkInvestorIndirect1 90 0.95  0.91   0.88  3.4 1.6
## NetworkInvestorIndirect2 90 0.94  0.91   0.87  3.2 1.5
## NetworkInvestorIndirect3 90 0.93  0.87   0.85  3.2 1.6
## 
## Non missing response frequency for each item
##                             1    2    3    4    5 miss
## NetworkInvestorIndirect1 0.23 0.08 0.09 0.27 0.33 0.02
## NetworkInvestorIndirect2 0.22 0.11 0.17 0.21 0.29 0.02
## NetworkInvestorIndirect3 0.26 0.07 0.17 0.22 0.29 0.02

# Investor: Direct Ties
fa.invdirect <- fa(dplyr::select(smallData, NetworkInvestorDirect1:NetworkInvestorDirect3), 
    1)
fa.diagram(fa.invdirect)

plot of chunk unnamed-chunk-20

fa.invdirect
## Factor Analysis using method =  minres
## Call: fa(r = dplyr::select(smallData, NetworkInvestorDirect1:NetworkInvestorDirect3), 
##     nfactors = 1)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                         MR1   h2   u2 com
## NetworkInvestorDirect1 0.86 0.74 0.26   1
## NetworkInvestorDirect2 0.81 0.66 0.34   1
## NetworkInvestorDirect3 0.85 0.72 0.28   1
## 
##                 MR1
## SS loadings    2.12
## Proportion Var 0.71
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  3  and the objective function was  1.58 with Chi Square of  140.9
## The degrees of freedom for the model are 0  and the objective function was  0 
## 
## The root mean square of the residuals (RMSR) is  0 
## The df corrected root mean square of the residuals is  NA 
## 
## The harmonic number of observations is  89 with the empirical chi square  0  with prob <  NA 
## The total number of observations was  92  with MLE Chi Square =  0  with prob <  NA 
## 
## Tucker Lewis Index of factoring reliability =  -Inf
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                 MR1
## Correlation of scores with factors             0.94
## Multiple R square of scores with factors       0.88
## Minimum correlation of possible factor scores  0.76
alpha(select(smallData, NetworkInvestorDirect1:NetworkInvestorDirect3))
## 
## Reliability analysis   
## Call: alpha(x = select(smallData, NetworkInvestorDirect1:NetworkInvestorDirect3))
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd
##       0.88      0.88    0.83      0.71 7.3 0.081  2.8 1.4
## 
##  lower alpha upper     95% confidence boundaries
## 0.72 0.88 1.04 
## 
##  Reliability if an item is dropped:
##                        raw_alpha std.alpha G6(smc) average_r S/N alpha se
## NetworkInvestorDirect1      0.82      0.82    0.69      0.69 4.5     0.14
## NetworkInvestorDirect2      0.85      0.85    0.73      0.73 5.5     0.13
## NetworkInvestorDirect3      0.82      0.82    0.70      0.70 4.7     0.14
## 
##  Item statistics 
##                         n    r r.cor r.drop mean  sd
## NetworkInvestorDirect1 90 0.90  0.83   0.78  3.0 1.6
## NetworkInvestorDirect2 89 0.89  0.79   0.75  3.0 1.5
## NetworkInvestorDirect3 89 0.90  0.83   0.77  2.5 1.5
## 
## Non missing response frequency for each item
##                           1    2    3    4    5 miss
## NetworkInvestorDirect1 0.29 0.14 0.12 0.21 0.23 0.02
## NetworkInvestorDirect2 0.24 0.18 0.17 0.18 0.24 0.03
## NetworkInvestorDirect3 0.38 0.19 0.12 0.18 0.12 0.03

# Satisfaction
fa.satis <- fa(dplyr::select(smallData, SatisfactionJob:SatisfactionIncubator), 
    1)
## In fa, too many factors requested for this number of variables to use SMC for communality estimates, 1s are used instead
fa.diagram(fa.satis)

plot of chunk unnamed-chunk-20

fa.satis
## Factor Analysis using method =  minres
## Call: fa(r = dplyr::select(smallData, SatisfactionJob:SatisfactionIncubator), 
##     nfactors = 1)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                        MR1   h2   u2 com
## SatisfactionJob       0.75 0.56 0.44   1
## SatisfactionIncubator 0.75 0.56 0.44   1
## 
##                 MR1
## SS loadings    1.12
## Proportion Var 0.56
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  1  and the objective function was  0.37 with Chi Square of  33.55
## The degrees of freedom for the model are -1  and the objective function was  0 
## 
## The root mean square of the residuals (RMSR) is  0 
## The df corrected root mean square of the residuals is  NA 
## 
## The harmonic number of observations is  92 with the empirical chi square  0  with prob <  NA 
## The total number of observations was  92  with MLE Chi Square =  0  with prob <  NA 
## 
## Tucker Lewis Index of factoring reliability =  1.031
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                 MR1
## Correlation of scores with factors             0.85
## Multiple R square of scores with factors       0.72
## Minimum correlation of possible factor scores  0.43
alpha(select(smallData, SatisfactionJob:SatisfactionIncubator))
## Warning: data length [10] is not a sub-multiple or multiple of the number
## of columns [6]
## 
## Reliability analysis   
## Call: alpha(x = select(smallData, SatisfactionJob:SatisfactionIncubator))
## 
##   raw_alpha std.alpha G6(smc) average_r S/N  ase mean  sd
##       0.66      0.72    0.56      0.56 2.5 0.16  3.7 1.1
## 
##  lower alpha upper     95% confidence boundaries
## 0.35 0.66 0.97 
## 
##  Reliability if an item is dropped:
##                       raw_alpha std.alpha G6(smc) average_r  S/N alpha se
## SatisfactionJob            0.56      0.56    0.31      0.56   NA    0.078
## SatisfactionIncubator      0.56      0.31    0.56        NA 0.56    0.078
## 
##  Item statistics 
##                        n    r r.cor r.drop mean   sd
## SatisfactionJob       92 0.88  0.66   0.56  4.1 0.88
## SatisfactionIncubator 92 0.88  0.66   0.56  3.2 1.48
## 
## Non missing response frequency for each item
##                          1    2    3    4    5 miss
## SatisfactionJob       0.00 0.03 0.25 0.33 0.39    0
## SatisfactionIncubator 0.21 0.12 0.14 0.28 0.25    0

# Team Together
fa.team <- fa(dplyr::select(smallData, TeamHeterogeneity1:TeamPreference3), 
    3)
fa.diagram(fa.team)

plot of chunk unnamed-chunk-20

fa.team
## Factor Analysis using method =  minres
## Call: fa(r = dplyr::select(smallData, TeamHeterogeneity1:TeamPreference3), 
##     nfactors = 3)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                                 MR1   MR2   MR3   h2   u2 com
## TeamHeterogeneity1             0.95  0.06 -0.16 0.84 0.16 1.1
## TeamHeterogeneity2             0.85 -0.01  0.16 0.85 0.15 1.1
## TeamHeterogeneity3             0.84 -0.05  0.13 0.78 0.22 1.1
## TeamFlexibility1               0.09  0.07  0.65 0.50 0.50 1.1
## TeamFlexibility2               0.00  0.03  0.67 0.45 0.55 1.0
## TeamFlexibility3               0.05  0.03  0.70 0.52 0.48 1.0
## TeamRelativeSizeInverseScored -0.22 -0.28 -0.12 0.22 0.78 2.3
## TeamPreference1                0.07  0.86  0.07 0.81 0.19 1.0
## TeamPreference2               -0.11  0.87  0.10 0.77 0.23 1.1
## TeamPreference3                0.06  0.84 -0.14 0.68 0.32 1.1
## 
##                        MR1  MR2  MR3
## SS loadings           2.50 2.35 1.57
## Proportion Var        0.25 0.24 0.16
## Cumulative Var        0.25 0.49 0.64
## Proportion Explained  0.39 0.37 0.24
## Cumulative Proportion 0.39 0.76 1.00
## 
##  With factor correlations of 
##      MR1  MR2  MR3
## MR1 1.00 0.27 0.37
## MR2 0.27 1.00 0.28
## MR3 0.37 0.28 1.00
## 
## Mean item complexity =  1.2
## Test of the hypothesis that 3 factors are sufficient.
## 
## The degrees of freedom for the null model are  45  and the objective function was  6.05 with Chi Square of  525.3
## The degrees of freedom for the model are 18  and the objective function was  0.58 
## 
## The root mean square of the residuals (RMSR) is  0.04 
## The df corrected root mean square of the residuals is  0.06 
## 
## The harmonic number of observations is  89 with the empirical chi square  11.62  with prob <  0.87 
## The total number of observations was  92  with MLE Chi Square =  48.79  with prob <  0.00011 
## 
## Tucker Lewis Index of factoring reliability =  0.836
## RMSEA index =  0.145  and the 90 % confidence intervals are  0.091 0.183
## BIC =  -32.61
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy             
##                                                 MR1  MR2  MR3
## Correlation of scores with factors             0.97 0.95 0.88
## Multiple R square of scores with factors       0.93 0.91 0.77
## Minimum correlation of possible factor scores  0.87 0.81 0.54
alpha(select(smallData, TeamHeterogeneity1:TeamPreference3))
## Warning: Some items were negatively correlated with total scale and were
## automatically reversed.
## 
## Reliability analysis   
## Call: alpha(x = select(smallData, TeamHeterogeneity1:TeamPreference3))
## 
##   raw_alpha std.alpha G6(smc) average_r S/N  ase mean   sd
##       0.83      0.84     0.9      0.34 5.2 0.04  3.6 0.79
## 
##  lower alpha upper     95% confidence boundaries
## 0.76 0.83 0.91 
## 
##  Reliability if an item is dropped:
##                                raw_alpha std.alpha G6(smc) average_r S/N
## TeamHeterogeneity1                  0.81      0.82    0.89      0.34 4.6
## TeamHeterogeneity2                  0.80      0.81    0.88      0.32 4.3
## TeamHeterogeneity3                  0.81      0.82    0.89      0.33 4.4
## TeamFlexibility1                    0.82      0.83    0.90      0.35 4.8
## TeamFlexibility2                    0.83      0.83    0.90      0.35 4.9
## TeamFlexibility3                    0.82      0.83    0.90      0.35 4.8
## TeamRelativeSizeInverseScored-      0.83      0.83    0.91      0.35 4.9
## TeamPreference1                     0.82      0.81    0.88      0.33 4.3
## TeamPreference2                     0.83      0.82    0.89      0.34 4.7
## TeamPreference3                     0.83      0.83    0.89      0.35 4.8
##                                alpha se
## TeamHeterogeneity1                0.045
## TeamHeterogeneity2                0.047
## TeamHeterogeneity3                0.046
## TeamFlexibility1                  0.044
## TeamFlexibility2                  0.044
## TeamFlexibility3                  0.044
## TeamRelativeSizeInverseScored-    0.043
## TeamPreference1                   0.045
## TeamPreference2                   0.044
## TeamPreference3                   0.043
## 
##  Item statistics 
##                                 n    r r.cor r.drop mean   sd
## TeamHeterogeneity1             91 0.66  0.65   0.57  3.6 1.36
## TeamHeterogeneity2             91 0.74  0.75   0.69  3.7 1.36
## TeamHeterogeneity3             91 0.69  0.69   0.63  3.8 1.39
## TeamFlexibility1               91 0.60  0.54   0.50  3.5 1.30
## TeamFlexibility2               91 0.58  0.52   0.47  2.8 1.32
## TeamFlexibility3               91 0.60  0.54   0.50  2.8 1.35
## TeamRelativeSizeInverseScored- 80 0.57  0.49   0.44  3.0 1.28
## TeamPreference1                91 0.72  0.71   0.59  4.2 0.96
## TeamPreference2                91 0.62  0.60   0.46  4.1 1.02
## TeamPreference3                91 0.59  0.57   0.43  4.1 0.99
## 
## Non missing response frequency for each item
##                                  1    2    3    4    5 miss
## TeamHeterogeneity1            0.09 0.18 0.18 0.21 0.35 0.01
## TeamHeterogeneity2            0.12 0.10 0.12 0.32 0.34 0.01
## TeamHeterogeneity3            0.10 0.14 0.08 0.26 0.42 0.01
## TeamFlexibility1              0.10 0.15 0.15 0.33 0.26 0.01
## TeamFlexibility2              0.22 0.19 0.24 0.24 0.11 0.01
## TeamFlexibility3              0.21 0.25 0.21 0.19 0.14 0.01
## TeamRelativeSizeInverseScored 0.12 0.28 0.24 0.20 0.16 0.13
## TeamPreference1               0.02 0.04 0.12 0.36 0.45 0.01
## TeamPreference2               0.02 0.08 0.12 0.38 0.40 0.01
## TeamPreference3               0.01 0.07 0.18 0.31 0.44 0.01

# Team Heterogeneity
fa.thete <- fa(dplyr::select(smallData, TeamHeterogeneity1:TeamHeterogeneity3), 
    1)
fa.diagram(fa.thete)

plot of chunk unnamed-chunk-20

fa.thete
## Factor Analysis using method =  minres
## Call: fa(r = dplyr::select(smallData, TeamHeterogeneity1:TeamHeterogeneity3), 
##     nfactors = 1)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                     MR1   h2   u2 com
## TeamHeterogeneity1 0.87 0.76 0.24   1
## TeamHeterogeneity2 0.92 0.85 0.15   1
## TeamHeterogeneity3 0.88 0.78 0.22   1
## 
##                 MR1
## SS loadings    2.38
## Proportion Var 0.79
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  3  and the objective function was  2.22 with Chi Square of  198.2
## The degrees of freedom for the model are 0  and the objective function was  0 
## 
## The root mean square of the residuals (RMSR) is  0 
## The df corrected root mean square of the residuals is  NA 
## 
## The harmonic number of observations is  91 with the empirical chi square  0  with prob <  NA 
## The total number of observations was  92  with MLE Chi Square =  0  with prob <  NA 
## 
## Tucker Lewis Index of factoring reliability =  -Inf
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                 MR1
## Correlation of scores with factors             0.96
## Multiple R square of scores with factors       0.92
## Minimum correlation of possible factor scores  0.85
alpha(select(smallData, TeamHeterogeneity1:TeamHeterogeneity3))
## 
## Reliability analysis   
## Call: alpha(x = select(smallData, TeamHeterogeneity1:TeamHeterogeneity3))
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd
##       0.92      0.92    0.89      0.79  12 0.074  3.7 1.3
## 
##  lower alpha upper     95% confidence boundaries
## 0.78 0.92 1.07 
## 
##  Reliability if an item is dropped:
##                    raw_alpha std.alpha G6(smc) average_r S/N alpha se
## TeamHeterogeneity1      0.90      0.90    0.81      0.81 8.6     0.12
## TeamHeterogeneity2      0.87      0.87    0.77      0.77 6.7     0.13
## TeamHeterogeneity3      0.89      0.89    0.80      0.80 8.1     0.13
## 
##  Item statistics 
##                     n    r r.cor r.drop mean  sd
## TeamHeterogeneity1 91 0.92  0.86   0.82  3.6 1.4
## TeamHeterogeneity2 91 0.94  0.90   0.86  3.7 1.4
## TeamHeterogeneity3 91 0.93  0.87   0.83  3.8 1.4
## 
## Non missing response frequency for each item
##                       1    2    3    4    5 miss
## TeamHeterogeneity1 0.09 0.18 0.18 0.21 0.35 0.01
## TeamHeterogeneity2 0.12 0.10 0.12 0.32 0.34 0.01
## TeamHeterogeneity3 0.10 0.14 0.08 0.26 0.42 0.01

# Team Flexibility
fa.tflex <- fa(dplyr::select(smallData, TeamFlexibility1:TeamFlexibility3), 
    1)
fa.diagram(fa.tflex)

plot of chunk unnamed-chunk-20

fa.tflex
## Factor Analysis using method =  minres
## Call: fa(r = dplyr::select(smallData, TeamFlexibility1:TeamFlexibility3), 
##     nfactors = 1)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                   MR1   h2   u2 com
## TeamFlexibility1 0.66 0.43 0.57   1
## TeamFlexibility2 0.71 0.51 0.49   1
## TeamFlexibility3 0.75 0.56 0.44   1
## 
##                 MR1
## SS loadings    1.51
## Proportion Var 0.50
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  3  and the objective function was  0.7 with Chi Square of  62.28
## The degrees of freedom for the model are 0  and the objective function was  0 
## 
## The root mean square of the residuals (RMSR) is  0 
## The df corrected root mean square of the residuals is  NA 
## 
## The harmonic number of observations is  91 with the empirical chi square  0  with prob <  NA 
## The total number of observations was  92  with MLE Chi Square =  0  with prob <  NA 
## 
## Tucker Lewis Index of factoring reliability =  -Inf
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                 MR1
## Correlation of scores with factors             0.87
## Multiple R square of scores with factors       0.76
## Minimum correlation of possible factor scores  0.51
alpha(select(smallData, TeamFlexibility1:TeamFlexibility3))
## 
## Reliability analysis   
## Call: alpha(x = select(smallData, TeamFlexibility1:TeamFlexibility3))
## 
##   raw_alpha std.alpha G6(smc) average_r S/N ase mean  sd
##       0.75      0.75    0.67       0.5   3 0.1    3 1.1
## 
##  lower alpha upper     95% confidence boundaries
## 0.55 0.75 0.95 
## 
##  Reliability if an item is dropped:
##                  raw_alpha std.alpha G6(smc) average_r S/N alpha se
## TeamFlexibility1      0.70      0.70    0.54      0.54 2.3     0.16
## TeamFlexibility2      0.66      0.66    0.49      0.49 1.9     0.16
## TeamFlexibility3      0.64      0.64    0.47      0.47 1.8     0.16
## 
##  Item statistics 
##                   n    r r.cor r.drop mean  sd
## TeamFlexibility1 91 0.80  0.63   0.55  3.5 1.3
## TeamFlexibility2 91 0.82  0.67   0.58  2.8 1.3
## TeamFlexibility3 91 0.83  0.70   0.60  2.8 1.4
## 
## Non missing response frequency for each item
##                     1    2    3    4    5 miss
## TeamFlexibility1 0.10 0.15 0.15 0.33 0.26 0.01
## TeamFlexibility2 0.22 0.19 0.24 0.24 0.11 0.01
## TeamFlexibility3 0.21 0.25 0.21 0.19 0.14 0.01

# Team Group Work
fa.tgwork <- fa(dplyr::select(smallData, TeamPreference1:TeamPreference3), 1)
fa.diagram(fa.tgwork)

plot of chunk unnamed-chunk-20

fa.tgwork
## Factor Analysis using method =  minres
## Call: fa(r = dplyr::select(smallData, TeamPreference1:TeamPreference3), 
##     nfactors = 1)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                  MR1   h2   u2 com
## TeamPreference1 0.89 0.79 0.21   1
## TeamPreference2 0.87 0.76 0.24   1
## TeamPreference3 0.81 0.65 0.35   1
## 
##                 MR1
## SS loadings    2.20
## Proportion Var 0.73
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  3  and the objective function was  1.76 with Chi Square of  157
## The degrees of freedom for the model are 0  and the objective function was  0 
## 
## The root mean square of the residuals (RMSR) is  0 
## The df corrected root mean square of the residuals is  NA 
## 
## The harmonic number of observations is  91 with the empirical chi square  0  with prob <  NA 
## The total number of observations was  92  with MLE Chi Square =  0  with prob <  NA 
## 
## Tucker Lewis Index of factoring reliability =  -Inf
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                 MR1
## Correlation of scores with factors             0.95
## Multiple R square of scores with factors       0.90
## Minimum correlation of possible factor scores  0.80
alpha(select(smallData, TeamPreference1:TeamPreference3))
## 
## Reliability analysis   
## Call: alpha(x = select(smallData, TeamPreference1:TeamPreference3))
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd
##       0.89      0.89    0.85      0.73 8.2 0.079  4.1 0.9
## 
##  lower alpha upper     95% confidence boundaries
## 0.74 0.89 1.05 
## 
##  Reliability if an item is dropped:
##                 raw_alpha std.alpha G6(smc) average_r S/N alpha se
## TeamPreference1      0.83      0.83    0.70      0.70 4.7     0.14
## TeamPreference2      0.84      0.84    0.72      0.72 5.1     0.14
## TeamPreference3      0.87      0.87    0.78      0.78 6.9     0.13
## 
##  Item statistics 
##                  n    r r.cor r.drop mean   sd
## TeamPreference1 91 0.92  0.86   0.81  4.2 0.96
## TeamPreference2 91 0.91  0.85   0.80  4.1 1.02
## TeamPreference3 91 0.89  0.79   0.75  4.1 0.99
## 
## Non missing response frequency for each item
##                    1    2    3    4    5 miss
## TeamPreference1 0.02 0.04 0.12 0.36 0.45 0.01
## TeamPreference2 0.02 0.08 0.12 0.38 0.40 0.01
## TeamPreference3 0.01 0.07 0.18 0.31 0.44 0.01

# Business Plan Success
df <- select(smallData, BusinessPlanFinancial:BusinessPlanDoc)
df$num1 <- as.numeric(df[, 1] == "Yes")
df$num2 <- as.numeric(df[, 2] == "Yes")
df$num3 <- as.numeric(df[, 3] == "Yes")
df$num4 <- as.numeric(df[, 4] == "Yes")

fa.bpsuccess <- fa.poly(dplyr::select(df, num1:num4), 1, rotate = "varimax")
## Loading required package: mvtnorm
## Loading required package: parallel
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
fa.diagram(fa.bpsuccess)

plot of chunk unnamed-chunk-20

fa.bpsuccess
## Factor Analysis using method =  minres
## Call: fa.poly(x = dplyr::select(df, num1:num4), nfactors = 1, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
##       MR1   h2    u2 com
## num1 0.62 0.38 0.619   1
## num2 0.74 0.55 0.447   1
## num3 1.00 1.00 0.005   1
## num4 0.63 0.40 0.597   1
## 
##                 MR1
## SS loadings    2.33
## Proportion Var 0.58
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  6  and the objective function was  1.93 with Chi Square of  171.3
## The degrees of freedom for the model are 2  and the objective function was  0.14 
## 
## The root mean square of the residuals (RMSR) is  0.08 
## The df corrected root mean square of the residuals is  0.15 
## 
## The harmonic number of observations is  92 with the empirical chi square  7.86  with prob <  0.02 
## The total number of observations was  92  with MLE Chi Square =  12.31  with prob <  0.0021 
## 
## Tucker Lewis Index of factoring reliability =  0.811
## RMSEA index =  0.243  and the 90 % confidence intervals are  0.122 0.371
## BIC =  3.27
## Fit based upon off diagonal values = 0.98
## Measures of factor score adequacy             
##                                                 MR1
## Correlation of scores with factors             1.00
## Multiple R square of scores with factors       1.00
## Minimum correlation of possible factor scores  0.99
alpha(dplyr::select(df, num1:num4))
## 
## Reliability analysis   
## Call: alpha(x = dplyr::select(df, num1:num4))
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd
##       0.67      0.67    0.63      0.34   2 0.093 0.61 0.34
## 
##  lower alpha upper     95% confidence boundaries
## 0.48 0.67 0.85 
## 
##  Reliability if an item is dropped:
##      raw_alpha std.alpha G6(smc) average_r S/N alpha se
## num1      0.62      0.62    0.55      0.35 1.6     0.12
## num2      0.58      0.58    0.50      0.32 1.4     0.12
## num3      0.49      0.50    0.41      0.25 1.0     0.13
## num4      0.69      0.69    0.60      0.43 2.2     0.11
## 
##  Item statistics 
##       n    r r.cor r.drop mean   sd
## num1 89 0.69  0.52   0.42 0.70 0.46
## num2 90 0.73  0.60   0.48 0.77 0.43
## num3 92 0.80  0.73   0.60 0.65 0.48
## num4 89 0.61  0.39   0.31 0.35 0.48
## 
## Non missing response frequency for each item
##         0    1 miss
## num1 0.30 0.70 0.03
## num2 0.23 0.77 0.02
## num3 0.35 0.65 0.00
## num4 0.65 0.35 0.03

CFA with Lavaan Package

Check reliability of constructs.

In isolation the majority of latent variables seems okay. Next step is to test them all together.

Pre-Entry Experience

lavaan.PreEntry.model <- " PreEntryBusiness =~ PreEntryBusinessWork + PreEntryBusinessEducation+ PreEntryBusinessHobbies \n                        PreEntryManagement =~ PreEntryManagement1 + PreEntryManagement2 + PreEntryManagement3 \n                        PreEntryPlanning =~ PrePlanningMarket+ PrePlanningCompetitive+ PrePlanningBrand + PrePlanningModel\n                        OrgCapital =~ StartupExperience "


fit.lavaan.PreEntry.model <- cfa(lavaan.PreEntry.model, smallData)
summary(fit.lavaan.PreEntry.model, fit.measures = TRUE)
## lavaan (0.5-16) converged normally after  66 iterations
## 
##                                                   Used       Total
##   Number of observations                            89          92
## 
##   Estimator                                         ML
##   Minimum Function Test Statistic               59.587
##   Degrees of freedom                                39
##   P-value (Chi-square)                           0.018
## 
## Model test baseline model:
## 
##   Minimum Function Test Statistic              454.970
##   Degrees of freedom                                55
##   P-value                                        0.000
## 
## User model versus baseline model:
## 
##   Comparative Fit Index (CFI)                    0.949
##   Tucker-Lewis Index (TLI)                       0.927
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -1419.266
##   Loglikelihood unrestricted model (H1)      -1389.473
## 
##   Number of free parameters                         27
##   Akaike (AIC)                                2892.532
##   Bayesian (BIC)                              2959.725
##   Sample-size adjusted Bayesian (BIC)         2874.518
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.077
##   90 Percent Confidence Interval          0.032  0.114
##   P-value RMSEA <= 0.05                          0.133
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.061
## 
## Parameter estimates:
## 
##   Information                                 Expected
##   Standard Errors                             Standard
## 
##                    Estimate  Std.err  Z-value  P(>|z|)
## Latent variables:
##   PreEntryBusiness =~
##     PrEntryBsnssW     1.000
##     PrEntryBsnssE     1.590    0.485    3.281    0.001
##     PrEntryBsnssH     1.030    0.282    3.657    0.000
##   PreEntryManagement =~
##     PrEntryMngmn1     1.000
##     PrEntryMngmn2     1.256    0.186    6.769    0.000
##     PrEntryMngmn3     1.155    0.182    6.345    0.000
##   PreEntryPlanning =~
##     PrePlnnngMrkt     1.000
##     PrPlnnngCmptt     1.187    0.129    9.210    0.000
##     PrePlnnngBrnd     1.224    0.142    8.642    0.000
##     PrePlannngMdl     1.318    0.145    9.065    0.000
##   OrgCapital =~
##     StartupExprnc     1.000
## 
## Covariances:
##   PreEntryBusiness ~~
##     PrEntryMngmnt     0.139    0.076    1.833    0.067
##     PrEntryPlnnng     0.138    0.081    1.717    0.086
##     OrgCapital        0.055    0.123    0.450    0.653
##   PreEntryManagement ~~
##     PrEntryPlnnng     0.295    0.106    2.794    0.005
##     OrgCapital        0.061    0.162    0.375    0.708
##   PreEntryPlanning ~~
##     OrgCapital        0.184    0.181    1.017    0.309
## 
## Variances:
##     PrEntryBsnssW     0.975    0.175
##     PrEntryBsnssE     0.247    0.238
##     PrEntryBsnssH     1.008    0.182
##     PrEntryMngmn1     0.591    0.116
##     PrEntryMngmn2     0.261    0.121
##     PrEntryMngmn3     0.843    0.161
##     PrePlnnngMrkt     0.520    0.094
##     PrPlnnngCmptt     0.371    0.084
##     PrePlnnngBrnd     0.582    0.113
##     PrePlannngMdl     0.510    0.110
##     StartupExprnc     0.000
##     PreEntryBsnss     0.364    0.168
##     PrEntryMngmnt     0.681    0.185
##     PrEntryPlnnng     0.878    0.201
##     OrgCapital        2.966    0.445

semPaths(fit.lavaan.PreEntry.model, style = "lisrel", what = "std")

plot of chunk unnamed-chunk-21

Networking and Satisfaction

lavaan.Networking.model <- "  NetworkSupportStrong =~ NetworkSupportStrongSpouse+NetworkSupportStrongRelatives+NetworkSupportStrongFriends \n                              NetworkSupportWeak =~ NetworkSupportWeakBusiness+NetworkSupportWeakAcquaintances+NetworkSupportWeakEmployers+NetworkSupportWeakCoWorkers\n                              NetworkInvestorIndirect =~NetworkInvestorIndirect1+NetworkInvestorIndirect2+NetworkInvestorIndirect3\n                              NetworkInvestorDirect =~ NetworkInvestorDirect1+NetworkInvestorDirect2+NetworkInvestorDirect3\n                              Satisfaction =~ SatisfactionJob+SatisfactionIncubator\n"
fit.lavaan.Networking.model <- cfa(lavaan.Networking.model, smallData)
summary(fit.lavaan.Networking.model, fit.measures = TRUE)
## lavaan (0.5-16) converged normally after  66 iterations
## 
##                                                   Used       Total
##   Number of observations                            48          92
## 
##   Estimator                                         ML
##   Minimum Function Test Statistic              198.742
##   Degrees of freedom                                80
##   P-value (Chi-square)                           0.000
## 
## Model test baseline model:
## 
##   Minimum Function Test Statistic              739.380
##   Degrees of freedom                               105
##   P-value                                        0.000
## 
## User model versus baseline model:
## 
##   Comparative Fit Index (CFI)                    0.813
##   Tucker-Lewis Index (TLI)                       0.754
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)               -982.274
##   Loglikelihood unrestricted model (H1)       -882.903
## 
##   Number of free parameters                         40
##   Akaike (AIC)                                2044.549
##   Bayesian (BIC)                              2119.397
##   Sample-size adjusted Bayesian (BIC)         1993.907
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.176
##   90 Percent Confidence Interval          0.145  0.207
##   P-value RMSEA <= 0.05                          0.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.113
## 
## Parameter estimates:
## 
##   Information                                 Expected
##   Standard Errors                             Standard
## 
##                    Estimate  Std.err  Z-value  P(>|z|)
## Latent variables:
##   NetworkSupportStrong =~
##     NtwrkSpprtStS     1.000
##     NtwrkSpprtStR     1.569    0.445    3.529    0.000
##     NtwrkSpprtStF     1.420    0.397    3.579    0.000
##   NetworkSupportWeak =~
##     NtwrkSpprtWkB     1.000
##     NtwrkSpprtWkA     0.995    0.266    3.746    0.000
##     NtwrkSpprtWkE     1.482    0.310    4.786    0.000
##     NtwrkSpprtWCW     1.454    0.300    4.842    0.000
##   NetworkInvestorIndirect =~
##     NtwrkInvstrI1     1.000
##     NtwrkInvstrI2     0.970    0.076   12.804    0.000
##     NtwrkInvstrI3     0.905    0.080   11.239    0.000
##   NetworkInvestorDirect =~
##     NtwrkInvstrD1     1.000
##     NtwrkInvstrD2     0.900    0.078   11.579    0.000
##     NtwrkInvstrD3     0.830    0.085    9.800    0.000
##   Satisfaction =~
##     SatisfactinJb     1.000
##     StsfctnIncbtr     2.316    0.449    5.154    0.000
## 
## Covariances:
##   NetworkSupportStrong ~~
##     NetwrkSpprtWk     0.122    0.099    1.230    0.219
##     NtwrkInvstrIn     0.511    0.231    2.212    0.027
##     NtwrkInvstrDr     0.346    0.198    1.749    0.080
##     Satisfaction      0.165    0.090    1.833    0.067
##   NetworkSupportWeak ~~
##     NtwrkInvstrIn     0.504    0.232    2.176    0.030
##     NtwrkInvstrDr     0.302    0.206    1.464    0.143
##     Satisfaction      0.084    0.085    0.993    0.321
##   NetworkInvestorIndirect ~~
##     NtwrkInvstrDr     2.214    0.513    4.314    0.000
##     Satisfaction      0.781    0.238    3.277    0.001
##   NetworkInvestorDirect ~~
##     Satisfaction      0.692    0.219    3.164    0.002
## 
## Variances:
##     NtwrkSpprtStS     1.201    0.263
##     NtwrkSpprtStR     0.245    0.199
##     NtwrkSpprtStF     0.604    0.202
##     NtwrkSpprtWkB     0.816    0.187
##     NtwrkSpprtWkA     1.009    0.226
##     NtwrkSpprtWkE     0.556    0.189
##     NtwrkSpprtWCW     0.431    0.168
##     NtwrkInvstrI1     0.410    0.112
##     NtwrkInvstrI2     0.263    0.086
##     NtwrkInvstrI3     0.414    0.106
##     NtwrkInvstrD1     0.273    0.103
##     NtwrkInvstrD2     0.380    0.107
##     NtwrkInvstrD3     0.556    0.134
##     SatisfactinJb     0.375    0.093
##     StsfctnIncbtr     0.272    0.288
##     NtwrkSpprtStr     0.475    0.257
##     NetwrkSpprtWk     0.628    0.258
##     NtwrkInvstrIn     2.574    0.608
##     NtwrkInvstrDr     2.392    0.548
##     Satisfaction      0.395    0.147

semPaths(fit.lavaan.Networking.model, style = "lisrel", what = "std")

plot of chunk unnamed-chunk-22

Very high correlation between Investment Network (indirect and Direct). Probably should use only one.

Team Heterogenity and Flexibility

lavaan.Team.model <- "  TeamHeterogenity =~ TeamHeterogeneity1+TeamHeterogeneity2+TeamHeterogeneity3\n                                TeamFlexibility =~ TeamFlexibility1+TeamFlexibility2+TeamFlexibility3 \n                                TeamPreference =~ TeamPreference1+TeamPreference2+TeamPreference3 "
fit.lavaan.Team.model <- cfa(lavaan.Team.model, smallData)
summary(fit.lavaan.Team.model, fit.measures = TRUE)
## lavaan (0.5-16) converged normally after  31 iterations
## 
##                                                   Used       Total
##   Number of observations                            91          92
## 
##   Estimator                                         ML
##   Minimum Function Test Statistic               65.474
##   Degrees of freedom                                24
##   P-value (Chi-square)                           0.000
## 
## Model test baseline model:
## 
##   Minimum Function Test Statistic              519.312
##   Degrees of freedom                                36
##   P-value                                        0.000
## 
## User model versus baseline model:
## 
##   Comparative Fit Index (CFI)                    0.914
##   Tucker-Lewis Index (TLI)                       0.871
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -1089.742
##   Loglikelihood unrestricted model (H1)      -1057.005
## 
##   Number of free parameters                         21
##   Akaike (AIC)                                2221.484
##   Bayesian (BIC)                              2274.212
##   Sample-size adjusted Bayesian (BIC)         2207.929
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.138
##   90 Percent Confidence Interval          0.098  0.178
##   P-value RMSEA <= 0.05                          0.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.056
## 
## Parameter estimates:
## 
##   Information                                 Expected
##   Standard Errors                             Standard
## 
##                    Estimate  Std.err  Z-value  P(>|z|)
## Latent variables:
##   TeamHeterogenity =~
##     TeamHetrgnty1     1.000
##     TeamHetrgnty2     1.079    0.091   11.882    0.000
##     TeamHetrgnty3     1.042    0.094   11.120    0.000
##   TeamFlexibility =~
##     TeamFlexblty1     1.000
##     TeamFlexblty2     1.004    0.198    5.059    0.000
##     TeamFlexblty3     1.101    0.212    5.187    0.000
##   TeamPreference =~
##     TeamPrefernc1     1.000
##     TeamPrefernc2     1.007    0.098   10.264    0.000
##     TeamPrefernc3     0.907    0.098    9.299    0.000
## 
## Covariances:
##   TeamHeterogenity ~~
##     TeamFlexiblty     0.511    0.157    3.248    0.001
##     TeamPreferenc     0.303    0.122    2.488    0.013
##   TeamFlexibility ~~
##     TeamPreferenc     0.297    0.110    2.694    0.007
## 
## Variances:
##     TeamHetrgnty1     0.474    0.094
##     TeamHetrgnty2     0.252    0.080
##     TeamHetrgnty3     0.425    0.092
##     TeamFlexblty1     0.868    0.182
##     TeamFlexblty2     0.903    0.187
##     TeamFlexblty3     0.824    0.195
##     TeamPrefernc1     0.167    0.054
##     TeamPrefernc2     0.261    0.062
##     TeamPrefernc3     0.353    0.066
##     TeamHetergnty     1.355    0.269
##     TeamFlexiblty     0.811    0.249
##     TeamPreferenc     0.747    0.142

semPaths(fit.lavaan.Team.model, style = "lisrel", what = "std")

plot of chunk unnamed-chunk-23

Mentorship

lavaan.MFQ.model <- "  MFQCareerSupport =~ MFQCareerSupport1+MFQCareerSupport2+MFQCareerSupport3\n                                MFQPsySupport =~ MFQPsySupport1+MFQPsySupport2+MFQPsySupport3 \n                                MFQRoleModeling =~ MFQRoleModeling1+MFQRoleModeling2+MFQRoleModeling3 "
fit.lavaan.MFQ.model <- cfa(lavaan.MFQ.model, smallData)
summary(fit.lavaan.MFQ.model, fit.measures = TRUE)
## lavaan (0.5-16) converged normally after  35 iterations
## 
##                                                   Used       Total
##   Number of observations                            46          92
## 
##   Estimator                                         ML
##   Minimum Function Test Statistic               30.415
##   Degrees of freedom                                24
##   P-value (Chi-square)                           0.171
## 
## Model test baseline model:
## 
##   Minimum Function Test Statistic              364.883
##   Degrees of freedom                                36
##   P-value                                        0.000
## 
## User model versus baseline model:
## 
##   Comparative Fit Index (CFI)                    0.980
##   Tucker-Lewis Index (TLI)                       0.971
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)               -532.146
##   Loglikelihood unrestricted model (H1)       -516.939
## 
##   Number of free parameters                         21
##   Akaike (AIC)                                1106.292
##   Bayesian (BIC)                              1144.694
##   Sample-size adjusted Bayesian (BIC)         1078.848
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.076
##   90 Percent Confidence Interval          0.000  0.150
##   P-value RMSEA <= 0.05                          0.291
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.060
## 
## Parameter estimates:
## 
##   Information                                 Expected
##   Standard Errors                             Standard
## 
##                    Estimate  Std.err  Z-value  P(>|z|)
## Latent variables:
##   MFQCareerSupport =~
##     MFQCarrSpprt1     1.000
##     MFQCarrSpprt2     1.243    0.164    7.574    0.000
##     MFQCarrSpprt3     1.272    0.168    7.564    0.000
##   MFQPsySupport =~
##     MFQPsySupprt1     1.000
##     MFQPsySupprt2     0.969    0.068   14.274    0.000
##     MFQPsySupprt3     0.928    0.099    9.420    0.000
##   MFQRoleModeling =~
##     MFQRoleMdlng1     1.000
##     MFQRoleMdlng2     1.390    0.257    5.405    0.000
##     MFQRoleMdlng3     1.494    0.276    5.421    0.000
## 
## Covariances:
##   MFQCareerSupport ~~
##     MFQPsySupport     0.325    0.214    1.519    0.129
##     MFQRoleModlng     0.094    0.129    0.732    0.464
##   MFQPsySupport ~~
##     MFQRoleModlng     0.305    0.179    1.709    0.087
## 
## Variances:
##     MFQCarrSpprt1     0.482    0.122
##     MFQCarrSpprt2     0.291    0.122
##     MFQCarrSpprt3     0.311    0.128
##     MFQPsySupprt1     0.141    0.074
##     MFQPsySupprt2     0.137    0.070
##     MFQPsySupprt3     0.601    0.139
##     MFQRoleMdlng1     0.767    0.172
##     MFQRoleMdlng2     0.096    0.117
##     MFQRoleMdlng3     0.396    0.157
##     MFQCarerSpprt     0.995    0.299
##     MFQPsySupport     1.750    0.399
##     MFQRoleModlng     0.649    0.256

semPaths(fit.lavaan.MFQ.model, style = "lisrel", what = "std")

plot of chunk unnamed-chunk-24

Success: Business Planning

df <- select(smallData, BusinessPlanFinancial:BusinessPlanDoc, SuccessMVP, SuccessLaunch, 
    InvestmentAngel)
## Error: unused arguments (BusinessPlanFinancial:BusinessPlanDoc,
## SuccessMVP, SuccessLaunch, InvestmentAngel)
df[, c("BusinessPlanFinancial", "BusinessPlanMarket", "BusinessPlanBP", "BusinessPlanDoc", 
    "SuccessMVP", "SuccessLaunch", "InvestmentAngel")] <- lapply(df[, c("BusinessPlanFinancial", 
    "BusinessPlanMarket", "BusinessPlanBP", "BusinessPlanDoc", "SuccessMVP", 
    "SuccessLaunch", "InvestmentAngel")], ordered)
## Error: undefined columns selected
lavaan.Success.model <- "  BusinessPlanning =~ BusinessPlanFinancial+BusinessPlanMarket+BusinessPlanBP+BusinessPlanDoc\n                          Success =~ SuccessMVP+SuccessLaunch \n                          Investment =~ InvestmentAngel "
fit.lavaan.Success.model <- cfa(lavaan.Success.model, data = df, ordered = c("BusinessPlanFinancial", 
    "BusinessPlanMarket", "BusinessPlanBP", "BusinessPlanDoc", "SuccessMVP", 
    "SuccessLaunch", "InvestmentAngel"))
## Error: lavaan ERROR: missing observed variables in dataset: SuccessMVP
## SuccessLaunch InvestmentAngel
summary(fit.lavaan.Success.model, fit.measures = TRUE)
## Error: error in evaluating the argument 'object' in selecting a method for function 'summary': Error: object 'fit.lavaan.Success.model' not found

semPaths(fit.lavaan.Success.model, style = "lisrel", what = "std")
## Error: object 'fit.lavaan.Success.model' not found

Not easy to test this in SEM framework with only binary variables

Exploring the Models

SEM Models

Simple SEM Model: Survival ~ PreEntryExperience

lavaan.Simple.model <- " # Measurement Model\n  PreEntryBusiness =~  PreEntryBusinessEducation#+PreEntryBusinessWork #+ PreEntryBusinessHobbies\n  PreEntryManagement =~  PreEntryManagement2 + PreEntryManagement3+ PreEntryManagement1 \n  PreEntryPlanning =~ PrePlanningCompetitive+ PrePlanningBrand + PrePlanningModel                              #+ PrePlanningMarket\n  #NetworkSupportWeak =~ NetworkSupportWeakBusiness+NetworkSupportWeakAcquaintances+NetworkSupportWeakEmployers+NetworkSupportWeakCoWorkers \n#NetworkSupportStrong =~ NetworkSupportStrongFriends #NetworkSupportStrongRelatives+NetworkSupportStrongSpouse\n  # NetworkInvestorIndirect =~NetworkInvestorIndirect1+NetworkInvestorIndirect2+NetworkInvestorIndirect3\n  #NetworkInvestorDirect =~ NetworkInvestorDirect1+NetworkInvestorDirect2+NetworkInvestorDirect3\n  Survival =~ SurvivalFeb14Ordered\n  # Regressions\nSurvival ~ PreEntryBusiness + PreEntryManagement +PreEntryPlanning#+NetworkSupportStrong+NetworkInvestorDirect\n"
PreEntryBusinessWork ~ ~PreEntryBusinessEducation + PreEntryBusinessHobbies
## PreEntryBusinessWork ~ ~PreEntryBusinessEducation + PreEntryBusinessHobbies
smallData$SurvivalFeb14int <- as.integer(factor(smallData$SurvivalFeb14))
smallData[, c("SurvivalFeb14Ordered")] <- as.ordered(smallData[, c("SurvivalFeb14")])

fit.lavaan.Simple.model <- sem(lavaan.Simple.model, smallData, ordered = "SurvivalFeb14Ordered")
## Warning: lavaan WARNING: could not compute standard errors!
## 
## Warning: lavaan WARNING: could not compute scaled test statistic
summary(fit.lavaan.Simple.model, fit.measures = TRUE)
## lavaan (0.5-16) converged normally after  46 iterations
## 
##                                                   Used       Total
##   Number of observations                            91          92
## 
##   Estimator                                       DWLS      Robust
##   Minimum Function Test Statistic                3.531          NA
##   Degrees of freedom                                15          15
##   P-value (Chi-square)                           0.999          NA
##   Scaling correction factor                                     NA
##   Shift parameter                                     
##     for simple second-order correction (Mplus variant)
## 
## Model test baseline model:
## 
##   Minimum Function Test Statistic              181.979     100.304
##   Degrees of freedom                                28          28
##   P-value                                        0.000       0.000
## 
## User model versus baseline model:
## 
##   Comparative Fit Index (CFI)                    1.000          NA
##   Tucker-Lewis Index (TLI)                       1.139          NA
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.000          NA
##   90 Percent Confidence Interval          0.000  0.000          NA     NA
##   P-value RMSEA <= 0.05                          1.000          NA
## 
## Weighted Root Mean Square Residual:
## 
##   WRMR                                           0.287       0.287
## 
## Parameter estimates:
## 
##   Information                                 Expected
##   Standard Errors                           Robust.sem
## 
##                    Estimate  Std.err  Z-value  P(>|z|)
## Latent variables:
##   PreEntryBusiness =~
##     PrEntryBsnssE     1.000
##   PreEntryManagement =~
##     PrEntryMngmn2     1.000
##     PrEntryMngmn3     0.934
##     PrEntryMngmn1     0.864
##   PreEntryPlanning =~
##     PrPlnnngCmptt     1.000
##     PrePlnnngBrnd     1.108
##     PrePlannngMdl     1.081
##   Survival =~
##     SrvvlFb14Ordr     1.000
## 
## Regressions:
##   Survival ~
##     PreEntryBsnss     0.128
##     PrEntryMngmnt     0.110
##     PrEntryPlnnng     0.337
## 
## Covariances:
##   PreEntryBusiness ~~
##     PrEntryMngmnt     0.242
##     PrEntryPlnnng     0.298
##   PreEntryManagement ~~
##     PrEntryPlnnng     0.395
## 
## Intercepts:
##     PrEntryBsnssE     3.692
##     PrEntryMngmn2     2.890
##     PrEntryMngmn3     2.703
##     PrEntryMngmn1     2.901
##     PrPlnnngCmptt     3.187
##     PrePlnnngBrnd     3.110
##     PrePlannngMdl     3.275
##     PreEntryBsnss     0.000
##     PrEntryMngmnt     0.000
##     PrEntryPlnnng     0.000
##     Survival          0.000
## 
## Thresholds:
##     SrvvlFb14Or|1    -0.265
## 
## Variances:
##     PrEntryBsnssE     0.000
##     PrEntryMngmn2     0.322
##     PrEntryMngmn3     0.867
##     PrEntryMngmn1     0.520
##     PrPlnnngCmptt     0.406
##     PrePlnnngBrnd     0.414
##     PrePlannngMdl     0.629
##     SrvvlFb14Ordr     0.724
##     PreEntryBsnss     1.180
##     PrEntryMngmnt     0.985
##     PrEntryPlnnng     1.174
##     Survival          0.050
semPaths(fit.lavaan.Simple.model, style = "lisrel", what = "std")

plot of chunk unnamed-chunk-26

Unable to compute standard errors and scaled test statistic!

Logistic regression models

Regression with Survey Data Tutorial

province <- read.table(text = "id str clu wt ue91 meanz hou85 diffhou85 smplrat\n\t\t1 1 1 4 4123 2867 26881 -24014 .25\n\t\t2 1 4 4 760 2867 4896 -2029 .25\n\t\t3 1 5 4 721 2867 3730 -863 .25\n\t\t4 1 15 4 142 2867 556 2311 .25\n\t\t5 1 18 4 187 2867 1463 1404 .25\n\t\t6 1 26 4 331 2867 1946 921 .25\n\t\t7 1 30 4 127 2867 834 2033 .25\n\t\t8 1 31 4 219 2867 932 1935 .25", 
    header = TRUE)
province$fpc <- 32  #Sample size!

# Design
province.design <- svydesign(id = ~clu, strata = ~str, data = province, weights = ~wt, 
    fpc = ~fpc)
# Model
(mysvyglm <- svyglm(ue91 ~ hou85, province.design))
## Stratified Independent Sampling design
## svydesign(id = ~clu, strata = ~str, data = province, weights = ~wt, 
##     fpc = ~fpc)
## 
## Call:  svyglm(formula = ue91 ~ hou85, province.design)
## 
## Coefficients:
## (Intercept)        hou85  
##      42.655        0.152  
## 
## Degrees of Freedom: 7 Total (i.e. Null);  6 Residual
## Null Deviance:       12900000 
## Residual Deviance: 22500     AIC: 92.2

Now we try it: Survival ~ Pre Entry Experience

df <- select(smallData, SurvivalFeb14, PreEntryBusinessWork:PrePlanningModel)
## Error: unused arguments (SurvivalFeb14,
## PreEntryBusinessWork:PrePlanningModel)

Simple model: Survival ~ Pre-Entry Experience

xtabs(~SurvivalFeb14 + PreEntryBusinessHobbies, data = smallData)
##              PreEntryBusinessHobbies
## SurvivalFeb14  1  2  3  4  5
##             1  2  5 10 15  4
##             2  5  7  7 20 17

Exploring the Networking and Experience relationship to Incubator Outcomes and Survival/Angel

Develop incubation outcomes measures

Our current incubation outcomes are:

Business plan is measured with 3 variables a la “during the incubator did your team do a business plan.” However for the other variables (MVP:SeriesA), we have dates for the development. It is not possible to attribute the release to the incubator for certain as we do not have a control (startups not coming to the incubator and doing the same outcomes), but it is likely the outcomes are highly related because the accelerator highly encourages finishing at least one of these outcomes.

Specifically, we want to ask how to use the dates data and create an incubation outcomes more numerical data.

First, we have whether the startup had achieved the outcome at the time of survey (end of the incubator). For example, SuccessMVP=Yes/No.

If Yes, then we have the SuccessMVPDate=6/20/2013. This date can be before or during the incubation period.

If No, then we have the SuccessMVPDateAnticipated=12/1/2013. This date is typically during or after the incubation period.

Therefore, we can construct the outcomes during the incubator:

From the SuccessMVPDate, we can see if the outcome was achieved BEFORE, DURING, of AFTER the incubator or 1,2,3.

We can then use the DURING to observe the startups that developed the MVP during the accelerator period.

Let's try it on the MVP first.

Things to do:

Create data set

# First get the incubator outcomes data
df.ioutcomes <- dplyr::select(Data3314, SubjectName, PreEntryBusinessWork:StartupExperience, 
    SurvivalFeb14, InvestmentAngel, BusinessPlanFinancial:InvestmentSeriesADate, 
    IncubatorMentor, SatisfactionJob:SUPPresentDemoDay, NetworkInvestorIndirect1:NetworkInvestorDirect3)

Data Exploration MVPOutcome

True= MVP released during the incubator False= NOT

# Second let's observe how MVPs is distributed
summarize(group_by(df.ioutcomes, SuccessMVP), count = n())
## Source: local data frame [2 x 2]
## 
##   SuccessMVP count
## 1         No    19
## 2        Yes    73
qplot(data = df.ioutcomes, SuccessMVP, geom = "histogram")

plot of chunk unnamed-chunk-31


# How many of these MVPs were developed before/during/after the incubator
# period? code dates
class(df.ioutcomes$SuccessMVPDate)
## [1] "character"
df.ioutcomes$SuccessMVPDate[1]
## [1] "6/20/2013"
df.ioutcomes$SMVPDateFormat <- as.Date(df.ioutcomes$SuccessMVPDate, format = "%m/%d/%Y")
summary(df.ioutcomes$SMVPDateFormat, na.rm = TRUE)
##         Min.      1st Qu.       Median         Mean      3rd Qu. 
## "2011-05-01" "2012-11-23" "2013-04-01" "2013-02-21" "2013-07-01" 
##         Max.         NA's 
## "2014-04-01"         "20"

# calculate before/during/after
df.ioutcomes$SMVPOutcome <- (df.ioutcomes$SMVPDateFormat >= as.Date("2013-01-01")) & 
    (df.ioutcomes$SMVPDateFormat <= as.Date("2013-09-01"))
# View(select(df.ioutcomes, SuccessMVP, SMVPDateFormat, SMVPOutcome)) NAs
# (haven't done it yet) should be FALSE (did not release MVP during
# incubator period)
df.ioutcomes$SMVPOutcome[is.na(df.ioutcomes$SMVPOutcome)] <- FALSE
# replot resummarize
summarize(group_by(df.ioutcomes, SuccessMVP), count = n())
## Source: local data frame [2 x 2]
## 
##   SuccessMVP count
## 1         No    19
## 2        Yes    73
summarize(group_by(df.ioutcomes, SuccessMVP, SMVPOutcome), count = n(), meanDate = mean(SMVPDateFormat), 
    na.rm = TRUE)
## Source: local data frame [3 x 5]
## Groups: SuccessMVP
## 
##   SuccessMVP SMVPOutcome count meanDate na.rm
## 1         No       FALSE    19       NA  TRUE
## 2        Yes       FALSE    23       NA  TRUE
## 3        Yes        TRUE    50    15835  TRUE
qplot(data = df.ioutcomes, SMVPOutcome, geom = "histogram", fill = SuccessMVP)

plot of chunk unnamed-chunk-31


# Relationship to:

# StartupExperience
qplot(data = df.ioutcomes, SuccessMVP, StartupExperience, geom = "boxplot")
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-31


qplot(data = df.ioutcomes, SMVPDateFormat, geom = "histogram", fill = factor(StartupExperience)) + 
    geom_vline(xintercept = as.numeric(as.Date("2013-09-01")), colour = "red", 
        linetype = "longdash") + geom_vline(xintercept = as.numeric(as.Date("2013-01-01")), 
    colour = "green", linetype = "longdash")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-31


qplot(data = df.ioutcomes, SMVPDateFormat, geom = "histogram", fill = factor(StartupExperience)) + 
    facet_wrap(~StartupExperience, nrow = 2) + geom_vline(xintercept = as.numeric(as.Date("2013-09-01")), 
    colour = "red", linetype = "longdash") + geom_vline(xintercept = as.numeric(as.Date("2013-01-01")), 
    colour = "green", linetype = "longdash")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-31


# Boxie
ggplot(data = df.ioutcomes, aes(StartupExperience, SMVPDateFormat, group = factor(StartupExperience))) + 
    geom_boxplot()
## Warning: Removed 20 rows containing non-finite values (stat_boxplot).
## Warning: Removed 2 rows containing missing values (geom_segment).
## Warning: Removed 1 rows containing missing values (geom_segment).

plot of chunk unnamed-chunk-31


# Survival
qplot(data = df.ioutcomes, SMVPDateFormat, geom = "histogram", fill = SurvivalFeb14) + 
    geom_vline(xintercept = as.numeric(as.Date("2013-09-01")), colour = "red", 
        linetype = "longdash") + geom_vline(xintercept = as.numeric(as.Date("2013-01-01")), 
    colour = "green", linetype = "longdash")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-31

# Angel
qplot(data = df.ioutcomes, SMVPDateFormat, geom = "histogram", fill = InvestmentAngel) + 
    geom_vline(xintercept = as.numeric(as.Date("2013-09-01")), colour = "red", 
        linetype = "longdash") + geom_vline(xintercept = as.numeric(as.Date("2013-01-01")), 
    colour = "green", linetype = "longdash")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-31

SuccessLaunch

True= Commercial launch done during the incubator False= NOT

# Second let's observe how SuccessLaunch is distributed
summarize(group_by(df.ioutcomes, SuccessLaunch), count = n())
## Source: local data frame [2 x 2]
## 
##   SuccessLaunch count
## 1            No    51
## 2           Yes    41
qplot(data = df.ioutcomes, SuccessLaunch, geom = "histogram")

plot of chunk unnamed-chunk-32


# How many of these Successful Launches were developed before/during/after
# the incubator period? code dates
class(df.ioutcomes$SuccessLaunchDate)
## [1] "character"
df.ioutcomes$SuccessLaunchDate[4]
## [1] "5/1/2013"
df.ioutcomes$SLaunchDateFormat <- as.Date(df.ioutcomes$SuccessLaunchDate, format = "%m/%d/%Y")
summary(df.ioutcomes$SLaunchDateFormat, na.rm = TRUE)
##         Min.      1st Qu.       Median         Mean      3rd Qu. 
## "2011-10-01" "2012-11-01" "2013-03-16" "2013-02-14" "2013-06-01" 
##         Max.         NA's 
## "2013-12-01"         "54"

# calculate before/during/after
df.ioutcomes$SLaunchOutcome <- (df.ioutcomes$SLaunchDateFormat >= as.Date("2013-01-01")) & 
    (df.ioutcomes$SLaunchDateFormat <= as.Date("2013-09-01"))
# View(select(df.ioutcomes, SuccessLaunch, SLaunchDateFormat,
# SLaunchOutcome)) NAs (haven't done it yet) should be FALSE (did not
# release MVP during incubator period)
df.ioutcomes$SLaunchOutcome[is.na(df.ioutcomes$SLaunchOutcome)] <- FALSE
# replot resummarize
summarize(group_by(df.ioutcomes, SuccessLaunch), count = n())
## Source: local data frame [2 x 2]
## 
##   SuccessLaunch count
## 1            No    51
## 2           Yes    41
summarize(group_by(df.ioutcomes, SuccessLaunch, SLaunchOutcome), count = n(), 
    meanDate = mean(SLaunchDateFormat), na.rm = TRUE)
## Source: local data frame [3 x 5]
## Groups: SuccessLaunch
## 
##   SuccessLaunch SLaunchOutcome count meanDate na.rm
## 1            No          FALSE    51       NA  TRUE
## 2           Yes          FALSE    18       NA  TRUE
## 3           Yes           TRUE    23    15842  TRUE
qplot(data = df.ioutcomes, SLaunchOutcome, geom = "histogram", fill = SuccessLaunch)

plot of chunk unnamed-chunk-32


# Relationship to:

# StartupExperience
qplot(data = df.ioutcomes, SuccessLaunch, StartupExperience, geom = "boxplot")
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-32


qplot(data = df.ioutcomes, SLaunchDateFormat, geom = "histogram", fill = factor(StartupExperience)) + 
    geom_vline(xintercept = as.numeric(as.Date("2013-09-01")), colour = "red", 
        linetype = "longdash") + geom_vline(xintercept = as.numeric(as.Date("2013-01-01")), 
    colour = "green", linetype = "longdash")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-32


qplot(data = df.ioutcomes, SLaunchDateFormat, geom = "histogram", fill = factor(StartupExperience)) + 
    facet_wrap(~StartupExperience, nrow = 2) + geom_vline(xintercept = as.numeric(as.Date("2013-09-01")), 
    colour = "red", linetype = "longdash") + geom_vline(xintercept = as.numeric(as.Date("2013-01-01")), 
    colour = "green", linetype = "longdash")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-32


# Boxie
ggplot(data = df.ioutcomes, aes(StartupExperience, SLaunchDateFormat, group = factor(StartupExperience))) + 
    geom_boxplot()
## Warning: Removed 54 rows containing non-finite values (stat_boxplot).
## Warning: Removed 2 rows containing missing values (geom_segment).
## Warning: Removed 1 rows containing missing values (geom_segment).

plot of chunk unnamed-chunk-32


# Survival
qplot(data = df.ioutcomes, SLaunchDateFormat, geom = "histogram", fill = SurvivalFeb14) + 
    geom_vline(xintercept = as.numeric(as.Date("2013-09-01")), colour = "red", 
        linetype = "longdash") + geom_vline(xintercept = as.numeric(as.Date("2013-01-01")), 
    colour = "green", linetype = "longdash")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-32

# Angel
qplot(data = df.ioutcomes, SLaunchDateFormat, geom = "histogram", fill = InvestmentAngel) + 
    geom_vline(xintercept = as.numeric(as.Date("2013-09-01")), colour = "red", 
        linetype = "longdash") + geom_vline(xintercept = as.numeric(as.Date("2013-01-01")), 
    colour = "green", linetype = "longdash")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-32

Seems that more experience shows more successful launches in the incubator

SuccessFirstSale

True= First sale done during the incubator False= NOT

# Second let's observe how SuccessFirstSale is distributed
summarize(group_by(df.ioutcomes, SuccessFirstSale), count = n(), meanStartupExperience = mean(StartupExperience, 
    na.rm = TRUE))
## Source: local data frame [3 x 3]
## 
##   SuccessFirstSale count meanStartupExperience
## 1               No    57                 1.727
## 2              Yes    34                 1.765
## 3               NA     1                 2.000

# How many of these Successful Launches were developed before/during/after
# the incubator period? code dates
class(df.ioutcomes$SuccessFirstSaleDate)
## [1] "character"
df.ioutcomes$SuccessFirstSaleDate[4]
## [1] "7/1/2013"
df.ioutcomes$SFSaleDateFormat <- as.Date(df.ioutcomes$SuccessFirstSaleDate, 
    format = "%m/%d/%Y")
summary(df.ioutcomes$SFSaleDateFormat, na.rm = TRUE)
##         Min.      1st Qu.       Median         Mean      3rd Qu. 
## "2011-05-01" "2012-11-01" "2013-02-01" "2013-01-19" "2013-06-01" 
##         Max.         NA's 
## "2014-11-01"         "59"

# calculate before/during/after
df.ioutcomes$SFSaleOutcome <- (df.ioutcomes$SFSaleDateFormat >= as.Date("2013-01-01")) & 
    (df.ioutcomes$SFSaleDateFormat <= as.Date("2013-09-01"))
# View(select(df.ioutcomes, SuccessFirstSale, SFSaleDateFormat,
# SFSaleOutcome)) NAs (haven't done it yet) should be FALSE (did not release
# MVP during incubator period)
df.ioutcomes$SFSaleOutcome[is.na(df.ioutcomes$SFSaleOutcome)] <- FALSE
# replot resummarize
summarize(group_by(df.ioutcomes, SuccessFirstSale), count = n())
## Source: local data frame [3 x 2]
## 
##   SuccessFirstSale count
## 1               No    57
## 2              Yes    34
## 3               NA     1
summarize(group_by(df.ioutcomes, SuccessFirstSale, SFSaleOutcome), count = n(), 
    meanDate = mean(SFSaleDateFormat, na.rm = TRUE))
## Source: local data frame [4 x 4]
## Groups: SuccessFirstSale
## 
##   SuccessFirstSale SFSaleOutcome count   meanDate
## 1               No         FALSE    57 2014-02-01
## 2              Yes         FALSE    17 2012-09-01
## 3              Yes          TRUE    17 2013-04-30
## 4               NA         FALSE     1       <NA>
qplot(data = df.ioutcomes, SFSaleOutcome, geom = "histogram", fill = SuccessFirstSale)

plot of chunk unnamed-chunk-33


qplot(data = summarize(group_by(df.ioutcomes, StartupExperience, SFSaleOutcome), 
    count = n()), StartupExperience, count, geom = "point", color = SFSaleOutcome)
## Warning: Removed 1 rows containing missing values (geom_point).

plot of chunk unnamed-chunk-33

# density plot
ggplot(data = df.ioutcomes, aes(x = StartupExperience, color = SFSaleOutcome)) + 
    geom_density()
## Warning: Removed 2 rows containing non-finite values (stat_density).

plot of chunk unnamed-chunk-33


# Relationship to:

# StartupExperience Show
qplot(data = df.ioutcomes, SuccessFirstSale, StartupExperience, geom = "boxplot")
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-33


qplot(data = df.ioutcomes, SFSaleDateFormat, geom = "histogram", fill = factor(StartupExperience)) + 
    geom_vline(xintercept = as.numeric(as.Date("2013-09-01")), colour = "red", 
        linetype = "longdash") + geom_vline(xintercept = as.numeric(as.Date("2013-01-01")), 
    colour = "green", linetype = "longdash")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## Warning: position_stack requires constant width: output may be incorrect

plot of chunk unnamed-chunk-33


qplot(data = df.ioutcomes, SFSaleDateFormat, geom = "histogram", fill = factor(StartupExperience)) + 
    facet_wrap(~StartupExperience, nrow = 2) + geom_vline(xintercept = as.numeric(as.Date("2013-09-01")), 
    colour = "red", linetype = "longdash") + geom_vline(xintercept = as.numeric(as.Date("2013-01-01")), 
    colour = "green", linetype = "longdash")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## Warning: position_stack requires constant width: output may be incorrect
## Warning: position_stack requires constant width: output may be incorrect
## Warning: position_stack requires constant width: output may be incorrect
## Warning: position_stack requires constant width: output may be incorrect
## Warning: position_stack requires constant width: output may be incorrect
## Warning: position_stack requires constant width: output may be incorrect
## Warning: position_stack requires constant width: output may be incorrect

plot of chunk unnamed-chunk-33



# Boxie
ggplot(data = df.ioutcomes, aes(StartupExperience, SFSaleDateFormat, group = factor(StartupExperience))) + 
    geom_boxplot()
## Warning: Removed 59 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-33


# Survival
qplot(data = df.ioutcomes, SFSaleDateFormat, geom = "histogram", fill = SurvivalFeb14) + 
    geom_vline(xintercept = as.numeric(as.Date("2013-09-01")), colour = "red", 
        linetype = "longdash") + geom_vline(xintercept = as.numeric(as.Date("2013-01-01")), 
    colour = "green", linetype = "longdash")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## Warning: position_stack requires constant width: output may be incorrect

plot of chunk unnamed-chunk-33

# Angel
qplot(data = df.ioutcomes, SFSaleDateFormat, geom = "histogram", fill = InvestmentAngel) + 
    geom_vline(xintercept = as.numeric(as.Date("2013-09-01")), colour = "red", 
        linetype = "longdash") + geom_vline(xintercept = as.numeric(as.Date("2013-01-01")), 
    colour = "green", linetype = "longdash")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## Warning: position_stack requires constant width: output may be incorrect

plot of chunk unnamed-chunk-33

Same.

SuccessSalesChange

True= Significant increase in sales during the incubator False= NOT

# Second let's observe how SuccessSalesChange is distributed
summarize(group_by(df.ioutcomes, SuccessSalesChange), count = n(), meanStartupExperience = mean(StartupExperience, 
    na.rm = TRUE))
## Source: local data frame [2 x 3]
## 
##   SuccessSalesChange count meanStartupExperience
## 1                 No    69                 1.642
## 2                Yes    23                 2.043

# Create True/False variable
df.ioutcomes$SSalesChange <- as.logical(as.numeric(factor(df.ioutcomes$SuccessSalesChange)) - 
    1)

# What is the distribution of the SuccessSalesChangePercent convert to
# numeric
df.ioutcomes$SuccessSalesChangePercentNumeric <- as.integer(sub(df.ioutcomes$SuccessSalesChangePercent, 
    pattern = "%", replacement = ""))

qplot(data = df.ioutcomes, SuccessSalesChangePercentNumeric, geom = "histogram")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-34


# Relationship to:

# StartupExperience Show
qplot(data = df.ioutcomes, SuccessSalesChange, StartupExperience, geom = "boxplot")
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-34

A higher experience shows a higher chance of increase in sales

SuccessStaffChange

True= Significant increase in employees during the incubator False= NOT

# Second let's observe how SuccessSalesChange is distributed
summarize(group_by(df.ioutcomes, SuccessStaffChange), count = n(), meanStartupExperience = mean(StartupExperience, 
    na.rm = TRUE))
## Source: local data frame [2 x 3]
## 
##   SuccessStaffChange count meanStartupExperience
## 1                 No    51                  1.74
## 2                Yes    41                  1.75

# Create True/False variable
df.ioutcomes$SStaffChange <- as.logical(as.numeric(factor(df.ioutcomes$SuccessStaffChange)) - 
    1)

# What is the distribution of the SuccessSalesChangePercent convert to
# numeric
df.ioutcomes$SuccessStaffChangePercentNumeric <- as.integer(sub(df.ioutcomes$SuccessStaffChangePercent, 
    pattern = "%", replacement = ""))

qplot(data = df.ioutcomes, SuccessStaffChangePercentNumeric, geom = "histogram")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-35


# Relationship to:

# StartupExperience Show
qplot(data = df.ioutcomes, SuccessStaffChange, StartupExperience, geom = "boxplot")
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-35

No relationship observed between startup experience and employees increase.

Table to view experience and incubator outcomes

# Aggregate startup experience so that those who have more than 3 startups
# are = 4
df.ioutcomes$aggStartupExperience <- df.ioutcomes$StartupExperience
df.ioutcomes$aggStartupExperience[df.ioutcomes$StartupExperience > 3 & !is.na(df.ioutcomes$StartupExperience)] <- 4

# means of incubator outcomes over experience
df <- summarize(group_by(df.ioutcomes, aggStartupExperience), count = n(), MVPs = mean(SMVPOutcome), 
    Launch = mean(SLaunchOutcome), SFSale = mean(SSalesChange), SalesInc = mean(SStaffChange), 
    StaffInc = mean(SStaffChange))
summarize(group_by(df.ioutcomes, StartupExperience), count = n(), MVPs = mean(SMVPOutcome), 
    Launch = mean(SLaunchOutcome), SFSale = mean(SSalesChange), SalesInc = mean(SStaffChange), 
    StaffInc = mean(SStaffChange))
## Source: local data frame [9 x 7]
## 
##   StartupExperience count   MVPs Launch  SFSale SalesInc StaffInc
## 1                 0    22 0.5455 0.2727 0.13636   0.3636   0.3636
## 2                 1    26 0.3462 0.1154 0.23077   0.4615   0.4615
## 3                 2    20 0.5000 0.3000 0.55000   0.5500   0.5500
## 4                 3    11 0.7273 0.1818 0.09091   0.4545   0.4545
## 5                 4     5 0.6000 0.4000 0.00000   0.2000   0.2000
## 6                 5     2 1.0000 0.5000 0.00000   0.5000   0.5000
## 7                 6     3 1.0000 0.3333 0.33333   0.6667   0.6667
## 8                10     1 1.0000 1.0000 1.00000   0.0000   0.0000
## 9                NA     2 1.0000 0.5000 0.00000   0.5000   0.5000

# Density plots of incubation outcome Vs StartupExperience

ggplot(data = df.ioutcomes, aes(aggStartupExperience, fill = SMVPOutcome)) + 
    geom_histogram(position = "dodge", aes(y = ..density..))
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-36


ggplot(data = df.ioutcomes, aes(aggStartupExperience, fill = SLaunchOutcome)) + 
    geom_histogram(position = "dodge", aes(y = ..density..))
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-36


ggplot(data = df.ioutcomes, aes(aggStartupExperience, fill = SFSaleOutcome)) + 
    geom_histogram(position = "dodge", aes(y = ..density..))
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-36


ggplot(data = df.ioutcomes, aes(aggStartupExperience, fill = SSalesChange)) + 
    geom_histogram(position = "dodge", aes(y = ..density..))
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-36


ggplot(data = df.ioutcomes, aes(aggStartupExperience, fill = SStaffChange)) + 
    geom_histogram(position = "dodge", aes(y = ..density..))
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-36


ggplot(data = df) + geom_line(aes(aggStartupExperience, MVPs), color = "red") + 
    geom_line(aes(aggStartupExperience, Launch), color = "blue") + geom_line(aes(aggStartupExperience, 
    SFSale), color = "green") + geom_line(aes(aggStartupExperience, SalesInc), 
    color = "black") + geom_line(aes(aggStartupExperience, SalesInc), color = "yellow")
## Warning: Removed 2 rows containing missing values (geom_path).
## Warning: Removed 2 rows containing missing values (geom_path).
## Warning: Removed 2 rows containing missing values (geom_path).
## Warning: Removed 2 rows containing missing values (geom_path).
## Warning: Removed 2 rows containing missing values (geom_path).

plot of chunk unnamed-chunk-36


# Plotting color timeline with aggregated startup experience MVP
qplot(data = df.ioutcomes, SuccessMVP, aggStartupExperience, geom = "boxplot")
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-36


qplot(data = df.ioutcomes, SMVPDateFormat, geom = "density", group = factor(aggStartupExperience), 
    color = aggStartupExperience, adjust = 2) + geom_vline(xintercept = as.numeric(as.Date("2013-09-01")), 
    colour = "red", linetype = "longdash") + geom_vline(xintercept = as.numeric(as.Date("2013-01-01")), 
    colour = "red", linetype = "longdash") + scale_colour_gradient(low = "green", 
    high = "blue")
## Warning: Removed 5 rows containing non-finite values (stat_density).
## Warning: Removed 9 rows containing non-finite values (stat_density).
## Warning: Removed 3 rows containing non-finite values (stat_density).
## Warning: Removed 2 rows containing non-finite values (stat_density).
## Warning: Removed 1 rows containing non-finite values (stat_density).

plot of chunk unnamed-chunk-36


# Launch
qplot(data = df.ioutcomes, SuccessLaunch, aggStartupExperience, geom = "boxplot")
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-36


qplot(data = df.ioutcomes, SLaunchDateFormat, geom = "histogram", fill = factor(aggStartupExperience)) + 
    geom_vline(xintercept = as.numeric(as.Date("2013-09-01")), colour = "red", 
        linetype = "longdash") + geom_vline(xintercept = as.numeric(as.Date("2013-01-01")), 
    colour = "green", linetype = "longdash")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-36

# First Sale
qplot(data = df.ioutcomes, SFSaleDateFormat, geom = "histogram", fill = factor(aggStartupExperience)) + 
    geom_vline(xintercept = as.numeric(as.Date("2013-09-01")), colour = "red", 
        linetype = "longdash") + geom_vline(xintercept = as.numeric(as.Date("2013-01-01")), 
    colour = "green", linetype = "longdash")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## Warning: position_stack requires constant width: output may be incorrect

plot of chunk unnamed-chunk-36


qplot(data = df.ioutcomes, SuccessFirstSale, aggStartupExperience, geom = "boxplot")
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-36

Simple models on Incubator Outcomes

Getting variables ready

# Get Pre-Entry business and management ready Pairs plot
pairs.panels(dplyr::select(smallData, SurvivalFeb14, PreEntryBusinessWork:PreEntryManagement3), 
    pch = ".", main = "Survival and PreEntry Management and Business")

plot of chunk unnamed-chunk-37


# CFA of pre entry business and management
fa.preentrybusiness <- fa(dplyr::select(smallData, PreEntryBusinessWork:PrePlanningModel), 
    3)
fa.diagram(fa.preentrybusiness)

plot of chunk unnamed-chunk-37

fa.preentrybusiness
## Factor Analysis using method =  minres
## Call: fa(r = dplyr::select(smallData, PreEntryBusinessWork:PrePlanningModel), 
##     nfactors = 3)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                             MR1   MR2   MR3   h2   u2 com
## PreEntryBusinessWork      -0.04  0.10  0.48 0.26 0.74 1.1
## PreEntryBusinessEducation  0.07  0.00  0.82 0.69 0.31 1.0
## PreEntryBusinessHobbies   -0.13  0.05  0.56 0.32 0.68 1.1
## PreEntryManagement1        0.08  0.71 -0.07 0.52 0.48 1.1
## PreEntryManagement2        0.01  0.90  0.01 0.81 0.19 1.0
## PreEntryManagement3       -0.05  0.70  0.11 0.52 0.48 1.1
## PrePlanningMarket          0.76  0.11 -0.09 0.63 0.37 1.1
## PrePlanningCompetitive     0.85  0.09 -0.02 0.77 0.23 1.0
## PrePlanningBrand           0.82 -0.01  0.05 0.69 0.31 1.0
## PrePlanningModel           0.90 -0.11  0.06 0.77 0.23 1.0
## 
##                        MR1  MR2  MR3
## SS loadings           2.84 1.88 1.27
## Proportion Var        0.28 0.19 0.13
## Cumulative Var        0.28 0.47 0.60
## Proportion Explained  0.47 0.31 0.21
## Cumulative Proportion 0.47 0.79 1.00
## 
##  With factor correlations of 
##      MR1  MR2  MR3
## MR1 1.00 0.35 0.17
## MR2 0.35 1.00 0.25
## MR3 0.17 0.25 1.00
## 
## Mean item complexity =  1
## Test of the hypothesis that 3 factors are sufficient.
## 
## The degrees of freedom for the null model are  45  and the objective function was  4.84 with Chi Square of  420.1
## The degrees of freedom for the model are 18  and the objective function was  0.41 
## 
## The root mean square of the residuals (RMSR) is  0.04 
## The df corrected root mean square of the residuals is  0.06 
## 
## The harmonic number of observations is  92 with the empirical chi square  12.68  with prob <  0.81 
## The total number of observations was  92  with MLE Chi Square =  34.36  with prob <  0.011 
## 
## Tucker Lewis Index of factoring reliability =  0.888
## RMSEA index =  0.107  and the 90 % confidence intervals are  0.046 0.149
## BIC =  -47.03
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy             
##                                                 MR1  MR2  MR3
## Correlation of scores with factors             0.96 0.93 0.87
## Multiple R square of scores with factors       0.91 0.87 0.76
## Minimum correlation of possible factor scores  0.83 0.74 0.51
alpha(select(smallData, PreEntryBusinessWork:PreEntryBusinessHobbies))
## Error: unused argument (PreEntryBusinessWork:PreEntryBusinessHobbies)

# Create Averages pre-entry business, management, and planning averages
df.ioutcomes$PreEntryBusinesAve <- rowMeans(df.ioutcomes[c("PreEntryBusinessWork", 
    "PreEntryBusinessEducation", "PreEntryBusinessHobbies")])
df.ioutcomes$PreEntryMgtAve <- rowMeans(df.ioutcomes[c("PreEntryManagement1", 
    "PreEntryManagement2", "PreEntryManagement3")])
df.ioutcomes$PreEntryPlanAve <- rowMeans(df.ioutcomes[c("PrePlanningMarket", 
    "PrePlanningCompetitive", "PrePlanningBrand", "PrePlanningModel")])

fa.preentrybusiness <- fa(dplyr::select(df.ioutcomes, PreEntryBusinesAve:PreEntryPlanAve), 
    1)
fa.diagram(fa.preentrybusiness)

plot of chunk unnamed-chunk-37


pairs.panels(dplyr::select(df.ioutcomes, PreEntryBusinesAve:PreEntryPlanAve), 
    pch = ".", main = "Survival and PreEntry Management and Business")

plot of chunk unnamed-chunk-37

Experience on Incubator Outcomes (No controls)

# MVP~PreEntryExperience
preentry.vars <- "PreEntryBusinesAve + PreEntryMgtAve + PreEntryPlanAve +StartupExperience"

formula.MVP <- paste0("SMVPOutcome ~ ", preentry.vars)
summary(glm(as.formula(formula.MVP), data = df.ioutcomes, family = binomial(logit)))
## 
## Call:
## glm(formula = as.formula(formula.MVP), family = binomial(logit), 
##     data = df.ioutcomes)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.523  -1.036   0.470   0.928   1.902  
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)   
## (Intercept)          -0.384      1.149   -0.33   0.7382   
## PreEntryBusinesAve   -0.441      0.296   -1.49   0.1364   
## PreEntryMgtAve        0.847      0.285    2.97   0.0029 **
## PreEntryPlanAve      -0.277      0.225   -1.23   0.2173   
## StartupExperience     0.363      0.158    2.30   0.0215 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 123.10  on 88  degrees of freedom
## Residual deviance: 105.87  on 84  degrees of freedom
##   (3 observations deleted due to missingness)
## AIC: 115.9
## 
## Number of Fisher Scoring iterations: 3

# Launch~PreEntryExperience
formula.Launch <- paste0("SLaunchOutcome ~ ", preentry.vars)  #nothing really significant
summary(glm(as.formula(formula.Launch), data = df.ioutcomes, family = binomial(logit)))
## 
## Call:
## glm(formula = as.formula(formula.Launch), family = binomial(logit), 
##     data = df.ioutcomes)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.291  -0.787  -0.556  -0.306   2.245  
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)  
## (Intercept)          -3.286      1.438   -2.28    0.022 *
## PreEntryBusinesAve   -0.237      0.309   -0.77    0.444  
## PreEntryMgtAve        0.449      0.281    1.60    0.110  
## PreEntryPlanAve       0.387      0.254    1.52    0.128  
## StartupExperience     0.211      0.141    1.50    0.134  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 99.543  on 88  degrees of freedom
## Residual deviance: 89.801  on 84  degrees of freedom
##   (3 observations deleted due to missingness)
## AIC: 99.8
## 
## Number of Fisher Scoring iterations: 4
summary(glm(SLaunchOutcome ~ PreEntryBusinesAve + PreEntryManagement2 + PrePlanningModel + 
    StartupExperience, data = df.ioutcomes, family = binomial(logit)))
## 
## Call:
## glm(formula = SLaunchOutcome ~ PreEntryBusinesAve + PreEntryManagement2 + 
##     PrePlanningModel + StartupExperience, family = binomial(logit), 
##     data = df.ioutcomes)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.412  -0.791  -0.519  -0.203   2.300  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)  
## (Intercept)           -3.590      1.497   -2.40    0.016 *
## PreEntryBusinesAve    -0.303      0.326   -0.93    0.352  
## PreEntryManagement2    0.519      0.270    1.92    0.055 .
## PrePlanningModel       0.441      0.215    2.05    0.040 *
## StartupExperience      0.233      0.142    1.64    0.101  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 100.11  on 89  degrees of freedom
## Residual deviance:  87.86  on 85  degrees of freedom
##   (2 observations deleted due to missingness)
## AIC: 97.86
## 
## Number of Fisher Scoring iterations: 5
formula.Launch <- "SLaunchOutcome ~ PreEntryBusinesAve + PreEntryManagement2 + PrePlanningModel + StartupExperience"

# 1stSale~PreEntryExperience
formula.1stSale <- paste0("SFSaleOutcome ~ ", preentry.vars)
summary(glm(as.formula(formula.1stSale), data = df.ioutcomes, family = binomial(logit)))
## 
## Call:
## glm(formula = as.formula(formula.1stSale), family = binomial(logit), 
##     data = df.ioutcomes)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.008  -0.697  -0.541  -0.374   2.350  
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)  
## (Intercept)        -3.40209    1.53648   -2.21    0.027 *
## PreEntryBusinesAve  0.04219    0.33252    0.13    0.899  
## PreEntryMgtAve     -0.06186    0.29293   -0.21    0.833  
## PreEntryPlanAve     0.58205    0.27684    2.10    0.036 *
## StartupExperience  -0.00341    0.15507   -0.02    0.982  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 86.808  on 88  degrees of freedom
## Residual deviance: 81.539  on 84  degrees of freedom
##   (3 observations deleted due to missingness)
## AIC: 91.54
## 
## Number of Fisher Scoring iterations: 4

# GrowthSales~PreEntryExperience
formula.Sales <- paste0("SSalesChange ~ ", preentry.vars)
summary(glm(as.formula(formula.Sales), data = df.ioutcomes, family = binomial(logit)))
## 
## Call:
## glm(formula = as.formula(formula.Sales), family = binomial(logit), 
##     data = df.ioutcomes)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.365  -0.747  -0.444   0.520   2.109  
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -6.6833     1.8219   -3.67  0.00024 ***
## PreEntryBusinesAve   0.8011     0.3729    2.15  0.03171 *  
## PreEntryMgtAve      -0.3072     0.3133   -0.98  0.32681    
## PreEntryPlanAve      0.9827     0.2946    3.34  0.00085 ***
## StartupExperience    0.0525     0.1501    0.35  0.72647    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 101.710  on 88  degrees of freedom
## Residual deviance:  80.664  on 84  degrees of freedom
##   (3 observations deleted due to missingness)
## AIC: 90.66
## 
## Number of Fisher Scoring iterations: 5

# GrowthEmployees~PreEntryExperience
formula.Staff <- paste0("SStaffChange ~ ", preentry.vars)
summary(glm(as.formula(formula.Staff), data = df.ioutcomes, family = binomial(logit)))
## 
## Call:
## glm(formula = as.formula(formula.Staff), family = binomial(logit), 
##     data = df.ioutcomes)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -2.012  -0.926  -0.357   0.992   2.167  
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -5.4254     1.5584   -3.48   0.0005 ***
## PreEntryBusinesAve   0.3224     0.3027    1.07   0.2868    
## PreEntryMgtAve       0.5290     0.2696    1.96   0.0497 *  
## PreEntryPlanAve      0.8086     0.2480    3.26   0.0011 ** 
## StartupExperience   -0.0917     0.1365   -0.67   0.5017    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 122.469  on 88  degrees of freedom
## Residual deviance:  98.337  on 84  degrees of freedom
##   (3 observations deleted due to missingness)
## AIC: 108.3
## 
## Number of Fisher Scoring iterations: 4

Experience on Incubator Outcomes (Add controls)

add these controls:

Set up variables

# Resource investment (convert from % to num)
df.ioutcomes$ResourceInvestment <- as.integer(sub(df.ioutcomes$ResourceInvestment, 
    pattern = "%", replacement = ""))
df.ioutcomes$ResourceOperations <- as.integer(sub(df.ioutcomes$ResourceOperations, 
    pattern = "%", replacement = ""))
df.ioutcomes$ResourceHuman <- as.integer(sub(df.ioutcomes$ResourceHuman, pattern = "%", 
    replacement = ""))

# Team age (use ordered factor)
ordered(df.ioutcomes$TeamAge)
##  [1] 25-34 years 25-34 years 25-34 years 25-34 years 16-24 years
##  [6] 25-34 years 25-34 years 35-44 years 25-34 years 25-34 years
## [11] 25-34 years 35-44 years 15-34 years 25-34 years 25-34 years
## [16] 24-34 years 25-34 years 25-34 years 25-34 years 25-34 years
## [21] 25-34 years 25-34 years 35-44 years 25-34 years 25-34 years
## [26] 35-44 years 25-34 years 25-34 years 25-34 years 16-24 years
## [31] 16-24 years 16-24 years 25-34 years 16-24 years 16-24 years
## [36] 25-34 years 25-34 years 25-34 years 25-34 years 25-34 years
## [41] 25-34 years 25-34 years 25-34 years 25-34 years 25-34 years
## [46] 25-34 years 25-34 years 25-34 years 25-34 years 16-24 years
## [51] 25-34 years 25-34 years 25-34 years 25-34 years 25-34 years
## [56] 25-34 years 25-34 years 25-34 years 25-34 years 25-34 years
## [61] 35-44 years 25-34 years 25-34 years 35-44 years 35-44 years
## [66] 35-44 years 35-44 years 25-34 years 25-34 years 25-34 years
## [71] 35-44 years 25-34 years 25-34 years 25-34 years 25-34 years
## [76] 25-34 years 25-34 years 35-44 years 35-44 years 25-34 years
## [81] 35-44 years 25-34 years 35-44 years 25-34 years 25-34 years
## [86] 35-44 years 25-34 years 35-44 years 25-34 years 25-34 years
## [91] 25-34 years 25-34 years
## 5 Levels: 15-34 years < 16-24 years < 24-34 years < ... < 35-44 years

# Create Averages Team Heterogeneity, Flexibility, and Preference
df.ioutcomes$HeterogeneityAve <- rowMeans(df.ioutcomes[c("TeamHeterogeneity1", 
    "TeamHeterogeneity2", "TeamHeterogeneity3")])
df.ioutcomes$FlexibilityAve <- rowMeans(df.ioutcomes[c("TeamFlexibility1", "TeamFlexibility2", 
    "TeamFlexibility3")])
df.ioutcomes$PreferenceAve <- rowMeans(df.ioutcomes[c("TeamPreference1", "TeamPreference2", 
    "TeamPreference3")])

# Nationality (use factors)
factor(df.ioutcomes$PersonalNationality)
##  [1] Argentina   England     Argentina   Argentina   Indian     
##  [6] India       USA         England     Peru        Chile      
## [11] Brazil      Italy       USA         Argentina   India      
## [16] Mexico      USA         USA         Brazil      Mexico     
## [21] USA         Mexico      India       USA         Venezuela  
## [26] Brazil      USA         Italy       Italy       USA        
## [31] India       Israel      Canada      Uruguay     China      
## [36] USA         USA         USA         Pakistan    USA        
## [41] England     USA         USA         USA         Argentina  
## [46] Argentina   France      Argentina   Chile       Argentina  
## [51] Argentina   Ecuador     Spain       Kenya       France     
## [56] India       USA         USA         Mexico      Indonesia  
## [61] England     Chile       Chile       Peru        Venezuela  
## [66] Argentina   Greece      Ukraine     Austria     USA        
## [71] Chile       India       Spain       USA         Russia     
## [76] Spain       Chile       Netherlands Uruguay     New Zealand
## [81] Chile       Chile       Chile       Chile       Peru       
## [86] Bolivia     Chile       USA         Chile       Chile      
## [91] Sir Lanka   USA        
## 29 Levels: Argentina Austria Bolivia Brazil Canada Chile China ... Venezuela

And now let's add these controls too:

as.logical(as.integer(factor(df.ioutcomes$IncubatorMentor)) - 1)
##  [1]  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE
## [12] FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE
## [23]  TRUE  TRUE  TRUE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE  TRUE
## [34] FALSE  TRUE FALSE  TRUE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE
## [45] FALSE FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE
## [56] FALSE  TRUE  TRUE FALSE FALSE  TRUE  TRUE FALSE  TRUE  TRUE FALSE
## [67] FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE
## [78]  TRUE  TRUE  TRUE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE  TRUE
## [89] FALSE FALSE FALSE FALSE

# Fix Personal Degrees (' Vs ’ ')
unique(df.ioutcomes$PersonalDegree)
## [1] "Master’s degree"     "Bachelor’s degree"   "Doctorate degree"   
## [4] "Master's degree"     "Bachelor's degree"   "High school"        
## [7] "Professional degree"
df.ioutcomes$PersonalDegree[df.ioutcomes$PersonalDegree == "Bachelor’s degree"] <- "Bachelor's degree"
df.ioutcomes$PersonalDegree[df.ioutcomes$PersonalDegree == "Master’s degree"] <- "Master's degree"
df.ioutcomes$PersonalDegree[df.ioutcomes$PersonalDegree == "Professional degree"] <- "Doctorate degree"
qplot(data = df.ioutcomes, PersonalDegree, geom = "histogram")

plot of chunk unnamed-chunk-40


# Ethnia
unique(df.ioutcomes$PersonalEthnicity)
## [1] "Latino or Hispanic"              "Caucasian (non-Hispanic)"       
## [3] "Asian/Pacific Islanders"         "African-American (non-Hispanic)"
## [5] "Other"
qplot(data = df.ioutcomes, PersonalEthnicity, geom = "histogram")

plot of chunk unnamed-chunk-40


class(df.ioutcomes$StartupAge)
## [1] "integer"

qplot(data = df.ioutcomes, StartupMBAs, geom = "histogram")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-40

qplot(data = df.ioutcomes, StartupPhDs, geom = "histogram")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-40


df.ioutcomes$SUPFastTrack[df.ioutcomes$SUPFastTrack == ""] <- NA
qplot(data = df.ioutcomes, SUPFastTrack, geom = "histogram")

plot of chunk unnamed-chunk-40


df.ioutcomes$SUPApplyDemoDay[df.ioutcomes$SUPApplyDemoDay == ""] <- NA
qplot(data = df.ioutcomes, SUPApplyDemoDay, geom = "histogram")

plot of chunk unnamed-chunk-40


df.ioutcomes$SUPPresentDemoDay[df.ioutcomes$SUPPresentDemoDay == ""] <- NA
qplot(data = df.ioutcomes, SUPPresentDemoDay, geom = "histogram")

plot of chunk unnamed-chunk-40

Now let's add the controls to the iOutcomes ~ Experience

Adding all the controls to the basic models:

# MVP:  MGT + Startup Experience are important
  # With all
control.vars <- "SatisfactionJob + SatisfactionIncubator + ResourceInvestment + ResourceOperations + ResourceHuman + TeamNumberOwners + factor(df.ioutcomes$TeamAge) + HeterogeneityAve + FlexibilityAve + PreferenceAve + IncubatorMentor + factor(PersonalDegree) + factor(PersonalEthnicity)  + StartupAge + StartupMBAs + StartupPhDs + SUPFastTrack + SUPApplyDemoDay + SUPPresentDemoDay"

formula.MVP2 <- paste0(formula.MVP, " + ", control.vars)
summary(glm(as.formula(formula.MVP2), family=binomial, data=df.ioutcomes)) #MgtAve still significant, but StartupExperience not.  Need to remove some variables.  Control variables SatisfactionIncubator, StartupAge, SUPFastTrack.
## 
## Call:
## glm(formula = as.formula(formula.MVP2), family = binomial, data = df.ioutcomes)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0080  -0.5065   0.0507   0.6743   1.7475  
## 
## Coefficients:
##                                                    Estimate Std. Error
## (Intercept)                                         14.8200  5594.9135
## PreEntryBusinesAve                                  -0.3233     0.4770
## PreEntryMgtAve                                       1.1935     0.4906
## PreEntryPlanAve                                      0.1364     0.4701
## StartupExperience                                    0.3685     0.3272
## SatisfactionJob                                     -0.2625     0.5773
## SatisfactionIncubator                                0.8369     0.4783
## ResourceInvestment                                   0.0862     0.1787
## ResourceOperations                                   0.1091     0.1706
## ResourceHuman                                        0.1412     0.1725
## TeamNumberOwners                                     0.1675     0.4230
## factor(df.ioutcomes$TeamAge)16-24 years            -17.0912  3956.1809
## factor(df.ioutcomes$TeamAge)24-34 years              1.3455  5594.8842
## factor(df.ioutcomes$TeamAge)25-34 years            -14.5021  3956.1807
## factor(df.ioutcomes$TeamAge)35-44 years            -13.9638  3956.1809
## HeterogeneityAve                                    -0.0594     0.3791
## FlexibilityAve                                      -1.0609     0.6520
## PreferenceAve                                        0.1765     0.4114
## IncubatorMentorYes                                   1.1328     1.0218
## factor(PersonalDegree)Doctorate degree               1.0779     2.1703
## factor(PersonalDegree)Master's degree                1.0045     0.8940
## factor(PersonalEthnicity)Asian/Pacific Islanders   -13.1631  3956.1811
## factor(PersonalEthnicity)Caucasian (non-Hispanic)  -11.9621  3956.1810
## factor(PersonalEthnicity)Latino or Hispanic        -13.4540  3956.1809
## factor(PersonalEthnicity)Other                     -13.3596  3956.1809
## StartupAge                                          -0.2027     0.0870
## StartupMBAs                                         -0.0235     0.4219
## StartupPhDs                                         -2.2087     1.4101
## SUPFastTrackYes                                     -4.8506     2.0914
## SUPApplyDemoDayYes                                   1.7876     1.1971
## SUPPresentDemoDayYes                                 2.2418     1.5814
##                                                   z value Pr(>|z|)  
## (Intercept)                                          0.00    0.998  
## PreEntryBusinesAve                                  -0.68    0.498  
## PreEntryMgtAve                                       2.43    0.015 *
## PreEntryPlanAve                                      0.29    0.772  
## StartupExperience                                    1.13    0.260  
## SatisfactionJob                                     -0.45    0.649  
## SatisfactionIncubator                                1.75    0.080 .
## ResourceInvestment                                   0.48    0.630  
## ResourceOperations                                   0.64    0.522  
## ResourceHuman                                        0.82    0.413  
## TeamNumberOwners                                     0.40    0.692  
## factor(df.ioutcomes$TeamAge)16-24 years              0.00    0.997  
## factor(df.ioutcomes$TeamAge)24-34 years              0.00    1.000  
## factor(df.ioutcomes$TeamAge)25-34 years              0.00    0.997  
## factor(df.ioutcomes$TeamAge)35-44 years              0.00    0.997  
## HeterogeneityAve                                    -0.16    0.875  
## FlexibilityAve                                      -1.63    0.104  
## PreferenceAve                                        0.43    0.668  
## IncubatorMentorYes                                   1.11    0.268  
## factor(PersonalDegree)Doctorate degree               0.50    0.619  
## factor(PersonalDegree)Master's degree                1.12    0.261  
## factor(PersonalEthnicity)Asian/Pacific Islanders     0.00    0.997  
## factor(PersonalEthnicity)Caucasian (non-Hispanic)    0.00    0.998  
## factor(PersonalEthnicity)Latino or Hispanic          0.00    0.997  
## factor(PersonalEthnicity)Other                       0.00    0.997  
## StartupAge                                          -2.33    0.020 *
## StartupMBAs                                         -0.06    0.956  
## StartupPhDs                                         -1.57    0.117  
## SUPFastTrackYes                                     -2.32    0.020 *
## SUPApplyDemoDayYes                                   1.49    0.135  
## SUPPresentDemoDayYes                                 1.42    0.156  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 115.255  on 83  degrees of freedom
## Residual deviance:  62.501  on 53  degrees of freedom
##   (8 observations deleted due to missingness)
## AIC: 124.5
## 
## Number of Fisher Scoring iterations: 16

formula.MVP2 <- paste0(formula.MVP, " + SatisfactionIncubator + StartupAge + SUPFastTrack")
summary(logit.MVP <- glm(as.formula(formula.MVP2), family=binomial, data=df.ioutcomes)) #MgtAve still significant, but StartupExperience not.  Need to remove some variables.  Control variables SatisfactionIncubator, StartupAge, SUPFastTrackYes.
## 
## Call:
## glm(formula = as.formula(formula.MVP2), family = binomial, data = df.ioutcomes)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -2.066  -0.860   0.415   0.827   1.795  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)  
## (Intercept)            -0.2894     1.2717   -0.23    0.820  
## PreEntryBusinesAve     -0.4287     0.3103   -1.38    0.167  
## PreEntryMgtAve          0.7079     0.3135    2.26    0.024 *
## PreEntryPlanAve        -0.2525     0.2877   -0.88    0.380  
## StartupExperience       0.3350     0.1750    1.91    0.056 .
## SatisfactionIncubator   0.5304     0.2370    2.24    0.025 *
## StartupAge             -0.1072     0.0537   -2.00    0.046 *
## SUPFastTrackYes        -1.1639     0.6772   -1.72    0.086 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 118.476  on 85  degrees of freedom
## Residual deviance:  90.803  on 78  degrees of freedom
##   (6 observations deleted due to missingness)
## AIC: 106.8
## 
## Number of Fisher Scoring iterations: 4
exp(coef(logit.MVP))
##           (Intercept)    PreEntryBusinesAve        PreEntryMgtAve 
##                0.7487                0.6514                2.0297 
##       PreEntryPlanAve     StartupExperience SatisfactionIncubator 
##                0.7768                1.3980                1.6996 
##            StartupAge       SUPFastTrackYes 
##                0.8983                0.3123



### CONTINUE UPDATING FORMULAS BELOW

### CONSIDER USING A VARIABLE SELECTION PROCESS FOR THE CONTROL VARIABLES

## LAUNCH

# Launch: MGT is important
  # With all
formula.Launch2 <- paste0(formula.Launch, " + ", control.vars)
summary(glm(as.formula(formula.Launch2), family=binomial, data=df.ioutcomes))
## 
## Call:
## glm(formula = as.formula(formula.Launch2), family = binomial, 
##     data = df.ioutcomes)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5918  -0.4071  -0.0048   0.0000   1.7288  
## 
## Coefficients:
##                                                    Estimate Std. Error
## (Intercept)                                       -5.31e+01   1.52e+04
## PreEntryBusinesAve                                -1.08e+00   9.28e-01
## PreEntryManagement2                                1.06e+00   5.98e-01
## PrePlanningModel                                   7.92e-01   4.99e-01
## StartupExperience                                  7.43e-02   2.89e-01
## SatisfactionJob                                    5.69e-01   9.39e-01
## SatisfactionIncubator                              6.87e-01   5.71e-01
## ResourceInvestment                                 6.10e-01   3.67e-01
## ResourceOperations                                 5.53e-01   3.62e-01
## ResourceHuman                                      5.00e-01   3.52e-01
## TeamNumberOwners                                   2.60e-01   6.33e-01
## factor(df.ioutcomes$TeamAge)16-24 years            1.99e+01   1.08e+04
## factor(df.ioutcomes$TeamAge)24-34 years            6.70e-01   1.52e+04
## factor(df.ioutcomes$TeamAge)25-34 years            1.94e+01   1.08e+04
## factor(df.ioutcomes$TeamAge)35-44 years            1.81e+01   1.08e+04
## HeterogeneityAve                                  -2.40e-01   5.79e-01
## FlexibilityAve                                     2.76e-01   5.67e-01
## PreferenceAve                                     -2.18e+00   8.90e-01
## IncubatorMentorYes                                 7.06e-01   1.44e+00
## factor(PersonalDegree)Doctorate degree             1.60e-01   2.67e+00
## factor(PersonalDegree)High school                 -2.23e+01   1.08e+04
## factor(PersonalDegree)Master's degree             -2.06e-01   1.22e+00
## factor(PersonalEthnicity)Asian/Pacific Islanders  -4.22e+01   1.10e+04
## factor(PersonalEthnicity)Caucasian (non-Hispanic) -1.86e+01   1.08e+04
## factor(PersonalEthnicity)Latino or Hispanic       -1.94e+01   1.08e+04
## factor(PersonalEthnicity)Other                    -4.07e+01   1.15e+04
## StartupAge                                        -6.52e-02   9.61e-02
## StartupMBAs                                       -9.42e-01   7.58e-01
## StartupPhDs                                       -9.97e-01   1.47e+00
## SUPFastTrackYes                                   -2.13e+00   1.86e+00
## SUPApplyDemoDayYes                                 1.82e+00   1.56e+00
## SUPPresentDemoDayYes                               1.97e+00   1.76e+00
##                                                   z value Pr(>|z|)  
## (Intercept)                                          0.00    0.997  
## PreEntryBusinesAve                                  -1.16    0.245  
## PreEntryManagement2                                  1.77    0.077 .
## PrePlanningModel                                     1.59    0.112  
## StartupExperience                                    0.26    0.797  
## SatisfactionJob                                      0.61    0.544  
## SatisfactionIncubator                                1.20    0.229  
## ResourceInvestment                                   1.66    0.097 .
## ResourceOperations                                   1.53    0.126  
## ResourceHuman                                        1.42    0.155  
## TeamNumberOwners                                     0.41    0.682  
## factor(df.ioutcomes$TeamAge)16-24 years              0.00    0.999  
## factor(df.ioutcomes$TeamAge)24-34 years              0.00    1.000  
## factor(df.ioutcomes$TeamAge)25-34 years              0.00    0.999  
## factor(df.ioutcomes$TeamAge)35-44 years              0.00    0.999  
## HeterogeneityAve                                    -0.41    0.679  
## FlexibilityAve                                       0.49    0.627  
## PreferenceAve                                       -2.45    0.014 *
## IncubatorMentorYes                                   0.49    0.624  
## factor(PersonalDegree)Doctorate degree               0.06    0.952  
## factor(PersonalDegree)High school                    0.00    0.998  
## factor(PersonalDegree)Master's degree               -0.17    0.866  
## factor(PersonalEthnicity)Asian/Pacific Islanders     0.00    0.997  
## factor(PersonalEthnicity)Caucasian (non-Hispanic)    0.00    0.999  
## factor(PersonalEthnicity)Latino or Hispanic          0.00    0.999  
## factor(PersonalEthnicity)Other                       0.00    0.997  
## StartupAge                                          -0.68    0.497  
## StartupMBAs                                         -1.24    0.214  
## StartupPhDs                                         -0.68    0.499  
## SUPFastTrackYes                                     -1.15    0.251  
## SUPApplyDemoDayYes                                   1.17    0.243  
## SUPPresentDemoDayYes                                 1.12    0.264  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 95.044  on 84  degrees of freedom
## Residual deviance: 45.959  on 53  degrees of freedom
##   (7 observations deleted due to missingness)
## AIC: 110
## 
## Number of Fisher Scoring iterations: 18
# ResourceInvestment + ResourceOperations + ResourceHuman + PreferenceAve significant
# keep SUPFastTrack too
formula.Launch2 <- paste0(formula.Launch, " + ResourceInvestment + ResourceOperations + ResourceHuman + PreferenceAve + SUPFastTrack")
summary(glm(as.formula(formula.Launch2), family=binomial, data=df.ioutcomes))
## 
## Call:
## glm(formula = as.formula(formula.Launch2), family = binomial, 
##     data = df.ioutcomes)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.367  -0.735  -0.454  -0.128   2.336  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)  
## (Intercept)          -21.686     16.129   -1.34    0.179  
## PreEntryBusinesAve    -0.330      0.361   -0.92    0.360  
## PreEntryManagement2    0.632      0.285    2.22    0.027 *
## PrePlanningModel       0.598      0.270    2.22    0.026 *
## StartupExperience      0.223      0.154    1.44    0.148  
## ResourceInvestment     0.203      0.164    1.24    0.215  
## ResourceOperations     0.204      0.159    1.29    0.198  
## ResourceHuman          0.192      0.157    1.23    0.220  
## PreferenceAve         -0.608      0.359   -1.69    0.090 .
## SUPFastTrackYes       -0.197      0.661   -0.30    0.766  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 95.044  on 84  degrees of freedom
## Residual deviance: 77.650  on 75  degrees of freedom
##   (7 observations deleted due to missingness)
## AIC: 97.65
## 
## Number of Fisher Scoring iterations: 5

# 1st Sale: StartupExperience is important
  # With all
summary(glm(SFSaleOutcome~ PreEntryBusinesAve+PreEntryMgtAve+PrePlanningModel+StartupExperience
            + SatisfactionJob + SatisfactionIncubator
            + ResourceInvestment + ResourceOperations + ResourceHuman
            + TeamNumberOwners
            + factor(df.ioutcomes$TeamAge) #bad
            + HeterogeneityAve + FlexibilityAve + PreferenceAve
            + IncubatorMentor
            + factor(PersonalDegree)
            + factor(PersonalEthnicity) 
            + StartupAge
            + StartupMBAs+ StartupPhDs
            , family=binomial, data=df.ioutcomes))
## 
## Call:
## glm(formula = SFSaleOutcome ~ PreEntryBusinesAve + PreEntryMgtAve + 
##     PrePlanningModel + StartupExperience + SatisfactionJob + 
##     SatisfactionIncubator + ResourceInvestment + ResourceOperations + 
##     ResourceHuman + TeamNumberOwners + factor(df.ioutcomes$TeamAge) + 
##     HeterogeneityAve + FlexibilityAve + PreferenceAve + IncubatorMentor + 
##     factor(PersonalDegree) + factor(PersonalEthnicity) + StartupAge + 
##     StartupMBAs + StartupPhDs, family = binomial, data = df.ioutcomes)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.8383  -0.4009  -0.0484  -0.0001   2.1919  
## 
## Coefficients:
##                                                    Estimate Std. Error
## (Intercept)                                        -89.0488  9224.4474
## PreEntryBusinesAve                                   0.3939     0.6482
## PreEntryMgtAve                                       0.1943     0.5487
## PrePlanningModel                                     0.8657     0.5928
## StartupExperience                                   -0.2721     0.2715
## SatisfactionJob                                      2.8589     1.2273
## SatisfactionIncubator                                0.7420     0.6589
## ResourceInvestment                                   0.4664     0.2725
## ResourceOperations                                   0.4480     0.2624
## ResourceHuman                                        0.4928     0.2690
## TeamNumberOwners                                    -0.1681     0.4228
## factor(df.ioutcomes$TeamAge)16-24 years              8.9663  6522.6400
## factor(df.ioutcomes$TeamAge)24-34 years             -6.1646  9224.4044
## factor(df.ioutcomes$TeamAge)25-34 years              8.6734  6522.6399
## factor(df.ioutcomes$TeamAge)35-44 years              8.0779  6522.6402
## HeterogeneityAve                                    -0.8847     0.6994
## FlexibilityAve                                      -0.0141     0.6136
## PreferenceAve                                       -0.3062     0.6865
## IncubatorMentorYes                                  -0.1867     1.1177
## factor(PersonalDegree)Doctorate degree              -0.5782     2.0910
## factor(PersonalDegree)High school                  -12.7711  6522.6395
## factor(PersonalDegree)Master's degree                0.2856     1.1815
## factor(PersonalEthnicity)Asian/Pacific Islanders    14.4698  6522.6392
## factor(PersonalEthnicity)Caucasian (non-Hispanic)   15.2282  6522.6390
## factor(PersonalEthnicity)Latino or Hispanic         16.0578  6522.6390
## factor(PersonalEthnicity)Other                      -3.6802  6893.6640
## StartupAge                                           0.0383     0.0770
## StartupMBAs                                          1.5377     0.6441
## StartupPhDs                                         -4.1610     1.9897
##                                                   z value Pr(>|z|)  
## (Intercept)                                         -0.01    0.992  
## PreEntryBusinesAve                                   0.61    0.543  
## PreEntryMgtAve                                       0.35    0.723  
## PrePlanningModel                                     1.46    0.144  
## StartupExperience                                   -1.00    0.316  
## SatisfactionJob                                      2.33    0.020 *
## SatisfactionIncubator                                1.13    0.260  
## ResourceInvestment                                   1.71    0.087 .
## ResourceOperations                                   1.71    0.088 .
## ResourceHuman                                        1.83    0.067 .
## TeamNumberOwners                                    -0.40    0.691  
## factor(df.ioutcomes$TeamAge)16-24 years              0.00    0.999  
## factor(df.ioutcomes$TeamAge)24-34 years              0.00    0.999  
## factor(df.ioutcomes$TeamAge)25-34 years              0.00    0.999  
## factor(df.ioutcomes$TeamAge)35-44 years              0.00    0.999  
## HeterogeneityAve                                    -1.26    0.206  
## FlexibilityAve                                      -0.02    0.982  
## PreferenceAve                                       -0.45    0.656  
## IncubatorMentorYes                                  -0.17    0.867  
## factor(PersonalDegree)Doctorate degree              -0.28    0.782  
## factor(PersonalDegree)High school                    0.00    0.998  
## factor(PersonalDegree)Master's degree                0.24    0.809  
## factor(PersonalEthnicity)Asian/Pacific Islanders     0.00    0.998  
## factor(PersonalEthnicity)Caucasian (non-Hispanic)    0.00    0.998  
## factor(PersonalEthnicity)Latino or Hispanic          0.00    0.998  
## factor(PersonalEthnicity)Other                       0.00    1.000  
## StartupAge                                           0.50    0.618  
## StartupMBAs                                          2.39    0.017 *
## StartupPhDs                                         -2.09    0.037 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 83.449  on 87  degrees of freedom
## Residual deviance: 46.407  on 59  degrees of freedom
##   (4 observations deleted due to missingness)
## AIC: 104.4
## 
## Number of Fisher Scoring iterations: 17
# SatisfactionJob + ResourceInvestment + ResourceOperations + ResourceHuman + StartupMBAs + StartupPhDs

  # With Select
summary(glm(SFSaleOutcome~ PreEntryBusinesAve+PreEntryMgtAve+PrePlanningModel+StartupExperience
            + SatisfactionJob + ResourceInvestment + ResourceOperations + ResourceHuman + StartupMBAs + StartupPhDs
            , family=binomial , data=df.ioutcomes))
## 
## Call:
## glm(formula = SFSaleOutcome ~ PreEntryBusinesAve + PreEntryMgtAve + 
##     PrePlanningModel + StartupExperience + SatisfactionJob + 
##     ResourceInvestment + ResourceOperations + ResourceHuman + 
##     StartupMBAs + StartupPhDs, family = binomial, data = df.ioutcomes)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6888  -0.5597  -0.1990  -0.0485   2.4843  
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)   
## (Intercept)        -58.1289    26.3481   -2.21   0.0274 * 
## PreEntryBusinesAve   0.3146     0.4345    0.72   0.4690   
## PreEntryMgtAve       0.1385     0.3513    0.39   0.6934   
## PrePlanningModel     0.6593     0.3805    1.73   0.0831 . 
## StartupExperience   -0.0405     0.1678   -0.24   0.8091   
## SatisfactionJob      1.4562     0.6150    2.37   0.0179 * 
## ResourceInvestment   0.4615     0.2521    1.83   0.0672 . 
## ResourceOperations   0.4554     0.2444    1.86   0.0625 . 
## ResourceHuman        0.4614     0.2447    1.89   0.0594 . 
## StartupMBAs          1.0906     0.3779    2.89   0.0039 **
## StartupPhDs         -3.0535     1.4627   -2.09   0.0368 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 86.808  on 88  degrees of freedom
## Residual deviance: 58.671  on 78  degrees of freedom
##   (3 observations deleted due to missingness)
## AIC: 80.67
## 
## Number of Fisher Scoring iterations: 7

# Sales Change:
  # With all
summary(glm(SSalesChange~ PreEntryBusinesAve+PreEntryMgtAve+PrePlanningModel+StartupExperience
            + SatisfactionJob + SatisfactionIncubator
            + ResourceInvestment + ResourceOperations + ResourceHuman
            + TeamNumberOwners
            + factor(df.ioutcomes$TeamAge) #bad
            + HeterogeneityAve + FlexibilityAve + PreferenceAve
            + IncubatorMentor
            + factor(PersonalDegree)
            + factor(PersonalEthnicity) 
            + StartupAge
            + StartupMBAs+ StartupPhDs
            , family=binomial, data=df.ioutcomes))
## 
## Call:
## glm(formula = SSalesChange ~ PreEntryBusinesAve + PreEntryMgtAve + 
##     PrePlanningModel + StartupExperience + SatisfactionJob + 
##     SatisfactionIncubator + ResourceInvestment + ResourceOperations + 
##     ResourceHuman + TeamNumberOwners + factor(df.ioutcomes$TeamAge) + 
##     HeterogeneityAve + FlexibilityAve + PreferenceAve + IncubatorMentor + 
##     factor(PersonalDegree) + factor(PersonalEthnicity) + StartupAge + 
##     StartupMBAs + StartupPhDs, family = binomial, data = df.ioutcomes)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1490  -0.3942  -0.0818   0.0000   2.4587  
## 
## Coefficients:
##                                                    Estimate Std. Error
## (Intercept)                                       -4.14e+01   9.22e+03
## PreEntryBusinesAve                                 2.98e-01   7.02e-01
## PreEntryMgtAve                                     2.88e-01   5.58e-01
## PrePlanningModel                                   7.22e-01   6.10e-01
## StartupExperience                                  5.43e-01   4.65e-01
## SatisfactionJob                                    2.21e+00   1.04e+00
## SatisfactionIncubator                             -1.00e+00   5.95e-01
## ResourceInvestment                                 4.85e-03   1.59e-01
## ResourceOperations                                 2.84e-02   1.51e-01
## ResourceHuman                                     -1.09e-03   1.53e-01
## TeamNumberOwners                                   2.85e-01   5.20e-01
## factor(df.ioutcomes$TeamAge)16-24 years            1.27e+01   6.52e+03
## factor(df.ioutcomes$TeamAge)24-34 years           -1.04e+00   9.22e+03
## factor(df.ioutcomes$TeamAge)25-34 years            1.31e+01   6.52e+03
## factor(df.ioutcomes$TeamAge)35-44 years            7.99e+00   6.52e+03
## HeterogeneityAve                                   1.96e-01   5.36e-01
## FlexibilityAve                                    -1.32e+00   8.47e-01
## PreferenceAve                                      3.31e-01   5.73e-01
## IncubatorMentorYes                                 5.46e-01   1.15e+00
## factor(PersonalDegree)Doctorate degree             7.03e-01   2.13e+00
## factor(PersonalDegree)High school                 -1.37e+01   6.52e+03
## factor(PersonalDegree)Master's degree              3.36e+00   1.68e+00
## factor(PersonalEthnicity)Asian/Pacific Islanders   1.10e+01   6.52e+03
## factor(PersonalEthnicity)Caucasian (non-Hispanic)  1.27e+01   6.52e+03
## factor(PersonalEthnicity)Latino or Hispanic        1.10e+01   6.52e+03
## factor(PersonalEthnicity)Other                    -5.20e+00   7.02e+03
## StartupAge                                         2.08e-01   1.02e-01
## StartupMBAs                                       -5.86e-01   4.82e-01
## StartupPhDs                                        1.91e+00   1.33e+00
##                                                   z value Pr(>|z|)  
## (Intercept)                                          0.00    0.996  
## PreEntryBusinesAve                                   0.42    0.671  
## PreEntryMgtAve                                       0.52    0.606  
## PrePlanningModel                                     1.18    0.236  
## StartupExperience                                    1.17    0.242  
## SatisfactionJob                                      2.11    0.035 *
## SatisfactionIncubator                               -1.69    0.092 .
## ResourceInvestment                                   0.03    0.976  
## ResourceOperations                                   0.19    0.851  
## ResourceHuman                                       -0.01    0.994  
## TeamNumberOwners                                     0.55    0.584  
## factor(df.ioutcomes$TeamAge)16-24 years              0.00    0.998  
## factor(df.ioutcomes$TeamAge)24-34 years              0.00    1.000  
## factor(df.ioutcomes$TeamAge)25-34 years              0.00    0.998  
## factor(df.ioutcomes$TeamAge)35-44 years              0.00    0.999  
## HeterogeneityAve                                     0.36    0.715  
## FlexibilityAve                                      -1.56    0.118  
## PreferenceAve                                        0.58    0.563  
## IncubatorMentorYes                                   0.47    0.635  
## factor(PersonalDegree)Doctorate degree               0.33    0.742  
## factor(PersonalDegree)High school                    0.00    0.998  
## factor(PersonalDegree)Master's degree                2.00    0.046 *
## factor(PersonalEthnicity)Asian/Pacific Islanders     0.00    0.999  
## factor(PersonalEthnicity)Caucasian (non-Hispanic)    0.00    0.998  
## factor(PersonalEthnicity)Latino or Hispanic          0.00    0.999  
## factor(PersonalEthnicity)Other                       0.00    0.999  
## StartupAge                                           2.04    0.041 *
## StartupMBAs                                         -1.22    0.224  
## StartupPhDs                                          1.44    0.150  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 96.713  on 87  degrees of freedom
## Residual deviance: 45.085  on 59  degrees of freedom
##   (4 observations deleted due to missingness)
## AIC: 103.1
## 
## Number of Fisher Scoring iterations: 17

  # With Select: Planning + business + are important
summary(glm(SSalesChange~ PreEntryBusinesAve+PreEntryMgtAve+PreEntryPlanAve+StartupExperience
            + SatisfactionJob + SatisfactionIncubator
            + factor(df.ioutcomes$TeamAge) #bad
            + HeterogeneityAve + FlexibilityAve + PreferenceAve

           , family=binomial, data=df.ioutcomes))
## 
## Call:
## glm(formula = SSalesChange ~ PreEntryBusinesAve + PreEntryMgtAve + 
##     PreEntryPlanAve + StartupExperience + SatisfactionJob + SatisfactionIncubator + 
##     factor(df.ioutcomes$TeamAge) + HeterogeneityAve + FlexibilityAve + 
##     PreferenceAve, family = binomial, data = df.ioutcomes)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.584  -0.721  -0.287   0.089   2.018  
## 
## Coefficients:
##                                         Estimate Std. Error z value
## (Intercept)                              -24.331   2399.546   -0.01
## PreEntryBusinesAve                         0.924      0.429    2.15
## PreEntryMgtAve                            -0.160      0.360   -0.44
## PreEntryPlanAve                            1.040      0.391    2.66
## StartupExperience                          0.113      0.192    0.59
## SatisfactionJob                            0.996      0.551    1.81
## SatisfactionIncubator                     -0.483      0.316   -1.53
## factor(df.ioutcomes$TeamAge)16-24 years   11.983   2399.546    0.00
## factor(df.ioutcomes$TeamAge)24-34 years   -2.831   3393.469    0.00
## factor(df.ioutcomes$TeamAge)25-34 years   13.536   2399.545    0.01
## factor(df.ioutcomes$TeamAge)35-44 years   11.977   2399.545    0.00
## HeterogeneityAve                           0.064      0.344    0.19
## FlexibilityAve                            -0.305      0.423   -0.72
## PreferenceAve                              0.306      0.430    0.71
##                                         Pr(>|z|)   
## (Intercept)                               0.9919   
## PreEntryBusinesAve                        0.0312 * 
## PreEntryMgtAve                            0.6564   
## PreEntryPlanAve                           0.0078 **
## StartupExperience                         0.5576   
## SatisfactionJob                           0.0708 . 
## SatisfactionIncubator                     0.1255   
## factor(df.ioutcomes$TeamAge)16-24 years   0.9960   
## factor(df.ioutcomes$TeamAge)24-34 years   0.9993   
## factor(df.ioutcomes$TeamAge)25-34 years   0.9955   
## factor(df.ioutcomes$TeamAge)35-44 years   0.9960   
## HeterogeneityAve                          0.8523   
## FlexibilityAve                            0.4708   
## PreferenceAve                             0.4770   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 98.971  on 87  degrees of freedom
## Residual deviance: 69.997  on 74  degrees of freedom
##   (4 observations deleted due to missingness)
## AIC: 98
## 
## Number of Fisher Scoring iterations: 15

# Employee Change: Planning + MGT + are important
  # With all
summary(glm(SStaffChange~ PreEntryBusinesAve+PreEntryMgtAve+PrePlanningModel+StartupExperience
           + SatisfactionJob + SatisfactionIncubator
            + ResourceInvestment + ResourceOperations + ResourceHuman
            + TeamNumberOwners
            + factor(df.ioutcomes$TeamAge) #bad
            + HeterogeneityAve + FlexibilityAve + PreferenceAve
            + IncubatorMentor
            + factor(PersonalDegree)
            + factor(PersonalEthnicity) 
            + StartupAge
            + StartupMBAs+ StartupPhDs
            , family=binomial, data=df.ioutcomes))
## 
## Call:
## glm(formula = SStaffChange ~ PreEntryBusinesAve + PreEntryMgtAve + 
##     PrePlanningModel + StartupExperience + SatisfactionJob + 
##     SatisfactionIncubator + ResourceInvestment + ResourceOperations + 
##     ResourceHuman + TeamNumberOwners + factor(df.ioutcomes$TeamAge) + 
##     HeterogeneityAve + FlexibilityAve + PreferenceAve + IncubatorMentor + 
##     factor(PersonalDegree) + factor(PersonalEthnicity) + StartupAge + 
##     StartupMBAs + StartupPhDs, family = binomial, data = df.ioutcomes)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -3.333  -0.440  -0.113   0.378   2.085  
## 
## Coefficients:
##                                                    Estimate Std. Error
## (Intercept)                                        -16.2898  5594.9305
## PreEntryBusinesAve                                  -0.3528     0.4519
## PreEntryMgtAve                                       1.4770     0.6484
## PrePlanningModel                                     0.2835     0.4090
## StartupExperience                                    0.1314     0.2667
## SatisfactionJob                                      1.1527     0.6330
## SatisfactionIncubator                               -0.4240     0.3866
## ResourceInvestment                                   0.1176     0.2270
## ResourceOperations                                   0.0850     0.2129
## ResourceHuman                                        0.0858     0.2148
## TeamNumberOwners                                    -0.1748     0.4597
## factor(df.ioutcomes$TeamAge)16-24 years            -15.0980  3956.1816
## factor(df.ioutcomes$TeamAge)24-34 years              0.7288  5594.8842
## factor(df.ioutcomes$TeamAge)25-34 years            -19.6115  3956.1806
## factor(df.ioutcomes$TeamAge)35-44 years            -20.1721  3956.1809
## HeterogeneityAve                                    -0.2077     0.4486
## FlexibilityAve                                       0.0769     0.6218
## PreferenceAve                                       -0.3733     0.5568
## IncubatorMentorYes                                  -0.2395     0.8928
## factor(PersonalDegree)Doctorate degree               0.5435     1.5481
## factor(PersonalDegree)High school                  -17.9487  3956.1816
## factor(PersonalDegree)Master's degree               -1.0434     1.0045
## factor(PersonalEthnicity)Asian/Pacific Islanders    18.2716  3956.1810
## factor(PersonalEthnicity)Caucasian (non-Hispanic)   18.7923  3956.1809
## factor(PersonalEthnicity)Latino or Hispanic         17.7442  3956.1808
## factor(PersonalEthnicity)Other                      17.7172  3956.1810
## StartupAge                                           0.2457     0.0822
## StartupMBAs                                          1.9590     0.7954
## StartupPhDs                                          2.1658     1.3824
##                                                   z value Pr(>|z|)   
## (Intercept)                                          0.00   0.9977   
## PreEntryBusinesAve                                  -0.78   0.4349   
## PreEntryMgtAve                                       2.28   0.0227 * 
## PrePlanningModel                                     0.69   0.4882   
## StartupExperience                                    0.49   0.6222   
## SatisfactionJob                                      1.82   0.0686 . 
## SatisfactionIncubator                               -1.10   0.2728   
## ResourceInvestment                                   0.52   0.6044   
## ResourceOperations                                   0.40   0.6897   
## ResourceHuman                                        0.40   0.6894   
## TeamNumberOwners                                    -0.38   0.7038   
## factor(df.ioutcomes$TeamAge)16-24 years              0.00   0.9970   
## factor(df.ioutcomes$TeamAge)24-34 years              0.00   0.9999   
## factor(df.ioutcomes$TeamAge)25-34 years              0.00   0.9960   
## factor(df.ioutcomes$TeamAge)35-44 years             -0.01   0.9959   
## HeterogeneityAve                                    -0.46   0.6434   
## FlexibilityAve                                       0.12   0.9016   
## PreferenceAve                                       -0.67   0.5026   
## IncubatorMentorYes                                  -0.27   0.7885   
## factor(PersonalDegree)Doctorate degree               0.35   0.7255   
## factor(PersonalDegree)High school                    0.00   0.9964   
## factor(PersonalDegree)Master's degree               -1.04   0.2989   
## factor(PersonalEthnicity)Asian/Pacific Islanders     0.00   0.9963   
## factor(PersonalEthnicity)Caucasian (non-Hispanic)    0.00   0.9962   
## factor(PersonalEthnicity)Latino or Hispanic          0.00   0.9964   
## factor(PersonalEthnicity)Other                       0.00   0.9964   
## StartupAge                                           2.99   0.0028 **
## StartupMBAs                                          2.46   0.0138 * 
## StartupPhDs                                          1.57   0.1172   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 120.352  on 87  degrees of freedom
## Residual deviance:  59.973  on 59  degrees of freedom
##   (4 observations deleted due to missingness)
## AIC: 118
## 
## Number of Fisher Scoring iterations: 16

  # With Select
summary(glm(SStaffChange~ PreEntryBusinesAve+PreEntryMgtAve+PreEntryPlanAve+StartupExperience
            + SatisfactionJob + SatisfactionIncubator
            + factor(df.ioutcomes$TeamAge) #bad
            + HeterogeneityAve + FlexibilityAve + PreferenceAve

           , data=df.ioutcomes))
## 
## Call:
## glm(formula = SStaffChange ~ PreEntryBusinesAve + PreEntryMgtAve + 
##     PreEntryPlanAve + StartupExperience + SatisfactionJob + SatisfactionIncubator + 
##     factor(df.ioutcomes$TeamAge) + HeterogeneityAve + FlexibilityAve + 
##     PreferenceAve, data = df.ioutcomes)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.847  -0.379   0.000   0.343   0.853  
## 
## Coefficients:
##                                         Estimate Std. Error t value
## (Intercept)                               0.0781     0.5490    0.14
## PreEntryBusinesAve                        0.0622     0.0569    1.09
## PreEntryMgtAve                            0.1182     0.0570    2.07
## PreEntryPlanAve                           0.1651     0.0551    3.00
## StartupExperience                        -0.0167     0.0298   -0.56
## SatisfactionJob                           0.1455     0.0701    2.08
## SatisfactionIncubator                    -0.0619     0.0477   -1.30
## factor(df.ioutcomes$TeamAge)16-24 years  -0.6025     0.5186   -1.16
## factor(df.ioutcomes$TeamAge)24-34 years  -0.2462     0.6455   -0.38
## factor(df.ioutcomes$TeamAge)25-34 years  -0.6963     0.4677   -1.49
## factor(df.ioutcomes$TeamAge)35-44 years  -0.7392     0.4838   -1.53
## HeterogeneityAve                         -0.0217     0.0505   -0.43
## FlexibilityAve                           -0.0399     0.0623   -0.64
## PreferenceAve                            -0.0492     0.0595   -0.83
##                                         Pr(>|t|)   
## (Intercept)                               0.8873   
## PreEntryBusinesAve                        0.2778   
## PreEntryMgtAve                            0.0415 * 
## PreEntryPlanAve                           0.0037 **
## StartupExperience                         0.5767   
## SatisfactionJob                           0.0413 * 
## SatisfactionIncubator                     0.1983   
## factor(df.ioutcomes$TeamAge)16-24 years   0.2490   
## factor(df.ioutcomes$TeamAge)24-34 years   0.7040   
## factor(df.ioutcomes$TeamAge)25-34 years   0.1409   
## factor(df.ioutcomes$TeamAge)35-44 years   0.1308   
## HeterogeneityAve                          0.6687   
## FlexibilityAve                            0.5244   
## PreferenceAve                             0.4102   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.1985)
## 
##     Null deviance: 21.716  on 87  degrees of freedom
## Residual deviance: 14.688  on 74  degrees of freedom
##   (4 observations deleted due to missingness)
## AIC: 122.2
## 
## Number of Fisher Scoring iterations: 2

Appears to be the case that MGT + Bus experience seem more relevant for early stages and then Planning is important for growth and later stages.

iOutcomes ~ investorNetworkingTies

Explore basic model with only Network Investor Indirect/direct

Getting variables ready investor networking ties

# Pairs plot
pairs.panels(dplyr::select(df.ioutcomes, SMVPOutcome, SLaunchOutcome, SFSaleOutcome, 
    NetworkInvestorIndirect1:NetworkInvestorDirect3), pch = ".", main = "Investment Indirect/Direct")

plot of chunk unnamed-chunk-42


# CFA of pre entry business and management
fa.NetworkInvestor <- fa(dplyr::select(smallData, NetworkInvestorIndirect1:NetworkInvestorDirect3), 
    2)
fa.diagram(fa.NetworkInvestor)

plot of chunk unnamed-chunk-42

fa.NetworkInvestor
## Factor Analysis using method =  minres
## Call: fa(r = dplyr::select(smallData, NetworkInvestorIndirect1:NetworkInvestorDirect3), 
##     nfactors = 2)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                            MR1   MR2   h2   u2 com
## NetworkInvestorIndirect1  0.95 -0.02 0.87 0.13 1.0
## NetworkInvestorIndirect2  0.91  0.01 0.84 0.16 1.0
## NetworkInvestorIndirect3  0.83  0.07 0.78 0.22 1.0
## NetworkInvestorDirect1    0.20  0.70 0.74 0.26 1.2
## NetworkInvestorDirect2    0.16  0.68 0.65 0.35 1.1
## NetworkInvestorDirect3   -0.10  0.95 0.77 0.23 1.0
## 
##                        MR1  MR2
## SS loadings           2.64 2.01
## Proportion Var        0.44 0.33
## Cumulative Var        0.44 0.77
## Proportion Explained  0.57 0.43
## Cumulative Proportion 0.57 1.00
## 
##  With factor correlations of 
##      MR1  MR2
## MR1 1.00 0.75
## MR2 0.75 1.00
## 
## Mean item complexity =  1.1
## Test of the hypothesis that 2 factors are sufficient.
## 
## The degrees of freedom for the null model are  15  and the objective function was  5.05 with Chi Square of  445.5
## The degrees of freedom for the model are 4  and the objective function was  0.12 
## 
## The root mean square of the residuals (RMSR) is  0.02 
## The df corrected root mean square of the residuals is  0.04 
## 
## The harmonic number of observations is  89 with the empirical chi square  1.04  with prob <  0.9 
## The total number of observations was  92  with MLE Chi Square =  10.62  with prob <  0.031 
## 
## Tucker Lewis Index of factoring reliability =  0.941
## RMSEA index =  0.14  and the 90 % confidence intervals are  0.037 0.235
## BIC =  -7.47
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                 MR1  MR2
## Correlation of scores with factors             0.97 0.94
## Multiple R square of scores with factors       0.94 0.89
## Minimum correlation of possible factor scores  0.89 0.78
alpha(select(smallData, NetworkInvestorIndirect1:NetworkInvestorDirect3))
## Error: unused argument (NetworkInvestorIndirect1:NetworkInvestorDirect3)

# Create Averages Networking Investor Indirect + Networking Investor Direct
df.ioutcomes$NetInvInd <- rowMeans(df.ioutcomes[c("NetworkInvestorIndirect1", 
    "NetworkInvestorIndirect2")])
df.ioutcomes$NetInvDir <- rowMeans(df.ioutcomes[c("NetworkInvestorDirect1", 
    "NetworkInvestorDirect2")])

pairs.panels(dplyr::select(df.ioutcomes, SMVPOutcome, SLaunchOutcome, SFSaleOutcome, 
    NetInvDir, NetInvInd), pch = ".", main = "Investment Indirect/Direct")

plot of chunk unnamed-chunk-42

Mentorship


## MVP

summary(logit.MVP)  #PreEntryMgtAve
## 
## Call:
## glm(formula = as.formula(formula.MVP2), family = binomial, data = df.ioutcomes)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -2.066  -0.860   0.415   0.827   1.795  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)  
## (Intercept)            -0.2894     1.2717   -0.23    0.820  
## PreEntryBusinesAve     -0.4287     0.3103   -1.38    0.167  
## PreEntryMgtAve          0.7079     0.3135    2.26    0.024 *
## PreEntryPlanAve        -0.2525     0.2877   -0.88    0.380  
## StartupExperience       0.3350     0.1750    1.91    0.056 .
## SatisfactionIncubator   0.5304     0.2370    2.24    0.025 *
## StartupAge             -0.1072     0.0537   -2.00    0.046 *
## SUPFastTrackYes        -1.1639     0.6772   -1.72    0.086 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 118.476  on 85  degrees of freedom
## Residual deviance:  90.803  on 78  degrees of freedom
##   (6 observations deleted due to missingness)
## AIC: 106.8
## 
## Number of Fisher Scoring iterations: 4
formula.MVP3 <- paste0(formula.MVP2, " + PreEntryMgtAve*IncubatorMentor")
summary(logit.MVP <- glm(as.formula(formula.MVP3), family = binomial, data = df.ioutcomes))  #MgtAve still significant, but StartupExperience not.  Need to remove some variables.  Control variables SatisfactionIncubator, StartupAge, SUPFastTrackYes.
## 
## Call:
## glm(formula = as.formula(formula.MVP3), family = binomial, data = df.ioutcomes)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -2.281  -0.735   0.302   0.768   2.062  
## 
## Coefficients:
##                                   Estimate Std. Error z value Pr(>|z|)   
## (Intercept)                        -2.5300     1.7951   -1.41   0.1587   
## PreEntryBusinesAve                 -0.4621     0.3482   -1.33   0.1844   
## PreEntryMgtAve                      1.6614     0.5678    2.93   0.0034 **
## PreEntryPlanAve                    -0.5949     0.3376   -1.76   0.0780 . 
## StartupExperience                   0.3310     0.1862    1.78   0.0754 . 
## SatisfactionIncubator               0.5796     0.2720    2.13   0.0331 * 
## StartupAge                         -0.0761     0.0519   -1.47   0.1427   
## SUPFastTrackYes                    -1.2246     0.7101   -1.72   0.0846 . 
## IncubatorMentorYes                  5.0674     2.0302    2.50   0.0126 * 
## PreEntryMgtAve:IncubatorMentorYes  -1.5407     0.6951   -2.22   0.0267 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 118.476  on 85  degrees of freedom
## Residual deviance:  83.123  on 76  degrees of freedom
##   (6 observations deleted due to missingness)
## AIC: 103.1
## 
## Number of Fisher Scoring iterations: 5
exp(coef(logit.MVP))
##                       (Intercept)                PreEntryBusinesAve 
##                           0.07966                           0.62996 
##                    PreEntryMgtAve                   PreEntryPlanAve 
##                           5.26684                           0.55160 
##                 StartupExperience             SatisfactionIncubator 
##                           1.39243                           1.78525 
##                        StartupAge                   SUPFastTrackYes 
##                           0.92674                           0.29387 
##                IncubatorMentorYes PreEntryMgtAve:IncubatorMentorYes 
##                         158.75989                           0.21424

# PreEntryMgtAve = 1 => slope of IncubatorMentorYes = 5 - 1*-1.5 = 3.5 =>
# odds ratio = exp(3.5) = 33.1 PreEntryMgtAve = 2 => slope of
# IncubatorMentorYes = 5 - 2*-1.5 = 2 => odds ratio = exp(2) = 7.4
# PreEntryMgtAve = 3 => slope of IncubatorMentorYes = 5 - 3*-1.5 = 0.5 =>
# odds ratio = exp(0.5) = 1.65 PreEntryMgtAve = 4 => slope of
# IncubatorMentorYes = 5 - 4*-1.5 = -1 => odds ratio = exp(-1) = 0.37
# PreEntryMgtAve = 5 => slope of IncubatorMentorYes = 5 - 5*-1.5 = -2.5 =>
# odds ratio = exp(-2.5) = 0.08

Experience on Incubator Outcomes (No controls)


## MVP

# MVP~Investment Direct/indirect + PREEEXP
summary(glm(SMVPOutcome ~ NetworkInvestorIndirect1 + NetworkInvestorIndirect2 + 
    NetworkInvestorIndirect3, data = df.ioutcomes, family = binomial))
## 
## Call:
## glm(formula = SMVPOutcome ~ NetworkInvestorIndirect1 + NetworkInvestorIndirect2 + 
##     NetworkInvestorIndirect3, family = binomial, data = df.ioutcomes)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.958  -1.103   0.765   1.013   1.821  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)  
## (Intercept)                -0.512      0.539   -0.95    0.343  
## NetworkInvestorIndirect1    0.364      0.302    1.21    0.227  
## NetworkInvestorIndirect2    0.495      0.319    1.55    0.120  
## NetworkInvestorIndirect3   -0.677      0.304   -2.23    0.026 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 124.37  on 89  degrees of freedom
## Residual deviance: 115.88  on 86  degrees of freedom
##   (2 observations deleted due to missingness)
## AIC: 123.9
## 
## Number of Fisher Scoring iterations: 4
# Indirect3 is better
summary(glm(SMVPOutcome ~ NetworkInvestorDirect1 + NetworkInvestorDirect2 + 
    NetworkInvestorDirect3, data = df.ioutcomes, family = binomial))
## 
## Call:
## glm(formula = SMVPOutcome ~ NetworkInvestorDirect1 + NetworkInvestorDirect2 + 
##     NetworkInvestorDirect3, family = binomial, data = df.ioutcomes)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
##  -1.39   -1.19    1.01    1.10    1.25  
## 
## Coefficients:
##                        Estimate Std. Error z value Pr(>|z|)
## (Intercept)             -0.1182     0.4976   -0.24     0.81
## NetworkInvestorDirect1   0.1162     0.2182    0.53     0.59
## NetworkInvestorDirect2   0.0734     0.2144    0.34     0.73
## NetworkInvestorDirect3  -0.1152     0.2337   -0.49     0.62
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 122.83  on 88  degrees of freedom
## Residual deviance: 122.15  on 85  degrees of freedom
##   (3 observations deleted due to missingness)
## AIC: 130.1
## 
## Number of Fisher Scoring iterations: 4
# Direct none is significant
summary(glm(SMVPOutcome ~ NetInvInd + NetInvDir, data = df.ioutcomes, family = binomial))
## 
## Call:
## glm(formula = SMVPOutcome ~ NetInvInd + NetInvDir, family = binomial, 
##     data = df.ioutcomes)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.635  -1.197   0.875   1.083   1.393  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)   -0.493      0.546   -0.90     0.37
## NetInvInd      0.359      0.219    1.64     0.10
## NetInvDir     -0.179      0.230   -0.78     0.44
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 122.83  on 88  degrees of freedom
## Residual deviance: 119.56  on 86  degrees of freedom
##   (3 observations deleted due to missingness)
## AIC: 125.6
## 
## Number of Fisher Scoring iterations: 4
# Indirect is better but still not significant
summary(glm(SMVPOutcome ~ StartupExperience * NetworkInvestorIndirect3 + PreEntryMgtAve * 
    NetworkInvestorIndirect3, data = df.ioutcomes, family = binomial(logit)))  #PreMgtAve and interaction terms are significant for MVP
## 
## Call:
## glm(formula = SMVPOutcome ~ StartupExperience * NetworkInvestorIndirect3 + 
##     PreEntryMgtAve * NetworkInvestorIndirect3, family = binomial(logit), 
##     data = df.ioutcomes)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.623  -0.945   0.370   0.913   1.928  
## 
## Coefficients:
##                                            Estimate Std. Error z value
## (Intercept)                                -2.47525    1.54731   -1.60
## StartupExperience                          -0.39264    0.43741   -0.90
## NetworkInvestorIndirect3                    0.00162    0.48152    0.00
## PreEntryMgtAve                              1.42302    0.60517    2.35
## StartupExperience:NetworkInvestorIndirect3  0.23000    0.12825    1.79
## NetworkInvestorIndirect3:PreEntryMgtAve    -0.20945    0.16271   -1.29
##                                            Pr(>|z|)  
## (Intercept)                                   0.110  
## StartupExperience                             0.369  
## NetworkInvestorIndirect3                      0.997  
## PreEntryMgtAve                                0.019 *
## StartupExperience:NetworkInvestorIndirect3    0.073 .
## NetworkInvestorIndirect3:PreEntryMgtAve       0.198  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 121.81  on 87  degrees of freedom
## Residual deviance: 101.34  on 82  degrees of freedom
##   (4 observations deleted due to missingness)
## AIC: 113.3
## 
## Number of Fisher Scoring iterations: 4
# with controls
summary(glm(SMVPOutcome ~ StartupExperience * NetworkInvestorIndirect3 + PreEntryMgtAve * 
    NetworkInvestorIndirect3 + SatisfactionIncubator + StartupAge + SUPFastTrack, 
    data = df.ioutcomes, family = binomial(logit)))
## 
## Call:
## glm(formula = SMVPOutcome ~ StartupExperience * NetworkInvestorIndirect3 + 
##     PreEntryMgtAve * NetworkInvestorIndirect3 + SatisfactionIncubator + 
##     StartupAge + SUPFastTrack, family = binomial(logit), data = df.ioutcomes)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.852  -0.892   0.449   0.767   1.927  
## 
## Coefficients:
##                                            Estimate Std. Error z value
## (Intercept)                                 -1.6863     1.7475   -0.97
## StartupExperience                           -0.3186     0.4625   -0.69
## NetworkInvestorIndirect3                    -0.0552     0.5322   -0.10
## PreEntryMgtAve                               1.1154     0.6128    1.82
## SatisfactionIncubator                        0.3872     0.2370    1.63
## StartupAge                                  -0.1132     0.0525   -2.16
## SUPFastTrackYes                             -0.9357     0.6739   -1.39
## StartupExperience:NetworkInvestorIndirect3   0.2031     0.1376    1.48
## NetworkInvestorIndirect3:PreEntryMgtAve     -0.1551     0.1736   -0.89
##                                            Pr(>|z|)  
## (Intercept)                                   0.335  
## StartupExperience                             0.491  
## NetworkInvestorIndirect3                      0.917  
## PreEntryMgtAve                                0.069 .
## SatisfactionIncubator                         0.102  
## StartupAge                                    0.031 *
## SUPFastTrackYes                               0.165  
## StartupExperience:NetworkInvestorIndirect3    0.140  
## NetworkInvestorIndirect3:PreEntryMgtAve       0.372  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 117.26  on 84  degrees of freedom
## Residual deviance:  88.62  on 76  degrees of freedom
##   (7 observations deleted due to missingness)
## AIC: 106.6
## 
## Number of Fisher Scoring iterations: 4


## LAUNCH

# Launch~PreEntryExperience + PREEXP
summary(glm(as.formula(formula.Launch2), family = binomial, data = df.ioutcomes))
## 
## Call:
## glm(formula = as.formula(formula.Launch2), family = binomial, 
##     data = df.ioutcomes)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.367  -0.735  -0.454  -0.128   2.336  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)  
## (Intercept)          -21.686     16.129   -1.34    0.179  
## PreEntryBusinesAve    -0.330      0.361   -0.92    0.360  
## PreEntryManagement2    0.632      0.285    2.22    0.027 *
## PrePlanningModel       0.598      0.270    2.22    0.026 *
## StartupExperience      0.223      0.154    1.44    0.148  
## ResourceInvestment     0.203      0.164    1.24    0.215  
## ResourceOperations     0.204      0.159    1.29    0.198  
## ResourceHuman          0.192      0.157    1.23    0.220  
## PreferenceAve         -0.608      0.359   -1.69    0.090 .
## SUPFastTrackYes       -0.197      0.661   -0.30    0.766  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 95.044  on 84  degrees of freedom
## Residual deviance: 77.650  on 75  degrees of freedom
##   (7 observations deleted due to missingness)
## AIC: 97.65
## 
## Number of Fisher Scoring iterations: 5
# MgtAve and PlanningModel significant
summary(glm(SLaunchOutcome ~ NetworkInvestorIndirect1 + NetworkInvestorIndirect2 + 
    NetworkInvestorIndirect3, data = df.ioutcomes, family = binomial))
## 
## Call:
## glm(formula = SLaunchOutcome ~ NetworkInvestorIndirect1 + NetworkInvestorIndirect2 + 
##     NetworkInvestorIndirect3, family = binomial, data = df.ioutcomes)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.224  -0.831  -0.599   0.664   1.987  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)   
## (Intercept)               -2.1160     0.7060   -3.00   0.0027 **
## NetworkInvestorIndirect1   0.0902     0.3510    0.26   0.7971   
## NetworkInvestorIndirect2   0.5856     0.3556    1.65   0.0996 . 
## NetworkInvestorIndirect3  -0.3851     0.3179   -1.21   0.2258   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 102.304  on 89  degrees of freedom
## Residual deviance:  96.477  on 86  degrees of freedom
##   (2 observations deleted due to missingness)
## AIC: 104.5
## 
## Number of Fisher Scoring iterations: 4
# Indirect 2 is better
summary(glm(SLaunchOutcome ~ NetworkInvestorDirect1 + NetworkInvestorDirect2 + 
    NetworkInvestorDirect3, data = df.ioutcomes, family = binomial))
## 
## Call:
## glm(formula = SLaunchOutcome ~ NetworkInvestorDirect1 + NetworkInvestorDirect2 + 
##     NetworkInvestorDirect3, family = binomial, data = df.ioutcomes)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.175  -0.700  -0.635  -0.432   1.877  
## 
## Coefficients:
##                        Estimate Std. Error z value Pr(>|z|)   
## (Intercept)              -1.646      0.624   -2.64   0.0084 **
## NetworkInvestorDirect1    0.392      0.247    1.58   0.1134   
## NetworkInvestorDirect2    0.189      0.246    0.77   0.4433   
## NetworkInvestorDirect3   -0.507      0.273   -1.86   0.0629 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 99.543  on 88  degrees of freedom
## Residual deviance: 94.578  on 85  degrees of freedom
##   (3 observations deleted due to missingness)
## AIC: 102.6
## 
## Number of Fisher Scoring iterations: 4
# Direct3 is significant
summary(glm(SLaunchOutcome ~ NetInvInd + NetInvDir, data = df.ioutcomes, family = binomial))
## 
## Call:
## glm(formula = SLaunchOutcome ~ NetInvInd + NetInvDir, family = binomial, 
##     data = df.ioutcomes)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.048  -0.827  -0.626  -0.479   2.038  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)   -2.255      0.752   -3.00   0.0027 **
## NetInvInd      0.422      0.250    1.69   0.0914 . 
## NetInvDir     -0.110      0.243   -0.45   0.6511   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 99.543  on 88  degrees of freedom
## Residual deviance: 95.494  on 86  degrees of freedom
##   (3 observations deleted due to missingness)
## AIC: 101.5
## 
## Number of Fisher Scoring iterations: 4
# Indirect better
formula.Launch3 <- paste0(formula.Launch2, "+ (PreEntryMgtAve+PrePlanningModel)*(NetworkInvestorIndirect2+NetworkInvestorDirect3)")
summary(glm(as.formula(formula.Launch3), data = df.ioutcomes, family = binomial))
## 
## Call:
## glm(formula = as.formula(formula.Launch3), family = binomial, 
##     data = df.ioutcomes)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6002  -0.6044  -0.2336  -0.0155   2.4496  
## 
## Coefficients:
##                                           Estimate Std. Error z value
## (Intercept)                               -25.3980    16.4688   -1.54
## PreEntryBusinesAve                         -0.1807     0.4230   -0.43
## PreEntryManagement2                         1.3096     0.8196    1.60
## PrePlanningModel                           -0.5237     0.8243   -0.64
## StartupExperience                           0.5503     0.2202    2.50
## ResourceInvestment                          0.2772     0.1662    1.67
## ResourceOperations                          0.2861     0.1586    1.80
## ResourceHuman                               0.2599     0.1563    1.66
## PreferenceAve                              -0.9159     0.4965   -1.84
## SUPFastTrackYes                             0.7827     0.9751    0.80
## PreEntryMgtAve                             -0.9706     1.2935   -0.75
## NetworkInvestorIndirect2                   -1.3316     1.8256   -0.73
## NetworkInvestorDirect3                     -1.3119     2.2225   -0.59
## PreEntryMgtAve:NetworkInvestorIndirect2     0.6229     0.5261    1.18
## PreEntryMgtAve:NetworkInvestorDirect3      -0.5921     0.5328   -1.11
## PrePlanningModel:NetworkInvestorIndirect2   0.0711     0.2357    0.30
## PrePlanningModel:NetworkInvestorDirect3     0.5517     0.3258    1.69
##                                           Pr(>|z|)  
## (Intercept)                                  0.123  
## PreEntryBusinesAve                           0.669  
## PreEntryManagement2                          0.110  
## PrePlanningModel                             0.525  
## StartupExperience                            0.012 *
## ResourceInvestment                           0.095 .
## ResourceOperations                           0.071 .
## ResourceHuman                                0.096 .
## PreferenceAve                                0.065 .
## SUPFastTrackYes                              0.422  
## PreEntryMgtAve                               0.453  
## NetworkInvestorIndirect2                     0.466  
## NetworkInvestorDirect3                       0.555  
## PreEntryMgtAve:NetworkInvestorIndirect2      0.236  
## PreEntryMgtAve:NetworkInvestorDirect3        0.266  
## PrePlanningModel:NetworkInvestorIndirect2    0.763  
## PrePlanningModel:NetworkInvestorDirect3      0.090 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 91.108  on 81  degrees of freedom
## Residual deviance: 58.429  on 65  degrees of freedom
##   (10 observations deleted due to missingness)
## AIC: 92.43
## 
## Number of Fisher Scoring iterations: 6
# NetworkInvestorDirect3 significant

## FIRST SALE

# 1stSale~Investor Ties + PEEXP
summary(glm(SFSaleOutcome ~ PreEntryBusinesAve + PreEntryMgtAve + PrePlanningModel + 
    StartupExperience, data = df.ioutcomes, family = binomial))
## 
## Call:
## glm(formula = SFSaleOutcome ~ PreEntryBusinesAve + PreEntryMgtAve + 
##     PrePlanningModel + StartupExperience, family = binomial, 
##     data = df.ioutcomes)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.915  -0.714  -0.554  -0.346   2.415  
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)  
## (Intercept)         -3.4795     1.5814   -2.20    0.028 *
## PreEntryBusinesAve   0.0338     0.3424    0.10    0.921  
## PreEntryMgtAve       0.0146     0.2915    0.05    0.960  
## PrePlanningModel     0.5136     0.2273    2.26    0.024 *
## StartupExperience    0.0131     0.1512    0.09    0.931  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 87.229  on 89  degrees of freedom
## Residual deviance: 81.023  on 85  degrees of freedom
##   (2 observations deleted due to missingness)
## AIC: 91.02
## 
## Number of Fisher Scoring iterations: 5
# PlanningModel is significant
summary(glm(SFSaleOutcome ~ NetworkInvestorIndirect1 + NetworkInvestorIndirect2 + 
    NetworkInvestorIndirect3, data = df.ioutcomes, family = binomial))
## 
## Call:
## glm(formula = SFSaleOutcome ~ NetworkInvestorIndirect1 + NetworkInvestorIndirect2 + 
##     NetworkInvestorIndirect3, family = binomial, data = df.ioutcomes)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.917  -0.723  -0.552  -0.415   2.234  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)   
## (Intercept)                -2.799      0.856   -3.27   0.0011 **
## NetworkInvestorIndirect1   -0.197      0.393   -0.50   0.6160   
## NetworkInvestorIndirect2    0.235      0.389    0.60   0.5462   
## NetworkInvestorIndirect3    0.353      0.337    1.05   0.2960   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 87.229  on 89  degrees of freedom
## Residual deviance: 82.788  on 86  degrees of freedom
##   (2 observations deleted due to missingness)
## AIC: 90.79
## 
## Number of Fisher Scoring iterations: 4
# None significant
summary(glm(SFSaleOutcome ~ NetworkInvestorDirect1 + NetworkInvestorDirect2 + 
    NetworkInvestorDirect3, data = df.ioutcomes, family = binomial))
## 
## Call:
## glm(formula = SFSaleOutcome ~ NetworkInvestorDirect1 + NetworkInvestorDirect2 + 
##     NetworkInvestorDirect3, family = binomial, data = df.ioutcomes)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.926  -0.701  -0.525  -0.430   2.231  
## 
## Coefficients:
##                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              -2.520      0.742   -3.39  0.00069 ***
## NetworkInvestorDirect1    0.259      0.302    0.86  0.39051    
## NetworkInvestorDirect2   -0.164      0.306   -0.54  0.59145    
## NetworkInvestorDirect3    0.256      0.299    0.86  0.39235    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 83.847  on 88  degrees of freedom
## Residual deviance: 79.946  on 85  degrees of freedom
##   (3 observations deleted due to missingness)
## AIC: 87.95
## 
## Number of Fisher Scoring iterations: 4
# None significant
summary(glm(SFSaleOutcome ~ NetInvInd + NetInvDir, data = df.ioutcomes, family = binomial))
## 
## Call:
## glm(formula = SFSaleOutcome ~ NetInvInd + NetInvDir, family = binomial, 
##     data = df.ioutcomes)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.825  -0.706  -0.554  -0.396   2.251  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)   -2.906      0.894   -3.25   0.0012 **
## NetInvInd      0.294      0.290    1.01   0.3119   
## NetInvDir      0.107      0.276    0.39   0.6983   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 83.847  on 88  degrees of freedom
## Residual deviance: 80.397  on 86  degrees of freedom
##   (3 observations deleted due to missingness)
## AIC: 86.4
## 
## Number of Fisher Scoring iterations: 4
# None averages significant
summary(glm(SFSaleOutcome ~ PrePlanningModel * NetInvInd, data = df.ioutcomes, 
    family = binomial))
## 
## Call:
## glm(formula = SFSaleOutcome ~ PrePlanningModel * NetInvInd, family = binomial, 
##     data = df.ioutcomes)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.002  -0.719  -0.518  -0.356   2.362  
## 
## Coefficients:
##                            Estimate Std. Error z value Pr(>|z|)
## (Intercept)                 -2.9386     1.8205   -1.61     0.11
## PrePlanningModel             0.2120     0.5204    0.41     0.68
## NetInvInd                   -0.0726     0.5762   -0.13     0.90
## PrePlanningModel:NetInvInd   0.0726     0.1494    0.49     0.63
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 87.229  on 89  degrees of freedom
## Residual deviance: 80.479  on 86  degrees of freedom
##   (2 observations deleted due to missingness)
## AIC: 88.48
## 
## Number of Fisher Scoring iterations: 5
# None significant in SFSaleOutcome

# GrowthSales~ Investor Ties + PreEntryExperience
summary(glm(SSalesChange ~ PreEntryBusinesAve + PreEntryMgtAve + PrePlanningModel, 
    data = df.ioutcomes, family = binomial))
## 
## Call:
## glm(formula = SSalesChange ~ PreEntryBusinesAve + PreEntryMgtAve + 
##     PrePlanningModel, family = binomial, data = df.ioutcomes)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.3191  -0.7585  -0.4316   0.0792   2.2848  
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          -6.647      1.845   -3.60  0.00032 ***
## PreEntryBusinesAve    0.880      0.383    2.30  0.02147 *  
## PreEntryMgtAve       -0.130      0.296   -0.44  0.66015    
## PrePlanningModel      0.719      0.228    3.15  0.00163 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 103.470  on 91  degrees of freedom
## Residual deviance:  84.582  on 88  degrees of freedom
## AIC: 92.58
## 
## Number of Fisher Scoring iterations: 5
# Business and PlanningModel are significant
summary(glm(SSalesChange ~ NetworkInvestorIndirect1 + NetworkInvestorIndirect2 + 
    NetworkInvestorIndirect3, data = df.ioutcomes, family = binomial))
## 
## Call:
## glm(formula = SSalesChange ~ NetworkInvestorIndirect1 + NetworkInvestorIndirect2 + 
##     NetworkInvestorIndirect3, family = binomial, data = df.ioutcomes)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.364  -0.850  -0.463   0.816   2.150  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                -2.787      0.809   -3.44  0.00058 ***
## NetworkInvestorIndirect1   -0.305      0.364   -0.84  0.40240    
## NetworkInvestorIndirect2    0.085      0.356    0.24  0.81155    
## NetworkInvestorIndirect3    0.714      0.323    2.21  0.02699 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 102.304  on 89  degrees of freedom
## Residual deviance:  92.057  on 86  degrees of freedom
##   (2 observations deleted due to missingness)
## AIC: 100.1
## 
## Number of Fisher Scoring iterations: 4
# Indirect3 is significant
summary(glm(SSalesChange ~ NetworkInvestorDirect1 + NetworkInvestorDirect2 + 
    NetworkInvestorDirect3, data = df.ioutcomes, family = binomial))
## 
## Call:
## glm(formula = SSalesChange ~ NetworkInvestorDirect1 + NetworkInvestorDirect2 + 
##     NetworkInvestorDirect3, family = binomial, data = df.ioutcomes)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.225  -0.658  -0.502  -0.477   2.100  
## 
## Coefficients:
##                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)             -2.6732     0.7036   -3.80  0.00015 ***
## NetworkInvestorDirect1   0.0277     0.2855    0.10  0.92283    
## NetworkInvestorDirect2   0.0420     0.2801    0.15  0.88084    
## NetworkInvestorDirect3   0.4872     0.2816    1.73  0.08360 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 99.543  on 88  degrees of freedom
## Residual deviance: 89.683  on 85  degrees of freedom
##   (3 observations deleted due to missingness)
## AIC: 97.68
## 
## Number of Fisher Scoring iterations: 4
# Direct3 is significant
summary(glm(SSalesChange ~ NetInvInd + NetInvDir, data = df.ioutcomes, family = binomial))
## 
## Call:
## glm(formula = SSalesChange ~ NetInvInd + NetInvDir, family = binomial, 
##     data = df.ioutcomes)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.073  -0.823  -0.521  -0.440   2.108  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -2.796      0.798   -3.50  0.00046 ***
## NetInvInd      0.151      0.270    0.56  0.57527    
## NetInvDir      0.358      0.265    1.35  0.17609    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 99.543  on 88  degrees of freedom
## Residual deviance: 92.619  on 86  degrees of freedom
##   (3 observations deleted due to missingness)
## AIC: 98.62
## 
## Number of Fisher Scoring iterations: 4
# None averages significant but Dir is better

summary(glm(SSalesChange ~ PreEntryBusinesAve + PrePlanningModel * NetworkInvestorDirect3, 
    data = df.ioutcomes, family = binomial))
## 
## Call:
## glm(formula = SSalesChange ~ PreEntryBusinesAve + PrePlanningModel * 
##     NetworkInvestorDirect3, family = binomial, data = df.ioutcomes)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6160  -0.7884  -0.3038  -0.0495   2.3608  
## 
## Coefficients:
##                                         Estimate Std. Error z value
## (Intercept)                              -11.011      3.177   -3.47
## PreEntryBusinesAve                         0.830      0.384    2.16
## PrePlanningModel                           1.510      0.626    2.41
## NetworkInvestorDirect3                     1.761      0.833    2.11
## PrePlanningModel:NetworkInvestorDirect3   -0.350      0.190   -1.84
##                                         Pr(>|z|)    
## (Intercept)                              0.00053 ***
## PreEntryBusinesAve                       0.03055 *  
## PrePlanningModel                         0.01579 *  
## NetworkInvestorDirect3                   0.03453 *  
## PrePlanningModel:NetworkInvestorDirect3  0.06516 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 99.543  on 88  degrees of freedom
## Residual deviance: 75.332  on 84  degrees of freedom
##   (3 observations deleted due to missingness)
## AIC: 85.33
## 
## Number of Fisher Scoring iterations: 6
# PreBus,PreModel,Direct, interaction are all significant

# Growth Employees~ Investor Ties + PreEntryExperience
summary(glm(SStaffChange ~ PreEntryBusinesAve + PreEntryMgtAve + PrePlanningModel + 
    StartupExperience, data = df.ioutcomes, family = binomial))
## 
## Call:
## glm(formula = SStaffChange ~ PreEntryBusinesAve + PreEntryMgtAve + 
##     PrePlanningModel + StartupExperience, family = binomial, 
##     data = df.ioutcomes)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.967  -0.950  -0.411   1.018   2.032  
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)   
## (Intercept)          -4.589      1.429   -3.21   0.0013 **
## PreEntryBusinesAve    0.305      0.296    1.03   0.3033   
## PreEntryMgtAve        0.626      0.252    2.48   0.0131 * 
## PrePlanningModel      0.456      0.181    2.52   0.0118 * 
## StartupExperience    -0.055      0.132   -0.42   0.6780   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 123.65  on 89  degrees of freedom
## Residual deviance: 104.68  on 85  degrees of freedom
##   (2 observations deleted due to missingness)
## AIC: 114.7
## 
## Number of Fisher Scoring iterations: 4
# MGT and PlanningModel are significant
summary(glm(SStaffChange ~ NetworkInvestorIndirect1 + NetworkInvestorIndirect2 + 
    NetworkInvestorIndirect3, data = df.ioutcomes, family = binomial))
## 
## Call:
## glm(formula = SStaffChange ~ NetworkInvestorIndirect1 + NetworkInvestorIndirect2 + 
##     NetworkInvestorIndirect3, family = binomial, data = df.ioutcomes)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
##  -1.60   -1.00   -0.65    1.02    1.95  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)   
## (Intercept)                -1.960      0.630   -3.11   0.0019 **
## NetworkInvestorIndirect1    0.280      0.299    0.94   0.3483   
## NetworkInvestorIndirect2   -0.302      0.317   -0.95   0.3406   
## NetworkInvestorIndirect3    0.546      0.278    1.97   0.0493 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 124.05  on 89  degrees of freedom
## Residual deviance: 109.86  on 86  degrees of freedom
##   (2 observations deleted due to missingness)
## AIC: 117.9
## 
## Number of Fisher Scoring iterations: 4
# Indirect3 is significant
summary(glm(SStaffChange ~ NetworkInvestorDirect1 + NetworkInvestorDirect2 + 
    NetworkInvestorDirect3, data = df.ioutcomes, family = binomial))
## 
## Call:
## glm(formula = SStaffChange ~ NetworkInvestorDirect1 + NetworkInvestorDirect2 + 
##     NetworkInvestorDirect3, family = binomial, data = df.ioutcomes)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.545  -0.974  -0.735   1.068   1.698  
## 
## Coefficients:
##                        Estimate Std. Error z value Pr(>|z|)   
## (Intercept)             -1.6715     0.5546   -3.01   0.0026 **
## NetworkInvestorDirect1   0.0680     0.2251    0.30   0.7625   
## NetworkInvestorDirect2   0.3359     0.2228    1.51   0.1316   
## NetworkInvestorDirect3   0.0967     0.2374    0.41   0.6838   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 122.47  on 88  degrees of freedom
## Residual deviance: 112.82  on 85  degrees of freedom
##   (3 observations deleted due to missingness)
## AIC: 120.8
## 
## Number of Fisher Scoring iterations: 4
# None are significant
summary(glm(SStaffChange ~ NetInvInd + NetInvDir, data = df.ioutcomes, family = binomial))
## 
## Call:
## glm(formula = SStaffChange ~ NetInvInd + NetInvDir, family = binomial, 
##     data = df.ioutcomes)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
##  -1.52   -1.05   -0.66    1.09    1.81  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)   -1.962      0.630   -3.12   0.0018 **
## NetInvInd      0.282      0.219    1.28   0.1994   
## NetInvDir      0.266      0.227    1.17   0.2412   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 122.47  on 88  degrees of freedom
## Residual deviance: 111.81  on 86  degrees of freedom
##   (3 observations deleted due to missingness)
## AIC: 117.8
## 
## Number of Fisher Scoring iterations: 4
# None averages significant but Dir is better

summary(glm(SStaffChange ~ PrePlanningModel * NetworkInvestorIndirect3, data = df.ioutcomes, 
    family = binomial))
## 
## Call:
## glm(formula = SStaffChange ~ PrePlanningModel * NetworkInvestorIndirect3, 
##     family = binomial, data = df.ioutcomes)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.456  -1.114  -0.427   0.995   2.210  
## 
## Coefficients:
##                                           Estimate Std. Error z value
## (Intercept)                                 -3.848      1.400   -2.75
## PrePlanningModel                             0.751      0.426    1.76
## NetworkInvestorIndirect3                     0.899      0.437    2.06
## PrePlanningModel:NetworkInvestorIndirect3   -0.153      0.120   -1.28
##                                           Pr(>|z|)   
## (Intercept)                                  0.006 **
## PrePlanningModel                             0.078 . 
## NetworkInvestorIndirect3                     0.040 * 
## PrePlanningModel:NetworkInvestorIndirect3    0.201   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 124.05  on 89  degrees of freedom
## Residual deviance: 107.26  on 86  degrees of freedom
##   (2 observations deleted due to missingness)
## AIC: 115.3
## 
## Number of Fisher Scoring iterations: 4
# PreModel, InvesDirect are significant but not interaction term

Create a timeline of the startups participating

Can we observe this data in a better way?

http://jason.bryer.org/timeline/ http://www.r-bloggers.com/visualising-twitter-user-timeline-activity-in-r/ http://blog.revolutionanalytics.com/2009/11/charting-time-series-as-calendar-heat-maps-in-r.html Google Search https://www.google.com/search?client=safari&rls=en&q=timeline+in+r&ie=UTF-8&oe=UTF-8