#uploading and viewing of data
#df=read.csv('data.csv', header = TRUE)


# Load libraries (each of these must first be installed, e.g. from the Packages pane or with install.packages())
library(readr) # reading data of different types
library(ggplot2) # for visualization
library(plyr)
library(dplyr)
library(tidyr) #tidying up data
library(psych) #for modeling
library(caTools)
library(mice) #for cleaning
library(VIM) #for cleaning
library(viridis) #for additional coloring
library(knitr)
# Download the project dataset from GitHub into the working directory and
# load it into the data frame `df`.
data_url <- paste0(
  "https://raw.githubusercontent.com/ashishsm1986/DATA698/main/",
  "Entrepreneurial_Acclimatization.csv"
)
data_file <- "Entrepreneurial_Acclimatization.csv"
# No `method` argument is passed: forcing method = "curl" shells out to an
# external curl binary and fails on machines that do not have one installed.
download_status <- download.file(data_url, destfile = data_file)
# download.file() returns 0 on success; stop here instead of letting
# read.csv() fail on a missing or partial file.
if (download_status != 0) {
  stop("Failed to download ", data_url, call. = FALSE)
}
df <- read.csv(data_file, header = TRUE)
#df=read.csv('Entrepreneurial Acclimatization.csv', header = TRUE)
# Preview the first rows to confirm the file was parsed as expected.
head(df)

The dataset combines a copy of the dataset located at https://www.kaggle.com/manishkc06/startup-success-prediction with the answers received from a survey on the following questions; the answers have been mapped into the file as additional columns:


C1    Top management pushes for Research and Development, Technological acclimatization and innovations hence high entrepreneurial acclimatization?
C2    Projects are executed under stipulated budgets/timelines, with targets met and concerns addressed, hence bringing forth positive influence and satisfaction of different stakeholders.
C3    There is a fast adoption of new technological acclimatization and there is the tendency of using the most updated technology in performing tasks. Single choice.
C4    Top management has a high rate of positive support and influence on projects, hence a high rate of success is noted. Single choice.

Let's dig into the data

## [1] 923  57

The data has 923 rows and 57 columns.

state_code zip_code funding_total_usd category_code status
CA 92101 375000 music acquired
CA 95032 40100000 enterprise acquired
CA 92121 2600000 web acquired
CA 95014 40000000 software acquired
CA 94105 1300000 games_video closed
##         ID state_code latitude longitude zip_code    id  city Unnamed..6  name
## [1,] FALSE      FALSE    FALSE     FALSE    FALSE FALSE FALSE      FALSE FALSE
## [2,] FALSE      FALSE    FALSE     FALSE    FALSE FALSE FALSE      FALSE FALSE
## [3,] FALSE      FALSE    FALSE     FALSE    FALSE FALSE FALSE      FALSE FALSE
## [4,] FALSE      FALSE    FALSE     FALSE    FALSE FALSE FALSE      FALSE FALSE
## [5,] FALSE      FALSE    FALSE     FALSE    FALSE FALSE FALSE      FALSE FALSE
## [6,] FALSE      FALSE    FALSE     FALSE    FALSE FALSE FALSE      FALSE FALSE
##      labels founded_at closed_at first_funding_at last_funding_at
## [1,]  FALSE      FALSE     FALSE            FALSE           FALSE
## [2,]  FALSE      FALSE     FALSE            FALSE           FALSE
## [3,]  FALSE      FALSE     FALSE            FALSE           FALSE
## [4,]  FALSE      FALSE     FALSE            FALSE           FALSE
## [5,]  FALSE      FALSE     FALSE            FALSE           FALSE
## [6,]  FALSE      FALSE     FALSE            FALSE           FALSE
##      age_first_funding_year age_last_funding_year age_first_milestone_year
## [1,]                  FALSE                 FALSE                    FALSE
## [2,]                  FALSE                 FALSE                    FALSE
## [3,]                  FALSE                 FALSE                    FALSE
## [4,]                  FALSE                 FALSE                    FALSE
## [5,]                  FALSE                 FALSE                    FALSE
## [6,]                  FALSE                 FALSE                    FALSE
##      age_last_milestone_year relationships funding_rounds funding_total_usd
## [1,]                   FALSE         FALSE          FALSE             FALSE
## [2,]                   FALSE         FALSE          FALSE             FALSE
## [3,]                   FALSE         FALSE          FALSE             FALSE
## [4,]                   FALSE         FALSE          FALSE             FALSE
## [5,]                   FALSE         FALSE          FALSE             FALSE
## [6,]                   FALSE         FALSE          FALSE             FALSE
##      milestones state_code.1 is_CA is_NY is_MA is_TX is_otherstate
## [1,]      FALSE        FALSE FALSE FALSE FALSE FALSE         FALSE
## [2,]      FALSE        FALSE FALSE FALSE FALSE FALSE         FALSE
## [3,]      FALSE        FALSE FALSE FALSE FALSE FALSE         FALSE
## [4,]      FALSE        FALSE FALSE FALSE FALSE FALSE         FALSE
## [5,]      FALSE        FALSE FALSE FALSE FALSE FALSE         FALSE
## [6,]      FALSE        FALSE FALSE FALSE FALSE FALSE         FALSE
##      category_code is_software is_web is_mobile is_enterprise is_advertising
## [1,]         FALSE       FALSE  FALSE     FALSE         FALSE          FALSE
## [2,]         FALSE       FALSE  FALSE     FALSE         FALSE          FALSE
## [3,]         FALSE       FALSE  FALSE     FALSE         FALSE          FALSE
## [4,]         FALSE       FALSE  FALSE     FALSE         FALSE          FALSE
## [5,]         FALSE       FALSE  FALSE     FALSE         FALSE          FALSE
## [6,]         FALSE       FALSE  FALSE     FALSE         FALSE          FALSE
##      is_gamesvideo is_ecommerce is_biotech is_consulting is_othercategory
## [1,]         FALSE        FALSE      FALSE         FALSE            FALSE
## [2,]         FALSE        FALSE      FALSE         FALSE            FALSE
## [3,]         FALSE        FALSE      FALSE         FALSE            FALSE
## [4,]         FALSE        FALSE      FALSE         FALSE            FALSE
## [5,]         FALSE        FALSE      FALSE         FALSE            FALSE
## [6,]         FALSE        FALSE      FALSE         FALSE            FALSE
##      object_id has_VC has_angel has_roundA has_roundB has_roundC has_roundD
## [1,]     FALSE  FALSE     FALSE      FALSE      FALSE      FALSE      FALSE
## [2,]     FALSE  FALSE     FALSE      FALSE      FALSE      FALSE      FALSE
## [3,]     FALSE  FALSE     FALSE      FALSE      FALSE      FALSE      FALSE
## [4,]     FALSE  FALSE     FALSE      FALSE      FALSE      FALSE      FALSE
## [5,]     FALSE  FALSE     FALSE      FALSE      FALSE      FALSE      FALSE
## [6,]     FALSE  FALSE     FALSE      FALSE      FALSE      FALSE      FALSE
##      avg_participants is_top500 Gender   Age Experience Education    C1    C2
## [1,]            FALSE     FALSE  FALSE FALSE      FALSE     FALSE FALSE FALSE
## [2,]            FALSE     FALSE  FALSE FALSE      FALSE     FALSE FALSE FALSE
## [3,]            FALSE     FALSE  FALSE FALSE      FALSE     FALSE FALSE FALSE
## [4,]            FALSE     FALSE  FALSE FALSE      FALSE     FALSE FALSE FALSE
## [5,]            FALSE     FALSE  FALSE FALSE      FALSE     FALSE FALSE FALSE
## [6,]            FALSE     FALSE  FALSE FALSE      FALSE     FALSE FALSE FALSE
##         C3    C4 status
## [1,] FALSE FALSE  FALSE
## [2,] FALSE FALSE  FALSE
## [3,] FALSE FALSE  FALSE
## [4,] FALSE FALSE  FALSE
## [5,] FALSE FALSE  FALSE
## [6,] FALSE FALSE  FALSE
##                       ID               state_code                 latitude 
##                  0.00000                  0.00000                  0.00000 
##                longitude                 zip_code                       id 
##                  0.00000                  0.00000                  0.00000 
##                     city               Unnamed..6                     name 
##                  0.00000                  0.00000                  0.00000 
##                   labels               founded_at                closed_at 
##                  0.00000                  0.00000                  0.00000 
##         first_funding_at          last_funding_at   age_first_funding_year 
##                  0.00000                  0.00000                  0.00000 
##    age_last_funding_year age_first_milestone_year  age_last_milestone_year 
##                  0.00000                 16.46804                 16.46804 
##            relationships           funding_rounds        funding_total_usd 
##                  0.00000                  0.00000                  0.00000 
##               milestones             state_code.1                    is_CA 
##                  0.00000                  0.00000                  0.00000 
##                    is_NY                    is_MA                    is_TX 
##                  0.00000                  0.00000                  0.00000 
##            is_otherstate            category_code              is_software 
##                  0.00000                  0.00000                  0.00000 
##                   is_web                is_mobile            is_enterprise 
##                  0.00000                  0.00000                  0.00000 
##           is_advertising            is_gamesvideo             is_ecommerce 
##                  0.00000                  0.00000                  0.00000 
##               is_biotech            is_consulting         is_othercategory 
##                  0.00000                  0.00000                  0.00000 
##                object_id                   has_VC                has_angel 
##                  0.00000                  0.00000                  0.00000 
##               has_roundA               has_roundB               has_roundC 
##                  0.00000                  0.00000                  0.00000 
##               has_roundD         avg_participants                is_top500 
##                  0.00000                  0.00000                  0.00000 
##                   Gender                      Age               Experience 
##                  0.00000                  0.00000                  0.00000 
##                Education                       C1                       C2 
##                  0.00000                  0.00000                  0.00000 
##                       C3                       C4                   status 
##                  0.00000                  0.00000                  0.00000

##     ID state_code latitude longitude zip_code id city Unnamed..6 name labels
## 771  1          1        1         1        1  1    1          1    1      1
## 152  1          1        1         1        1  1    1          1    1      1
##      0          0        0         0        0  0    0          0    0      0
##     founded_at closed_at first_funding_at last_funding_at
## 771          1         1                1               1
## 152          1         1                1               1
##              0         0                0               0
##     age_first_funding_year age_last_funding_year relationships funding_rounds
## 771                      1                     1             1              1
## 152                      1                     1             1              1
##                          0                     0             0              0
##     funding_total_usd milestones state_code.1 is_CA is_NY is_MA is_TX
## 771                 1          1            1     1     1     1     1
## 152                 1          1            1     1     1     1     1
##                     0          0            0     0     0     0     0
##     is_otherstate category_code is_software is_web is_mobile is_enterprise
## 771             1             1           1      1         1             1
## 152             1             1           1      1         1             1
##                 0             0           0      0         0             0
##     is_advertising is_gamesvideo is_ecommerce is_biotech is_consulting
## 771              1             1            1          1             1
## 152              1             1            1          1             1
##                  0             0            0          0             0
##     is_othercategory object_id has_VC has_angel has_roundA has_roundB
## 771                1         1      1         1          1          1
## 152                1         1      1         1          1          1
##                    0         0      0         0          0          0
##     has_roundC has_roundD avg_participants is_top500 Gender Age Experience
## 771          1          1                1         1      1   1          1
## 152          1          1                1         1      1   1          1
##              0          0                0         0      0   0          0
##     Education C1 C2 C3 C4 status age_first_milestone_year
## 771         1  1  1  1  1      1                        1
## 152         1  1  1  1  1      1                        0
##             0  0  0  0  0      0                      152
##     age_last_milestone_year    
## 771                       1   0
## 152                       0   2
##                         152 304

Only two attributes, age_first_milestone_year and age_last_milestone_year, actually have missing entries; each is missing in 16.46804% of the rows (152 of 923). The two attributes are imputed by the code that follows:

##   ID state_code      id          city              name labels founded_at
## 1  1         CA  c:6669     San Diego       Bandsintown      1   1/1/2007
## 2  2         CA c:16283     Los Gatos         TriCipher      1   1/1/2000
## 3  3         CA c:65620     San Diego             Plixi      1  3/18/2009
## 4  4         CA c:42668     Cupertino Solidcore Systems      1   1/1/2002
## 5  5         CA c:65806 San Francisco    Inhale Digital      0   8/1/2010
## 6  6         CA c:22898 Mountain View  Matisse Networks      0   1/1/2002
##   closed_at first_funding_at last_funding_at age_first_funding_year
## 1                   4/1/2009        1/1/2010                 2.2493
## 2                  2/14/2005      12/28/2009                 5.1260
## 3                  3/30/2010       3/30/2010                 1.0329
## 4                  2/17/2005       4/25/2007                 3.1315
## 5 10/1/2012         8/1/2010        4/1/2012                 0.0000
## 6 2/15/2009        7/18/2006       7/18/2006                 4.5452
##   age_last_funding_year age_first_milestone_year age_last_milestone_year
## 1                3.0027                   4.6685                  6.7041
## 2                9.9973                   7.0055                  7.0055
## 3                1.0329                   1.4575                  2.2055
## 4                5.3151                   6.0027                  6.0027
## 5                1.6685                   0.0384                  0.0384
## 6                4.5452                   5.0027                  5.0027
##   relationships funding_rounds funding_total_usd milestones is_CA is_NY is_MA
## 1             3              3            375000          3     1     0     0
## 2             9              4          40100000          1     1     0     0
## 3             5              1           2600000          2     1     0     0
## 4             5              3          40000000          1     1     0     0
## 5             2              2           1300000          1     1     0     0
## 6             3              1           7500000          1     1     0     0
##   is_TX is_otherstate   category_code is_software is_web is_mobile
## 1     0             0           music           0      0         0
## 2     0             0      enterprise           0      0         0
## 3     0             0             web           0      1         0
## 4     0             0        software           1      0         0
## 5     0             0     games_video           0      0         0
## 6     0             0 network_hosting           0      0         0
##   is_enterprise is_advertising is_gamesvideo is_ecommerce is_biotech
## 1             0              0             0            0          0
## 2             1              0             0            0          0
## 3             0              0             0            0          0
## 4             0              0             0            0          0
## 5             0              0             1            0          0
## 6             0              0             0            0          0
##   is_consulting is_othercategory has_VC has_angel has_roundA has_roundB
## 1             0                1      0         1          0          0
## 2             0                0      1         0          0          1
## 3             0                0      0         0          1          0
## 4             0                0      0         0          0          1
## 5             0                0      1         1          0          0
## 6             0                1      0         0          0          1
##   has_roundC has_roundD avg_participants is_top500            Gender   Age
## 1          0          0           1.0000         0                        
## 2          1          1           4.7500         1 Prefer not to say 41-50
## 3          0          0           4.0000         1              Male 31-40
## 4          1          1           3.3333         1              Male 31-40
## 5          0          0           1.0000         1              Male 31-40
## 6          0          0           3.0000         1              Male 31-40
##     Experience Education  C1  C2  C3  C4   status
## 1    6-8 Years       PhD Yes Yes Yes Yes acquired
## 2 Over 8 Years   Masters Yes Yes Yes Yes acquired
## 3 Over 8 Years Bachelors Yes Yes Yes Yes acquired
## 4   8-10 Years   Masters Yes Yes Yes Yes acquired
## 5   8-10 Years   Masters Yes  No Yes  No   closed
## 6 Over 8 Years Bachelors Yes Yes Yes Yes   closed

The next step is the descriptive research design, for which the descriptive analytics are run. To start off, it is worth noting that the dataset has a mix of integer, numeric and character attributes.

## 'data.frame':    923 obs. of  51 variables:
##  $ ID                      : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ state_code              : Factor w/ 35 levels "AR","AZ","CA",..: 3 3 3 3 3 3 3 3 13 3 ...
##  $ id                      : Factor w/ 922 levels "c:10054","c:101312",..: 812 171 808 593 809 311 162 699 15 675 ...
##  $ city                    : Factor w/ 221 levels "Acton","Addison",..: 173 109 173 56 174 126 126 178 215 144 ...
##  $ name                    : Factor w/ 922 levels "#waywire","1000memories",..: 77 815 613 743 367 469 665 147 859 217 ...
##  $ labels                  : int  1 1 1 1 0 0 1 1 1 1 ...
##  $ founded_at              : Factor w/ 217 levels "1/1/1984","1/1/1985",..: 16 9 107 11 184 11 14 13 11 151 ...
##  $ closed_at               : Factor w/ 203 levels "","1/1/2001",..: 1 1 1 1 26 75 1 1 1 1 ...
##  $ first_funding_at        : Factor w/ 585 levels "1/1/2000","1/1/2001",..: 296 217 279 219 491 460 559 514 507 206 ...
##  $ last_funding_at         : Factor w/ 680 levels "1/1/2001","1/1/2004",..: 8 206 323 380 341 534 298 112 281 274 ...
##  $ age_first_funding_year  : num  2.25 5.13 1.03 3.13 0 ...
##  $ age_last_funding_year   : num  3 10 1.03 5.32 1.67 ...
##  $ age_first_milestone_year: num  4.6685 7.0055 1.4575 6.0027 0.0384 ...
##  $ age_last_milestone_year : num  6.7041 7.0055 2.2055 6.0027 0.0384 ...
##  $ relationships           : int  3 9 5 5 2 3 6 25 13 14 ...
##  $ funding_rounds          : int  3 4 1 3 2 1 3 3 3 3 ...
##  $ funding_total_usd       : num  375000 40100000 2600000 40000000 1300000 7500000 26000000 34100000 9650000 5750000 ...
##  $ milestones              : int  3 1 2 1 1 1 2 3 4 4 ...
##  $ is_CA                   : int  1 1 1 1 1 1 1 1 0 1 ...
##  $ is_NY                   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ is_MA                   : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ is_TX                   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ is_otherstate           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ category_code           : Factor w/ 35 levels "advertising",..: 20 9 35 31 12 21 31 11 19 35 ...
##  $ is_software             : int  0 0 0 1 0 0 1 0 0 0 ...
##  $ is_web                  : int  0 0 1 0 0 0 0 0 0 1 ...
##  $ is_mobile               : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ is_enterprise           : int  0 1 0 0 0 0 0 0 0 0 ...
##  $ is_advertising          : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ is_gamesvideo           : int  0 0 0 0 1 0 0 0 0 0 ...
##  $ is_ecommerce            : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ is_biotech              : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ is_consulting           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ is_othercategory        : int  1 0 0 0 0 1 0 1 0 0 ...
##  $ has_VC                  : int  0 1 0 0 1 0 1 0 1 1 ...
##  $ has_angel               : int  1 0 0 0 1 0 0 0 0 1 ...
##  $ has_roundA              : int  0 0 1 0 0 0 1 1 1 1 ...
##  $ has_roundB              : int  0 1 0 1 0 1 1 1 0 0 ...
##  $ has_roundC              : int  0 1 0 1 0 0 0 0 0 0 ...
##  $ has_roundD              : int  0 1 0 1 0 0 0 1 1 0 ...
##  $ avg_participants        : num  1 4.75 4 3.33 1 ...
##  $ is_top500               : int  0 1 1 1 1 1 1 1 1 1 ...
##  $ Gender                  : Factor w/ 4 levels "","Female","Male",..: 1 4 3 3 3 3 3 3 3 3 ...
##  $ Age                     : Factor w/ 54 levels "","31-40","41-100",..: 1 5 2 2 2 2 2 2 2 2 ...
##  $ Experience              : Factor w/ 3 levels "6-8 Years","8-10 Years",..: 1 3 3 2 2 3 3 3 3 3 ...
##  $ Education               : Factor w/ 3 levels "Bachelors","Masters",..: 3 2 1 2 2 1 1 1 1 2 ...
##  $ C1                      : Factor w/ 1 level "Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ C2                      : Factor w/ 2 levels "No","Yes": 2 2 2 2 1 2 2 2 2 2 ...
##  $ C3                      : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 1 2 2 1 ...
##  $ C4                      : Factor w/ 2 levels "No","Yes": 2 2 2 2 1 2 1 2 2 1 ...
##  $ status                  : Factor w/ 2 levels "acquired","closed": 1 1 1 1 2 2 1 1 1 1 ...
##        ID          state_code         id                 city    
##  Min.   :  1.0   CA     :488   c:28482 :  2   San Francisco:128  
##  1st Qu.:231.5   NY     :106   c:10054 :  1   New York     : 91  
##  Median :462.0   MA     : 83   c:101312:  1   Mountain View: 47  
##  Mean   :462.0   TX     : 42   c:10137 :  1   Palo Alto    : 35  
##  3rd Qu.:692.5   WA     : 42   c:10153 :  1   Austin       : 27  
##  Max.   :923.0   CO     : 19   c:10158 :  1   Santa Clara  : 27  
##                  (Other):143   (Other) :916   (Other)      :568  
##               name         labels          founded_at     closed_at  
##  Redwood Systems:  2   Min.   :0.0000   1/1/2003: 55           :588  
##  #waywire       :  1   1st Qu.:0.0000   1/1/2002: 54   6/1/2013: 25  
##  1000memories   :  1   Median :1.0000   1/1/2005: 54   1/1/2012: 24  
##  41st Parameter :  1   Mean   :0.6468   1/1/2006: 54   7/1/2013: 15  
##  5min Media     :  1   3rd Qu.:1.0000   1/1/2000: 53   5/1/2013: 12  
##  Aardvark       :  1   Max.   :1.0000   1/1/2004: 50   1/1/2011:  8  
##  (Other)        :916                    (Other) :603   (Other) :251  
##   first_funding_at  last_funding_at age_first_funding_year
##  1/1/2008 : 18     1/1/2008 : 11    Min.   :-9.0466       
##  1/1/2007 : 16     1/1/2012 :  6    1st Qu.: 0.5767       
##  1/1/2006 : 12     12/1/2007:  6    Median : 1.4466       
##  3/1/2006 : 10     9/1/2008 :  6    Mean   : 2.2356       
##  12/1/2006:  9     4/1/2012 :  5    3rd Qu.: 3.5753       
##  2/1/2005 :  9     7/1/2008 :  5    Max.   :21.8959       
##  (Other)  :849     (Other)  :884                          
##  age_last_funding_year age_first_milestone_year age_last_milestone_year
##  Min.   :-9.047        Min.   :-14.170          Min.   :-7.005         
##  1st Qu.: 1.670        1st Qu.:  1.252          1st Qu.: 2.930         
##  Median : 3.529        Median :  3.055          Median : 4.754         
##  Mean   : 3.931        Mean   :  3.055          Mean   : 4.754         
##  3rd Qu.: 5.560        3rd Qu.:  4.003          3rd Qu.: 6.040         
##  Max.   :21.896        Max.   : 24.685          Max.   :24.685         
##                                                                        
##  relationships    funding_rounds   funding_total_usd     milestones   
##  Min.   : 0.000   Min.   : 1.000   Min.   :1.100e+04   Min.   :0.000  
##  1st Qu.: 3.000   1st Qu.: 1.000   1st Qu.:2.725e+06   1st Qu.:1.000  
##  Median : 5.000   Median : 2.000   Median :1.000e+07   Median :2.000  
##  Mean   : 7.711   Mean   : 2.311   Mean   :2.542e+07   Mean   :1.842  
##  3rd Qu.:10.000   3rd Qu.: 3.000   3rd Qu.:2.472e+07   3rd Qu.:3.000  
##  Max.   :63.000   Max.   :10.000   Max.   :5.700e+09   Max.   :8.000  
##                                                                       
##      is_CA            is_NY            is_MA             is_TX       
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :1.0000   Median :0.0000   Median :0.00000   Median :0.0000  
##  Mean   :0.5276   Mean   :0.1148   Mean   :0.08992   Mean   :0.0455  
##  3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:0.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.00000   Max.   :1.0000  
##                                                                      
##  is_otherstate       category_code  is_software         is_web     
##  Min.   :0.000   software   :153   Min.   :0.0000   Min.   :0.000  
##  1st Qu.:0.000   web        :144   1st Qu.:0.0000   1st Qu.:0.000  
##  Median :0.000   mobile     : 79   Median :0.0000   Median :0.000  
##  Mean   :0.221   enterprise : 73   Mean   :0.1658   Mean   :0.156  
##  3rd Qu.:0.000   advertising: 62   3rd Qu.:0.0000   3rd Qu.:0.000  
##  Max.   :1.000   games_video: 52   Max.   :1.0000   Max.   :1.000  
##                  (Other)    :360                                   
##    is_mobile       is_enterprise     is_advertising    is_gamesvideo    
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.08559   Mean   :0.07909   Mean   :0.06717   Mean   :0.05634  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :1.00000   Max.   :1.00000   Max.   :1.00000  
##                                                                         
##   is_ecommerce       is_biotech      is_consulting     is_othercategory
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.0000  
##  Mean   :0.02709   Mean   :0.03684   Mean   :0.00325   Mean   :0.3229  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:1.0000  
##  Max.   :1.00000   Max.   :1.00000   Max.   :1.00000   Max.   :1.0000  
##                                                                        
##      has_VC         has_angel        has_roundA       has_roundB    
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :1.0000   Median :0.0000  
##  Mean   :0.3261   Mean   :0.2546   Mean   :0.5081   Mean   :0.3922  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##                                                                     
##    has_roundC       has_roundD      avg_participants   is_top500     
##  Min.   :0.0000   Min.   :0.00000   Min.   : 1.000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.: 1.500   1st Qu.:1.0000  
##  Median :0.0000   Median :0.00000   Median : 2.500   Median :1.0000  
##  Mean   :0.2329   Mean   :0.09967   Mean   : 2.839   Mean   :0.8093  
##  3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.: 3.800   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :1.00000   Max.   :16.000   Max.   :1.0000  
##                                                                      
##                Gender         Age             Experience      Education  
##                   : 52   31-40  :819   6-8 Years   : 52   Bachelors:562  
##  Female           : 51          : 52   8-10 Years  :104   Masters  :309  
##  Male             :768   41-100 :  1   Over 8 Years:767   PhD      : 52  
##  Prefer not to say: 52   41-101 :  1                                     
##                          41-50  :  1                                     
##                          41-51  :  1                                     
##                          (Other): 48                                     
##    C1        C2        C3        C4           status   
##  Yes:923   No :256   No :153   No :307   acquired:597  
##            Yes:667   Yes:770   Yes:616   closed  :326  
##                                                        
##                                                        
##                                                        
##                                                        
## 
## [1] "Those that were acquired totaled to 597 and those that were closed totaled to 326. From this data summary and summation, it is as clear, that a considerable number of firms had struggled in their operations and were eventually closed whereas others must have been promising enough but due to lack of enough funds of operations were instead acquired. The main reasons for acquisition will be largely expounded on under the discussion section. To this point though, it is only wise to note that the results do dictate the fact that, there is a clear view that more firms were performing close to better and looked promising and due to this fact, could easily be selected for acquisition"
##    vars   n mean   sd median trimmed  mad   min  max range skew kurtosis   se
## X1    1 923 2.24 2.51   1.45    1.89 1.77 -9.05 21.9 30.94  2.1     9.97 0.08
##    vars   n mean   sd median trimmed  mad   min  max range skew kurtosis  se
## X1    1 923 3.93 2.97   3.53    3.65 2.88 -9.05 21.9 30.94 1.09     3.12 0.1
##    vars   n mean   sd median trimmed  mad    min   max range skew kurtosis   se
## X1    1 923 3.06 2.72   3.06    2.83 2.19 -14.17 24.68 38.85 1.03     7.27 0.09
##    vars   n mean   sd median trimmed  mad   min   max range skew kurtosis  se
## X1    1 923 4.75 2.94   4.75     4.6 2.58 -7.01 24.68 31.69 0.78     3.04 0.1
##    vars   n mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 923 7.71 7.27      5     6.5 4.45   0  63    63 2.32     8.56 0.24
##    vars   n mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 923 2.31 1.39      2     2.1 1.48   1  10     9 1.35     2.23 0.05
##    vars   n     mean        sd median  trimmed      mad   min     max
## X1    1 923 25419749 189634364  1e+07 13381063 12883794 11000 5.7e+09
##         range  skew kurtosis      se
## X1 5699989000 29.06   865.75 6241891
##    vars   n mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 923 1.84 1.32      2    1.75 1.48   0   8     8 0.58     0.25 0.04

#Getting to know the statistical distribution through Histograms

The histogram above clearly indicates approximately normally distributed data, with the peak at the center and progressively fewer data points spreading out toward both ends of the range.

In the above two cases, most firms received their first and last funding between 0 and 5 years in total. These are the most crucial years of a firm, during which a firm in most cases is not yet looking at expanding its operations but rather at establishing them. Firms that receive funding after more than 5 years of existence, on the other hand, tend to be seeking expansion funds, which is understandable because this stage needs greater investment.

As the above graph indicates, among the firms that had relationships with other firms, most had relationship periods spanning from 0 to 5 years; the count keeps dropping beyond that point, so the data is highly skewed to the right.

The data for the number of funding rounds indicates that most firms were funded for between 1 and 2 rounds, after which the frequency drops; the data is therefore skewed to the right. Most firms manage well on their own after the first or even second funding round, which is why the frequencies decrease as the number of rounds increases.

Doing bi-variate analysis

## df$status: acquired
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 5.970e+04 4.575e+06 1.270e+07 3.105e+07 2.706e+07 5.700e+09 
## ------------------------------------------------------------ 
## df$status: closed
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
##     11000   1000000   5000000  15115322  16525000 510000000
## df$status: acquired
## [1] 31046622
## ------------------------------------------------------------ 
## df$status: closed
## [1] 15115322
## df$status: acquired
## [1] 12700000
## ------------------------------------------------------------ 
## df$status: closed
## [1] 5e+06
## df$status: acquired
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   1.000   2.000   2.162   3.000   8.000 
## ------------------------------------------------------------ 
## df$status: closed
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   1.000   1.255   2.000   6.000
## df$status: acquired
## [1] 2.162479
## ------------------------------------------------------------ 
## df$status: closed
## [1] 1.254601
## df$status: acquired
## [1] 2
## ------------------------------------------------------------ 
## df$status: closed
## [1] 1

Density plot

Building Linear Regression Model

## 
## Call:
## lm(formula = milestones ~ age_first_funding_year + age_last_funding_year + 
##     age_first_milestone_year + age_last_milestone_year + relationships + 
##     funding_rounds + funding_total_usd, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.6120 -0.5293  0.0425  0.5834  3.6984 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               8.947e-01  8.668e-02  10.322  < 2e-16 ***
## age_first_funding_year   -4.403e-02  3.090e-02  -1.425  0.15457    
## age_last_funding_year    -7.552e-02  2.913e-02  -2.592  0.00968 ** 
## age_first_milestone_year -2.353e-01  2.038e-02 -11.543  < 2e-16 ***
## age_last_milestone_year   2.804e-01  1.850e-02  15.156  < 2e-16 ***
## relationships             7.149e-02  5.226e-03  13.681  < 2e-16 ***
## funding_rounds            7.958e-02  3.892e-02   2.045  0.04117 *  
## funding_total_usd        -2.657e-10  1.732e-10  -1.534  0.12533    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9848 on 915 degrees of freedom
## Multiple R-squared:  0.4498, Adjusted R-squared:  0.4456 
## F-statistic: 106.9 on 7 and 915 DF,  p-value: < 2.2e-16
## [1] 634
## [1] 289
##          5          8         13         19         23         24 
## 0.69664595 0.01081804 0.78656464 0.12708446 0.78663663 0.27325968
## [1] 0.001978452 1.000000000
##           
##            FALSE TRUE
##   acquired   158   32
##   closed      42   57
##           
##            FALSE TRUE
##   acquired   135   55
##   closed      29   70

It is clearly evident that only the attributes age_first_funding_year and funding_total_usd are not statistically significant; the remaining attributes, which are statistically significant, will be used for building the classification algorithms.

##          5          8         13         19         23         24 
## 0.69664595 0.01081804 0.78656464 0.12708446 0.78663663 0.27325968
## [1] 1.3033540 0.9891820 1.2134354 0.8729155 1.2133634 1.7267403
##    Actual  Predicted    error1
## 5       2 0.69664595 1.3033540
## 8       1 0.01081804 0.9891820
## 13      2 0.78656464 1.2134354
## 19      1 0.12708446 0.8729155
## 23      2 0.78663663 1.2133634
## 24      2 0.27325968 1.7267403
## [1] 1.07117
## [1] 0.9856348
## [1] 0.3569257
## [1] 3.264751
## NULL

Using the statistically significant attributes and splitting the main dataset in a 70:30 ratio into train and test sets respectively, the developed model was trained on 634 rows and tested on a test set of 289 rows. For the developed model, the accuracy rate ranged between 6.234853e-06 and 8.097985e-01.

From the above output, the highest performance is reached when the threshold is 0.58, with an accuracy of 0.74 (74%). This threshold should therefore be used to obtain the most accurate predictions.

## 
## Classification tree:
## tree(formula = status ~ age_first_funding_year + age_last_funding_year + 
##     age_first_milestone_year + age_last_milestone_year + relationships + 
##     funding_rounds + funding_total_usd + milestones + avg_participants, 
##     data = df)
## Variables actually used in tree construction:
## [1] "relationships"           "funding_total_usd"      
## [3] "milestones"              "age_last_milestone_year"
## [5] "age_last_funding_year"   "age_first_funding_year" 
## [7] "avg_participants"       
## Number of terminal nodes:  10 
## Residual mean deviance:  0.9438 = 861.7 / 913 
## Misclassification error rate: 0.2243 = 207 / 923

##           tree.pred
##            acquired closed
##   acquired      183      7
##   closed         61     38
## $size
## [1] 9 8 5 2 1
## 
## $dev
## [1] 163 160 159 164 227
## 
## $k
## [1]      -Inf  0.000000  1.333333  7.000000 69.000000
## 
## $method
## [1] "misclass"
## 
## attr(,"class")
## [1] "prune"         "tree.sequence"

##           tree.pred
##            acquired closed
##   acquired      183      7
##   closed         61     38
## 
## Call:
##  randomForest(formula = status ~ age_first_funding_year + age_last_funding_year +      age_first_milestone_year + age_last_milestone_year + relationships +      funding_rounds + funding_total_usd + milestones + avg_participants,      data = train) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 3
## 
##         OOB estimate of  error rate: 21.92%
## Confusion matrix:
##          acquired closed class.error
## acquired      368     39   0.0958231
## closed        100    127   0.4405286
##           rp1
##            acquired closed
##   acquired      174     16
##   closed         34     65

##                          MeanDecreaseGini
## age_first_funding_year           36.20969
## age_last_funding_year            31.33394
## age_first_milestone_year         27.17028
## age_last_milestone_year          38.22115
## relationships                    56.50446
## funding_rounds                   14.78873
## funding_total_usd                42.08283
## milestones                       23.11928
## avg_participants                 20.88417
## [1] 7201 6978 5702 6029 5665 3206 7633 2831 4810
## k-Nearest Neighbors 
## 
## 634 samples
##   9 predictor
##   2 classes: 'acquired', 'closed' 
## 
## Pre-processing: centered (9), scaled (9) 
## Resampling: Cross-Validated (10 fold, repeated 3 times) 
## Summary of sample sizes: 571, 571, 572, 570, 570, 571, ... 
## Resampling results across tuning parameters:
## 
##   k   ROC        Sens       Spec     
##    1  0.6662039  0.7951220  0.5372859
##    2  0.7027739  0.7936789  0.5390646
##    3  0.7225433  0.8444309  0.5155468
##    4  0.7326698  0.8329268  0.5364295
##    5  0.7437672  0.8575813  0.5125823
##    6  0.7554646  0.8592073  0.5099473
##    7  0.7628925  0.8648780  0.5258893
##    8  0.7674427  0.8673374  0.5389987
##    9  0.7700594  0.8738211  0.5170619
##   10  0.7720584  0.8722358  0.5272069
##   11  0.7734950  0.8796138  0.5215415
##   12  0.7712766  0.8665854  0.5169302
##   13  0.7742161  0.8788008  0.5198287
##   14  0.7770114  0.8713211  0.5125823
##   15  0.7786278  0.8780081  0.5185771
##   16  0.7774859  0.8731098  0.5214756
##   17  0.7770979  0.8821341  0.5095520
##   18  0.7789911  0.8804472  0.5065217
##   19  0.7771332  0.8771545  0.5094862
##   20  0.7791949  0.8804065  0.5080369
##   21  0.7803944  0.8755285  0.5021739
##   22  0.7811409  0.8714228  0.4991436
##   23  0.7839361  0.8763415  0.5050725
##   24  0.7839984  0.8754878  0.4991436
##   25  0.7833236  0.8771545  0.5021080
##   26  0.7844370  0.8853455  0.5065876
##   27  0.7854299  0.8820732  0.5050725
##   28  0.7877190  0.8836585  0.5094862
##   29  0.7895708  0.8869715  0.5065876
##   30  0.7902137  0.8927236  0.5080369
##   31  0.7907341  0.8943293  0.5109354
##   32  0.7909993  0.8984146  0.5094203
##   33  0.7928511  0.8984146  0.5095520
##   34  0.7942122  0.8976220  0.5050725
##   35  0.7950387  0.9025813  0.5110672
##   36  0.7948743  0.9026016  0.5020422
##   37  0.7964964  0.9034553  0.5065876
##   38  0.7972950  0.8992886  0.5066535
##   39  0.7990403  0.9010163  0.5066535
##   40  0.7993030  0.9034756  0.5007246
##   41  0.8002782  0.9010163  0.5063900
##   42  0.7994470  0.9026220  0.5036232
##   43  0.7977072  0.9017683  0.5050066
##   44  0.7989044  0.9009146  0.5065217
##   45  0.7993054  0.9001220  0.5065876
##   46  0.7975124  0.8976016  0.5110672
##   47  0.7966311  0.8967886  0.5024374
##   48  0.7955276  0.8968699  0.5007246
##   49  0.7957459  0.8984959  0.5052701
##   50  0.7965302  0.8976829  0.5067194
##   51  0.7970009  0.8968699  0.5022398
##   52  0.7966643  0.8952439  0.5022398
##   53  0.7955690  0.8968699  0.5038867
##   54  0.7959459  0.8976626  0.4965086
##   55  0.7963677  0.8976829  0.4935441
##   56  0.7971509  0.8935976  0.4936100
##   57  0.7979647  0.8960366  0.4949934
##   58  0.7985234  0.8960366  0.4965086
##   59  0.7990948  0.8927846  0.4921607
##   60  0.7995663  0.8927642  0.4877470
## 
## ROC was used to select the optimal model using the largest value.
## The final value used for the model was k = 41.

## ROC curve variable importance
## 
##                          Importance
## relationships                100.00
## milestones                    71.57
## funding_rounds                50.77
## age_last_milestone_year       49.35
## funding_total_usd             40.54
## avg_participants              37.29
## age_first_milestone_year      28.30
## age_last_funding_year         17.16
## age_first_funding_year         0.00
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction acquired closed
##   acquired      172     45
##   closed         18     54
##                                           
##                Accuracy : 0.782           
##                  95% CI : (0.7299, 0.8282)
##     No Information Rate : 0.6574          
##     P-Value [Acc > NIR] : 2.573e-06       
##                                           
##                   Kappa : 0.4822          
##                                           
##  Mcnemar's Test P-Value : 0.001054        
##                                           
##             Sensitivity : 0.9053          
##             Specificity : 0.5455          
##          Pos Pred Value : 0.7926          
##          Neg Pred Value : 0.7500          
##              Prevalence : 0.6574          
##          Detection Rate : 0.5952          
##    Detection Prevalence : 0.7509          
##       Balanced Accuracy : 0.7254          
##                                           
##        'Positive' Class : acquired        
## 
## 
## ================================== Naive Bayes ================================== 
##  
##  Call: 
## naive_bayes.formula(formula = status ~ age_first_funding_year + 
##     age_last_funding_year + age_first_milestone_year + age_last_milestone_year + 
##     relationships + funding_rounds + funding_total_usd + milestones + 
##     avg_participants, data = train, usekernel = T)
## 
## --------------------------------------------------------------------------------- 
##  
## Laplace smoothing: 0
## 
## --------------------------------------------------------------------------------- 
##  
##  A priori probabilities: 
## 
##  acquired    closed 
## 0.6419558 0.3580442 
## 
## --------------------------------------------------------------------------------- 
##  
##  Tables: 
## 
## --------------------------------------------------------------------------------- 
##  ::: age_first_funding_year::acquired (KDE)
## --------------------------------------------------------------------------------- 
## 
## Call:
##  density.default(x = x, na.rm = TRUE)
## 
## Data: x (407 obs.);  Bandwidth 'bw' = 0.5093
## 
##        x                 y            
##  Min.   :-2.0264   Min.   :6.751e-05  
##  1st Qu.: 0.7464   1st Qu.:2.152e-02  
##  Median : 3.5192   Median :6.654e-02  
##  Mean   : 3.5192   Mean   :9.007e-02  
##  3rd Qu.: 6.2920   3rd Qu.:1.309e-01  
##  Max.   : 9.0648   Max.   :2.905e-01  
## 
## --------------------------------------------------------------------------------- 
##  ::: age_first_funding_year::closed (KDE)
## --------------------------------------------------------------------------------- 
## 
## Call:
##  density.default(x = x, na.rm = TRUE)
## 
## Data: x (227 obs.);  Bandwidth 'bw' = 0.7533
## 
##        x                 y            
##  Min.   :-11.306   Min.   :8.000e-08  
##  1st Qu.: -2.713   1st Qu.:4.083e-04  
##  Median :  5.879   Median :2.328e-03  
##  Mean   :  5.879   Mean   :2.906e-02  
##  3rd Qu.: 14.472   3rd Qu.:2.013e-02  
##  Max.   : 23.065   Max.   :2.224e-01  
## 
## --------------------------------------------------------------------------------- 
##  ::: age_last_funding_year::acquired (KDE)
## --------------------------------------------------------------------------------- 
## 
## Call:
##  density.default(x = x, na.rm = TRUE)
## 
## Data: x (407 obs.);  Bandwidth 'bw' = 0.6932
## 
##        x                y            
##  Min.   :-2.493   Min.   :1.663e-05  
##  1st Qu.: 1.626   1st Qu.:7.015e-03  
##  Median : 5.745   Median :4.243e-02  
##  Mean   : 5.745   Mean   :6.063e-02  
##  3rd Qu.: 9.864   3rd Qu.:1.167e-01  
##  Max.   :13.984   Max.   :1.511e-01  
## 
## --------------------------------------------------------------------------------- 
##  ::: age_last_funding_year::closed (KDE)
## --------------------------------------------------------------------------------- 
## 
## Call:
##  density.default(x = x, na.rm = TRUE)
## 
## Data: x (227 obs.);  Bandwidth 'bw' = 0.9765
## 
##        x                 y            
##  Min.   :-11.976   Min.   :5.430e-06  
##  1st Qu.: -3.048   1st Qu.:8.298e-04  
##  Median :  5.879   Median :2.983e-03  
##  Mean   :  5.879   Mean   :2.797e-02  
##  3rd Qu.: 14.807   3rd Qu.:3.354e-02  
##  Max.   : 23.735   Max.   :1.462e-01  
## 
## --------------------------------------------------------------------------------- 
##  ::: age_first_milestone_year::acquired (KDE)
## --------------------------------------------------------------------------------- 
## 
## Call:
##  density.default(x = x, na.rm = TRUE)
## 
## Data: x (407 obs.);  Bandwidth 'bw' = 0.6144
## 
##        x                 y            
##  Min.   :-5.5939   Min.   :1.811e-05  
##  1st Qu.:-0.4819   1st Qu.:3.391e-03  
##  Median : 4.6302   Median :1.506e-02  
##  Mean   : 4.6302   Mean   :4.886e-02  
##  3rd Qu.: 9.7422   3rd Qu.:7.780e-02  
##  Max.   :14.8542   Max.   :2.016e-01  
## 
## --------------------------------------------------------------------------------- 
##  ::: age_first_milestone_year::closed (KDE)
## --------------------------------------------------------------------------------- 
## 
## Call:
##  density.default(x = x, na.rm = TRUE)
## 
## Data: x (227 obs.);  Bandwidth 'bw' = 0.5613
## 
##        x                y            
##  Min.   :-8.689   Min.   :1.000e-08  
##  1st Qu.:-1.930   1st Qu.:6.107e-04  
##  Median : 4.829   Median :4.143e-03  
##  Mean   : 4.829   Mean   :3.695e-02  
##  3rd Qu.:11.588   3rd Qu.:3.847e-02  
##  Max.   :18.347   Max.   :2.791e-01  
## 
## --------------------------------------------------------------------------------- 
##  ::: age_last_milestone_year::acquired (KDE)
## --------------------------------------------------------------------------------- 
## 
## Call:
##  density.default(x = x, na.rm = TRUE)
## 
## Data: x (407 obs.);  Bandwidth 'bw' = 0.7206
## 
##        x                 y            
##  Min.   :-5.9126   Min.   :1.571e-05  
##  1st Qu.:-0.4974   1st Qu.:2.453e-03  
##  Median : 4.9178   Median :2.446e-02  
##  Mean   : 4.9178   Mean   :4.612e-02  
##  3rd Qu.:10.3330   3rd Qu.:8.403e-02  
##  Max.   :15.7482   Max.   :1.638e-01  
## 
## --------------------------------------------------------------------------------- 
##  ::: age_last_milestone_year::closed (KDE)
## --------------------------------------------------------------------------------- 
## 
## Call:
##  density.default(x = x, na.rm = TRUE)
## 
## Data: x (227 obs.);  Bandwidth 'bw' = 0.7199
## 
##        x                y            
##  Min.   :-9.165   Min.   :2.781e-05  
##  1st Qu.:-1.753   1st Qu.:1.268e-03  
##  Median : 5.659   Median :2.571e-03  
##  Mean   : 5.659   Mean   :3.370e-02  
##  3rd Qu.:13.071   3rd Qu.:4.343e-02  
##  Max.   :20.483   Max.   :2.384e-01  
## 
## --------------------------------------------------------------------------------- 
##  ::: relationships::acquired (KDE)
## --------------------------------------------------------------------------------- 
## 
## Call:
##  density.default(x = x, na.rm = TRUE)
## 
## Data: x (407 obs.);  Bandwidth 'bw' = 1.817
## 
##        x                y            
##  Min.   :-5.452   Min.   :4.770e-06  
##  1st Qu.:13.024   1st Qu.:4.150e-04  
##  Median :31.500   Median :1.555e-03  
##  Mean   :31.500   Mean   :1.352e-02  
##  3rd Qu.:49.976   3rd Qu.:1.503e-02  
##  Max.   :68.452   Max.   :8.707e-02  
## 
## --------------------------------------------------------------------------------- 
##  ::: relationships::closed (KDE)
## --------------------------------------------------------------------------------- 
## 
## Call:
##  density.default(x = x, na.rm = TRUE)
## 
## Data: x (227 obs.);  Bandwidth 'bw' = 0.7943
## 
##        x                y            
##  Min.   :-2.383   Min.   :6.220e-06  
##  1st Qu.: 5.559   1st Qu.:9.871e-04  
##  Median :13.500   Median :7.235e-03  
##  Mean   :13.500   Mean   :3.145e-02  
##  3rd Qu.:21.441   3rd Qu.:4.189e-02  
##  Max.   :29.383   Max.   :1.569e-01  
## 
## ---------------------------------------------------------------------------------
## 
## # ... and 4 more tables
## 
## ---------------------------------------------------------------------------------

##    acquired     closed ID state_code      id          city
## 1 0.8994251 0.10057493  1         CA  c:6669     San Diego
## 2 0.9765062 0.02349377  2         CA c:16283     Los Gatos
## 3 0.5495379 0.45046212  3         CA c:65620     San Diego
## 4 0.9346075 0.06539251  4         CA c:42668     Cupertino
## 6 0.3733746 0.62662545  6         CA c:22898 Mountain View
## 7 0.9589714 0.04102858  7         CA c:16191 Mountain View
##                    name labels founded_at closed_at first_funding_at
## 1           Bandsintown      1   1/1/2007                   4/1/2009
## 2             TriCipher      1   1/1/2000                  2/14/2005
## 3                 Plixi      1  3/18/2009                  3/30/2010
## 4     Solidcore Systems      1   1/1/2002                  2/17/2005
## 6      Matisse Networks      0   1/1/2002 2/15/2009        7/18/2006
## 7 RingCube Technologies      1   1/1/2005                  9/21/2006
##   last_funding_at age_first_funding_year age_last_funding_year
## 1        1/1/2010                 2.2493                3.0027
## 2      12/28/2009                 5.1260                9.9973
## 3       3/30/2010                 1.0329                1.0329
## 4       4/25/2007                 3.1315                5.3151
## 6       7/18/2006                 4.5452                4.5452
## 7       3/18/2010                 1.7205                5.2110
##   age_first_milestone_year age_last_milestone_year relationships funding_rounds
## 1                   4.6685                  6.7041             3              3
## 2                   7.0055                  7.0055             9              4
## 3                   1.4575                  2.2055             5              1
## 4                   6.0027                  6.0027             5              3
## 6                   5.0027                  5.0027             3              1
## 7                   3.0000                  6.6082             6              3
##   funding_total_usd milestones is_CA is_NY is_MA is_TX is_otherstate
## 1            375000          3     1     0     0     0             0
## 2          40100000          1     1     0     0     0             0
## 3           2600000          2     1     0     0     0             0
## 4          40000000          1     1     0     0     0             0
## 6           7500000          1     1     0     0     0             0
## 7          26000000          2     1     0     0     0             0
##     category_code is_software is_web is_mobile is_enterprise is_advertising
## 1           music           0      0         0             0              0
## 2      enterprise           0      0         0             1              0
## 3             web           0      1         0             0              0
## 4        software           1      0         0             0              0
## 6 network_hosting           0      0         0             0              0
## 7        software           1      0         0             0              0
##   is_gamesvideo is_ecommerce is_biotech is_consulting is_othercategory has_VC
## 1             0            0          0             0                1      0
## 2             0            0          0             0                0      1
## 3             0            0          0             0                0      0
## 4             0            0          0             0                0      0
## 6             0            0          0             0                1      0
## 7             0            0          0             0                0      1
##   has_angel has_roundA has_roundB has_roundC has_roundD avg_participants
## 1         1          0          0          0          0           1.0000
## 2         0          0          1          1          1           4.7500
## 3         0          1          0          0          0           4.0000
## 4         0          0          1          1          1           3.3333
## 6         0          0          1          0          0           3.0000
## 7         0          1          1          0          0           1.6667
##   is_top500            Gender   Age   Experience Education  C1  C2  C3  C4
## 1         0                            6-8 Years       PhD Yes Yes Yes Yes
## 2         1 Prefer not to say 41-50 Over 8 Years   Masters Yes Yes Yes Yes
## 3         1              Male 31-40 Over 8 Years Bachelors Yes Yes Yes Yes
## 4         1              Male 31-40   8-10 Years   Masters Yes Yes Yes Yes
## 6         1              Male 31-40 Over 8 Years Bachelors Yes Yes Yes Yes
## 7         1              Male 31-40 Over 8 Years Bachelors Yes Yes  No  No
##     status
## 1 acquired
## 2 acquired
## 3 acquired
## 4 acquired
## 6   closed
## 7 acquired
##           
## p1         acquired closed
##   acquired      324     60
##   closed         83    167
## [1] 0.2255521

Conclusion-

The Random Forest model gives the most accurate prediction, at 76%, of a company’s success or failure

There are more firms that were acquired than those that were closed

Most managers seem to be between the ages of 31 and 40

A positive association exists between Project Success and Entrepreneurial Acclimatization

A positive association exists between Acclimatization in Technology and Success in Projects

A positive arbitration exists in the relationship between Entrepreneurial Acclimatization and Project Success