Here, we present an example with real data from a 1907 paper by Francis Galton. A thorough description of the data can be found at [1]. Briefly, the data set contains \(n=787\) observations from an ox weight-judging competition. The following R code illustrates the fit of the Double two-piece (DTP) \(t\), the two-piece scale (TPSC) \(t\), and the two-piece shape (TPSH) \(t\) (see [2]), using the R package DTP. The AIC selects the DTP-\(t\) model, as described in [1], but the BIC selects the simpler TPSC-\(t\).
References.
rm(list=ls())
library(DTP)
# Real data from a weight-judging competition from an article by Francis Galton (1907).
data = c(896, 906, 912, 923, 929, 934, 936, 955, 957, 962, 972, 973,
982, 987, 1000, 1004, 1005, 1009, 1023, 1028, 1028, 1031, 1038,
1040, 1051, 1052, 1052, 1054, 1056, 1059, 1064, 1065, 1066, 1070,
1071, 1074, 1074, 1074, 1074, 1078, 1079, 1080, 1081, 1081, 1081,
1085, 1087, 1088, 1088, 1088, 1089, 1091, 1091, 1092, 1092, 1093,
1093, 1094, 1096, 1096, 1096, 1096, 1096, 1097, 1098, 1098, 1098,
1099, 1099, 1101, 1106, 1106, 1107, 1108, 1108, 1108, 1108, 1109,
1109, 1109, 1109, 1110, 1110, 1111, 1112, 1112, 1113, 1113, 1114,
1115, 1115, 1116, 1116, 1117, 1117, 1117, 1118, 1118, 1118, 1118,
1118, 1119, 1119, 1119, 1119, 1119, 1120, 1121, 1122, 1122, 1123,
1123, 1123, 1123, 1124, 1125, 1126, 1126, 1127, 1127, 1129, 1131,
1131, 1131, 1132, 1132, 1132, 1132, 1132, 1133, 1133, 1134, 1134,
1134, 1135, 1135, 1137, 1138, 1139, 1140, 1140, 1140, 1141, 1141,
1141, 1142, 1144, 1144, 1145, 1146, 1146, 1146, 1147, 1148, 1148,
1148, 1149, 1150, 1150, 1151, 1151, 1151, 1152, 1152, 1152, 1153,
1153, 1153, 1154, 1154, 1154, 1154, 1154, 1155, 1155, 1155, 1155,
1155, 1155, 1155, 1155, 1155, 1157, 1157, 1158, 1158, 1158, 1159,
1160, 1160, 1160, 1160, 1161, 1162, 1162, 1162, 1162, 1163, 1163,
1163, 1163, 1163, 1163, 1163, 1164, 1164, 1164, 1164, 1165, 1165,
1165, 1165, 1166, 1167, 1167, 1167, 1168, 1168, 1168, 1168, 1168,
1168, 1169, 1169, 1169, 1169, 1169, 1171, 1171, 1171, 1172, 1172,
1172, 1173, 1174, 1174, 1174, 1174, 1174, 1174, 1175, 1175, 1175,
1175, 1176, 1176, 1176, 1176, 1176, 1177, 1177, 1177, 1177, 1177,
1178, 1178, 1178, 1178, 1178, 1178, 1179, 1179, 1179, 1179, 1179,
1180, 1180, 1180, 1180, 1180, 1181, 1181, 1181, 1181, 1181, 1181,
1181, 1181, 1181, 1182, 1182, 1182, 1182, 1182, 1182, 1182, 1183,
1183, 1183, 1183, 1183, 1183, 1184, 1184, 1184, 1185, 1185, 1185,
1186, 1186, 1186, 1186, 1186, 1186, 1186, 1186, 1187, 1187, 1187,
1187, 1187, 1188, 1188, 1188, 1189, 1189, 1189, 1189, 1189, 1189,
1189, 1189, 1190, 1190, 1190, 1190, 1190, 1190, 1190, 1190, 1191,
1191, 1191, 1191, 1192, 1192, 1192, 1192, 1192, 1193, 1193, 1193,
1193, 1193, 1193, 1193, 1193, 1193, 1194, 1194, 1195, 1195, 1197,
1199, 1199, 1200, 1201, 1201, 1201, 1201, 1202, 1202, 1202, 1202,
1203, 1203, 1203, 1203, 1203, 1203, 1203, 1203, 1204, 1204, 1204,
1205, 1205, 1205, 1205, 1205, 1205, 1205, 1205, 1205, 1206, 1206,
1206, 1206, 1206, 1206, 1207, 1207, 1207, 1208, 1208, 1208, 1208,
1208, 1208, 1208, 1208, 1208, 1208, 1208, 1209, 1209, 1209, 1209,
1209, 1209, 1209, 1209, 1209, 1210, 1210, 1210, 1211, 1211, 1211,
1211, 1211, 1211, 1211, 1211, 1211, 1211, 1211, 1212, 1212, 1212,
1212, 1214, 1214, 1214, 1214, 1214, 1214, 1214, 1214, 1215, 1215,
1215, 1215, 1215, 1215, 1216, 1216, 1216, 1216, 1216, 1217, 1217,
1217, 1217, 1218, 1218, 1218, 1218, 1218, 1218, 1218, 1218, 1218,
1218, 1218, 1218, 1218, 1218, 1219, 1219, 1219, 1219, 1219, 1219,
1219, 1219, 1220, 1220, 1220, 1220, 1220, 1220, 1221, 1221, 1221,
1221, 1221, 1221, 1221, 1221, 1221, 1221, 1222, 1222, 1222, 1222,
1222, 1223, 1223, 1223, 1223, 1224, 1224, 1224, 1224, 1224, 1224,
1224, 1225, 1225, 1225, 1225, 1225, 1225, 1225, 1225, 1226, 1226,
1226, 1226, 1226, 1226, 1226, 1226, 1227, 1227, 1227, 1227, 1228,
1228, 1228, 1228, 1228, 1228, 1229, 1229, 1229, 1229, 1229, 1229,
1230, 1230, 1230, 1230, 1230, 1230, 1230, 1230, 1230, 1230, 1231,
1231, 1231, 1231, 1231, 1231, 1231, 1231, 1231, 1231, 1231, 1231,
1231, 1232, 1232, 1232, 1232, 1232, 1232, 1233, 1233, 1233, 1233,
1233, 1234, 1234, 1234, 1234, 1234, 1235, 1235, 1235, 1235, 1235,
1235, 1235, 1235, 1235, 1236, 1236, 1236, 1236, 1236, 1236, 1237,
1238, 1238, 1238, 1239, 1239, 1239, 1239, 1239, 1239, 1239, 1239,
1239, 1239, 1239, 1240, 1240, 1240, 1241, 1241, 1241, 1242, 1242,
1242, 1242, 1242, 1242, 1242, 1242, 1242, 1242, 1243, 1243, 1243,
1243, 1244, 1244, 1245, 1245, 1245, 1245, 1245, 1245, 1246, 1246,
1246, 1246, 1246, 1246, 1246, 1247, 1247, 1247, 1248, 1248, 1248,
1249, 1249, 1249, 1249, 1249, 1251, 1252, 1252, 1252, 1252, 1252,
1252, 1253, 1253, 1253, 1254, 1254, 1254, 1255, 1256, 1257, 1258,
1259, 1260, 1260, 1260, 1260, 1260, 1261, 1261, 1261, 1261, 1262,
1262, 1263, 1263, 1263, 1263, 1263, 1263, 1263, 1264, 1265, 1265,
1266, 1266, 1266, 1267, 1267, 1267, 1267, 1267, 1268, 1268, 1268,
1269, 1269, 1270, 1270, 1270, 1271, 1271, 1271, 1271, 1271, 1272,
1272, 1272, 1273, 1273, 1274, 1274, 1274, 1275, 1276, 1276, 1278,
1278, 1279, 1280, 1280, 1281, 1282, 1285, 1285, 1286, 1287, 1287,
1287, 1289, 1291, 1292, 1292, 1292, 1293, 1293, 1293, 1293, 1295,
1295, 1295, 1296, 1300, 1302, 1302, 1302, 1302, 1303, 1304, 1304,
1309, 1314, 1314, 1318, 1319, 1320, 1320, 1323, 1325, 1325, 1331,
1340, 1340, 1347, 1358, 1359, 1372, 1374, 1384, 1415, 1432, 1435,
1440, 1471, 1484, 1494, 1516)
n = length(data)
#########################################################################################
# DTP-t fit
#########################################################################################
# - log-likelihood function
loglik1 <- function(par){
if(par[2]>0 & par[3]>-1 & par[3]<1 & par[4]>0 & par[5]>0) return(- sum(ddtp(data,par[1],par[2],par[3],par[4],par[5],param="eps",dt,log=T)))
else return(Inf)
}
# Optimisation
OPT1 <- optim(c(1200,50,0,5,5),loglik1,control=list(maxit=10000),method="BFGS")
AIC1 <- 2*OPT1$value + 2*5
BIC1 <- 2*OPT1$value + log(n)*5
MLE1 <- OPT1$par
#########################################################################################
# TPSC-t fit
#########################################################################################
# - log-likelihood function
loglik2 <- function(par){
if(par[2]>0 & par[3]>-1 & par[3]<1 & par[4]>0) return(- sum(ddtp(data,par[1],par[2],par[3],par[4],par[4],param="eps",dt,log=T)))
else return(Inf)
}
# Optimisation
OPT2 <- optim(c(1200,50,0,5),loglik2,control=list(maxit=10000),method="BFGS")
AIC2 <- 2*OPT2$value + 2*4
BIC2 <- 2*OPT2$value + log(n)*4
MLE2 <- OPT2$par
#########################################################################################
# TPSH-t fit
#########################################################################################
# - log-likelihood function
loglik3 <- function(par){
if(par[2]>0 & par[3]>0 & par[4]>0) return(- sum(ddtp(data,par[1],par[2],0,par[3],par[4],param="eps",dt,log=T)))
else return(Inf)
}
# Optimisation
OPT3 <- optim(c(1200,50,5,5),loglik3,control=list(maxit=10000),method="BFGS")
AIC3 <- 2*OPT3$value + 2*4
BIC3 <- 2*OPT3$value + log(n)*4
MLE3 <- OPT3$par
#########################################################################################
# Model comparison
#########################################################################################
# AIC
c(AIC1,AIC2,AIC3)
## [1] 8864.332 8866.864 8885.947
# BIC
c(BIC1,BIC2,BIC3)
## [1] 8887.673 8885.537 8904.620
# Estimated densities
tempf1 <- function(x) ddtp(x,MLE1[1],MLE1[2],MLE1[3],MLE1[4],MLE1[5],param="eps",dt)
tempf2 <- function(x) ddtp(x,MLE2[1],MLE2[2],MLE2[3],MLE2[4],MLE2[4],param="eps",dt)
tempf3 <- function(x) ddtp(x,MLE3[1],MLE3[2],0,MLE3[3],MLE3[4],param="eps",dt)
hist(data, breaks = 25 ,probability=T,xlim=c(800,1600),ylim=c(0,0.01))
curve(tempf1,800,1600,add=T,lwd=2,lty=1)
curve(tempf2,800,1600,add=T,lwd=2,lty=2)
curve(tempf3,800,1600,add=T,lwd=2,lty=3)
box()
legend(1400, 0.009, c("DTP","TPSC", "TPSH"),
text.col = "black", lty = c(1, 2, 3),
merge = TRUE, bg = "gray90")