Datos nominales y ordinales 23 de septiembre de 2015 Datos: carros<-read.csv(’carros.csv’) Modelo logistico nominal: library(VGAM) ## Loading required package: stats4 ## Loading required package: splines carros_mod<-read.csv(’carros_mod.csv’) modelo1<-vglm(cbind(y1,y2,y3)~sex+age,family = multinomial,data = carros_mod) summary(modelo1) ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## Call: vglm(formula = cbind(y1, y2, y3) ~ sex + age, family = multinomial, data = carros_mod) Pearson residuals: log(mu[,1]/mu[,3]) log(mu[,2]/mu[,3]) 1 0.7377 -0.077667 2 -0.3497 0.835541 3 -0.4440 -0.583338 4 -0.6930 -0.009047 5 0.2174 -0.875593 6 0.4829 0.700820 Coefficients: Estimate Std. Error z value Pr(>|z|) (Intercept):1 -1.0647 0.3496 -3.045 0.002327 ** (Intercept):2 -0.4559 0.2878 -1.584 0.113253 sexwomen:1 -0.8130 0.3210 -2.532 0.011326 * sexwomen:2 -0.4249 0.3006 -1.414 0.157473 age18-23:1 2.9168 0.4229 6.897 5.32e-12 *** age18-23:2 1.3290 0.3925 3.386 0.000709 *** age24-40:1 1.4386 0.4158 3.460 0.000541 *** age24-40:2 0.9792 0.3424 2.860 0.004241 ** --Signif. 
codes: 0 ’***’ 0.001 ’**’ 0.01 ’*’ 0.05 ’.’ 0.1 ’ ’ 1 Number of linear predictors: 2 Names of linear predictors: log(mu[,1]/mu[,3]), log(mu[,2]/mu[,3]) Dispersion Parameter for multinomial family: 1 1 ## ## Residual deviance: 3.9387 on 4 degrees of freedom ## ## Log-likelihood: -25.3704 on 4 degrees of freedom ## ## Number of iterations: 3 Modelo logistico nominal (con otra codificación): modelo1<-vglm(cbind(y1,y2,y3)~sex+age,family = multinomial(refLevel = 1),data = carros_mod, contrasts = list(sex=contr.treatment(2,2),age=contr.treatment(3,2))) summary(modelo1) ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## Call: vglm(formula = cbind(y1, y2, y3) ~ sex + age, family = multinomial(refLevel = 1), data = carros_mod, contrasts = list(sex = contr.treatment(2, 2), age = contr.treatment(3, 2))) Pearson residuals: log(mu[,2]/mu[,1]) log(mu[,3]/mu[,1]) 1 -0.4396 -0.5975 2 0.8953 -0.1375 3 -0.2575 0.6864 4 0.3559 0.5946 5 -0.8602 0.2718 6 0.3519 -0.7749 Coefficients: Estimate Std. Error z value Pr(>|z|) (Intercept):1 -0.5908 0.2840 -2.080 0.037484 * (Intercept):2 -1.0391 0.3305 -3.144 0.001667 ** sex1:1 -0.3881 0.3005 -1.292 0.196510 sex1:2 -0.8130 0.3210 -2.532 0.011326 * age1:1 1.5877 0.4029 3.941 8.12e-05 *** age1:2 2.9168 0.4229 6.897 5.32e-12 *** age3:1 1.1283 0.3416 3.302 0.000958 *** age3:2 1.4781 0.4009 3.687 0.000227 *** --Signif. 
codes: 0 ’***’ 0.001 ’**’ 0.01 ’*’ 0.05 ’.’ 0.1 ’ ’ 1 Number of linear predictors: 2 Names of linear predictors: log(mu[,2]/mu[,1]), log(mu[,3]/mu[,1]) Dispersion Parameter for multinomial family: 1 Residual deviance: 3.9387 on 4 degrees of freedom Log-likelihood: -25.3704 on 4 degrees of freedom Number of iterations: 3 Modelo reducido 1: 2 modelo1a<-vglm(cbind(y1,y2,y3)~sex,family = multinomial(refLevel = 1),data = carros_mod, contrasts = list(sex=contr.treatment(2,2))) DeltaDa<-deviance(modelo1a)-deviance(modelo1) pchisq(DeltaDa,df = 8-4,lower.tail = F) ## [1] 1.94304e-13 Modelo reducido 2: modelo1b<-vglm(cbind(y1,y2,y3)~age,family = multinomial(refLevel = 1),data = carros_mod, contrasts = list(age=contr.treatment(3,2))) DeltaDb<-deviance(modelo1b)-deviance(modelo1) pchisq(DeltaDb,df = 6-4,lower.tail = F) ## [1] 0.03867396 Comparación modelo completo-modelo nulo modelo1_r<-vglm(cbind(y1,y2,y3)~1,family = multinomial(refLevel = 1),data = carros_mod) DeltaDc<-deviance(modelo1_r)-deviance(modelo1) pchisq(DeltaDc,df = 10-4,lower.tail = F) ## [1] 9.965531e-15 Seudo-R2 R2<-(deviance(modelo1_r)-deviance(modelo1))/deviance(modelo1_r) R2 ## [1] 0.951838 Bondad de ajuste: pchisq(deviance(modelo1),df = 4,lower.tail = F) ## [1] 0.4143637 Modelos no agrupados. 
Convertir a una tabla tipo Tidy: library(dplyr) ## ## Attaching package: ’dplyr’ ## ## The following object is masked from ’package:stats’: ## ## filter ## ## The following objects are masked from ’package:base’: ## ## intersect, setdiff, setequal, union 3 library(tidyr) colnames(carros_mod)[3:5]<-c(’No’,’Importante’,’MuyImp’) carros_temp<-carros_mod%>%gather(respuesta,frecuencia,-sex,-age) Ncarros_temp<-dim(carros_temp)[1] carros_ind<-NULL for(i in 1:Ncarros_temp){ carros_ind<-rbind(carros_ind,carros_temp[rep(i,carros_temp[i,4]),] ) } carros_ind<-carros_ind[,-4] Modelo completo individual: modelo_ind<-vglm(respuesta~sex+age,family = multinomial(refLevel = 1),data = carros_ind, contrasts = list(sex=contr.treatment(2,2),age=contr.treatment(3,2))) Modelo nulo individual: modelo_ind_null<-vglm(respuesta~1,family = multinomial(refLevel = 1),data = carros_ind) R2_ind<-(deviance(modelo_ind_null)-deviance(modelo_ind))/deviance(modelo_ind_null) R2_ind ## [1] 0.118203 DeltaD_ind<-deviance(modelo_ind_null)-deviance(modelo_ind) pchisq(DeltaDc,df = 598-592,lower.tail = F) ## [1] 9.965531e-15 Predicciones: predicciones_ind<-predict(modelo_ind,se.fit=T) Datos ordinales Modelo acumulativo: modelo_acum1<-vglm(cbind(No,Importante,MuyImp)~sex+age,family = cumulative(),data = carros_mod, contrasts = list(sex=contr.treatment(2,2),age=contr.treatment(3,2))) summary(modelo_acum1) ## ## Call: ## vglm(formula = cbind(No, Importante, MuyImp) ~ sex + age, family = cumulative(), ## data = carros_mod, contrasts = list(sex = contr.treatment(2, ## 2), age = contr.treatment(3, 2))) ## ## Pearson residuals: ## logit(P[Y<=1]) logit(P[Y<=2]) 4 ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## 1 0.7937 0.2375 2 -0.6680 0.5315 3 -0.3285 -0.6280 4 -0.6911 -0.2243 5 0.5733 -0.6273 6 0.3400 0.7308 Coefficients: Estimate Std. 
Error z value Pr(>|z|) (Intercept):1 0.06428 0.24864 0.259 0.79601 (Intercept):2 1.52613 0.30782 4.958 7.13e-07 *** sex1:1 0.59071 0.26736 2.209 0.02714 * sex1:2 0.57225 0.26851 2.131 0.03307 * age1:1 -2.25992 0.35864 -6.301 2.95e-10 *** age1:2 -2.10268 0.34508 -6.093 1.11e-09 *** age3:1 -1.24641 0.30572 -4.077 4.56e-05 *** age3:2 -0.94903 0.36284 -2.616 0.00891 ** --Signif. codes: 0 ’***’ 0.001 ’**’ 0.01 ’*’ 0.05 ’.’ 0.1 ’ ’ 1 Number of linear predictors: 2 Names of linear predictors: logit(P[Y<=1]), logit(P[Y<=2]) Dispersion Parameter for cumulative family: 1 Residual deviance: 3.8306 on 4 degrees of freedom Log-likelihood: -25.3164 on 4 degrees of freedom Number of iterations: 5 Modelo acumulativo con paralelismo (odds proporcionales): modelo_acum2<-vglm(cbind(No,Importante,MuyImp)~sex+age,family = cumulative(parallel = T),data = carros_mod, contrasts = list(sex=contr.treatment(2,2),age=contr.treatment(3,2))) summary(modelo_acum2) ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## Call: vglm(formula = cbind(No, Importante, MuyImp) ~ sex + age, family = cumulative(parallel = T), data = carros_mod, contrasts = list(sex = contr.treatment(2, 2), age = contr.treatment(3, 2))) Pearson residuals: logit(P[Y<=1]) logit(P[Y<=2]) 1 0.9579 -0.09868 2 -0.9674 0.84653 3 -0.3441 -0.61980 4 -0.4223 -0.64680 5 0.3105 -0.38184 6 0.3644 0.71420 5 ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## Coefficients: Estimate Std. Error z value Pr(>|z|) (Intercept):1 0.04354 0.23030 0.189 0.8501 (Intercept):2 1.65498 0.25360 6.526 6.76e-11 *** sex1 0.57622 0.22611 2.548 0.0108 * age1 -2.23246 0.29042 -7.687 1.50e-14 *** age3 -1.14710 0.27727 -4.137 3.52e-05 *** --Signif. 
codes: 0 ’***’ 0.001 ’**’ 0.01 ’*’ 0.05 ’.’ 0.1 ’ ’ 1 Number of linear predictors: 2 Names of linear predictors: logit(P[Y<=1]), logit(P[Y<=2]) Dispersion Parameter for cumulative family: 1 Residual deviance: 4.5321 on 7 degrees of freedom Log-likelihood: -25.6671 on 7 degrees of freedom Number of iterations: 4 Prueba de paralelismo: DeltaDd<-deviance(modelo_acum2)-deviance(modelo_acum1) pchisq(DeltaDd,df = 3,lower.tail = F) ## [1] 0.8728591 Modelo con categorías adyacentes: modelo_acat1<-vglm(cbind(No,Importante,MuyImp)~sex+age,family = acat(),data = carros_mod, contrasts = list(sex=contr.treatment(2,2),age=contr.treatment(3,2))) summary(modelo_acat1) ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## Call: vglm(formula = cbind(No, Importante, MuyImp) ~ sex + age, family = acat(), data = carros_mod, contrasts = list(sex = contr.treatment(2, 2), age = contr.treatment(3, 2))) Pearson residuals: loge(P[Y=2]/P[Y=1]) loge(P[Y=3]/P[Y=2]) 1 -0.6601 -0.3383 2 0.6975 -0.5778 3 0.1826 0.7100 4 0.5629 0.4043 5 -0.6334 0.6424 6 -0.1280 -0.8414 Coefficients: Estimate Std. Error z value Pr(>|z|) 6 ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## (Intercept):1 -0.5908 0.2840 -2.080 0.037484 * (Intercept):2 -0.4483 0.3509 -1.278 0.201400 sex1:1 -0.3881 0.3005 -1.292 0.196510 sex1:2 -0.4249 0.3006 -1.414 0.157473 age1:1 1.5877 0.4029 3.941 8.12e-05 *** age1:2 1.3290 0.3925 3.386 0.000709 *** age3:1 1.1283 0.3416 3.302 0.000958 *** age3:2 0.3498 0.4089 0.856 0.392198 --Signif. 
codes: 0 ’***’ 0.001 ’**’ 0.01 ’*’ 0.05 ’.’ 0.1 ’ ’ 1 Number of linear predictors: 2 Names of linear predictors: loge(P[Y=2]/P[Y=1]), loge(P[Y=3]/P[Y=2]) Dispersion Parameter for acat family: 1 Residual deviance: 3.9387 on 4 degrees of freedom Log-likelihood: -25.3704 on 4 degrees of freedom Number of iterations: 3 Modelo con categorías adyacentes con paralelismo: modelo_acat2<-vglm(cbind(No,Importante,MuyImp)~sex+age,family = acat(parallel = T),data = carros_mod, contrasts = list(sex=contr.treatment(2,2),age=contr.treatment(3,2))) summary(modelo_acat2) ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## Call: vglm(formula = cbind(No, Importante, MuyImp) ~ sex + age, family = acat(parallel = T), data = carros_mod, contrasts = list(sex = contr.treatment(2, 2), age = contr.treatment(3, 2))) Pearson residuals: loge(P[Y=2]/P[Y=1]) loge(P[Y=3]/P[Y=2]) 1 -0.95852 0.1011 2 1.13048 -1.0878 3 0.05493 0.8325 4 0.22502 0.8662 5 -0.11790 0.1985 6 -0.25845 -0.7854 Coefficients: Estimate Std. Error z value Pr(>|z|) (Intercept):1 -0.4446 0.1935 -2.298 0.02158 * (Intercept):2 -0.6868 0.2280 -3.012 0.00259 ** sex1 -0.4065 0.1605 -2.532 0.01134 * age1 1.5196 0.2113 7.193 6.36e-13 *** age3 0.7845 0.1956 4.012 6.03e-05 *** --Signif. 
codes: 0 ’***’ 0.001 ’**’ 0.01 ’*’ 0.05 ’.’ 0.1 ’ ’ 1 7 ## ## ## ## ## ## ## ## ## ## ## ## Number of linear predictors: 2 Names of linear predictors: loge(P[Y=2]/P[Y=1]), loge(P[Y=3]/P[Y=2]) Dispersion Parameter for acat family: 1 Residual deviance: 5.5031 on 7 degrees of freedom Log-likelihood: -26.1526 on 7 degrees of freedom Number of iterations: 3 Prueba de paralelismo: DeltaDe<-deviance(modelo_acat2)-deviance(modelo_acat1) pchisq(DeltaDe,df = 3,lower.tail = F) ## [1] 0.6674897 Modelo de cociente de continuación con logits: $\log\left(\frac{\pi_1 + \cdots + \pi_{j-1}}{\pi_j}\right)$ modelo_cratio1<-vglm(cbind(No,Importante,MuyImp)~sex+age,family = cratio(reverse = T),data = carros_mod, contrasts = list(sex=contr.treatment(2,2),age=contr.treatment(3,2))) summary(modelo_cratio1) ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## Call: vglm(formula = cbind(No, Importante, MuyImp) ~ sex + age, family = cratio(reverse = T), data = carros_mod, contrasts = list(sex = contr.treatment(2, 2), age = contr.treatment(3, 2))) Pearson residuals: logit(P[Y<2|Y<=2]) logit(P[Y<3|Y<=3]) 1 0.61389 0.4017 2 -0.73799 0.4357 3 0.05089 -0.6570 4 -0.53274 -0.4138 5 0.65777 -0.4960 6 -0.04758 0.7617 Coefficients: Estimate Std. Error z value Pr(>|z|) (Intercept):1 0.5623 0.2838 1.981 0.04754 * (Intercept):2 1.5266 0.3087 4.945 7.60e-07 *** sex1:1 0.4489 0.3050 1.472 0.14109 sex1:2 0.5930 0.2710 2.189 0.02862 * age1:1 -1.6201 0.4051 -3.999 6.36e-05 *** 8 ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## age1:2 -2.1137 0.3462 -6.105 1.03e-09 *** age3:1 -1.1235 0.3424 -3.282 0.00103 ** age3:2 -0.9711 0.3638 -2.670 0.00760 ** --Signif. 
codes: 0 ’***’ 0.001 ’**’ 0.01 ’*’ 0.05 ’.’ 0.1 ’ ’ 1 Number of linear predictors: 2 Names of linear predictors: logit(P[Y<2|Y<=2]), logit(P[Y<3|Y<=3]) Dispersion Parameter for cratio family: 1 Residual deviance: 3.4306 on 4 degrees of freedom Log-likelihood: -25.1164 on 4 degrees of freedom Number of iterations: 4 con paralelismo: modelo_cratio2<-vglm(cbind(No,Importante,MuyImp)~sex+age,family = cratio(reverse = T,parallel = T),data = carros_mod, contrasts = list(sex=contr.treatment(2,2),age=contr.treatment(3,2))) DeltaDf<-deviance(modelo_cratio2)-deviance(modelo_cratio1) pchisq(DeltaDf,df = 3,lower.tail = F) ## [1] 0.640452 Bondad de ajuste en el modelo con paralelismo: pchisq(deviance(modelo_cratio2),df = 7,lower.tail = F) ## [1] 0.6459577 pchisq(sum(residuals(modelo_cratio2,type=’pearson’)^2),df = 7,lower.tail = F) ## [1] 0.6478282 Análisis de residuos y outliers. . . 9