# Rcom - Bloc de notas
#
# Anuncio
# Rcom
###### Módulo 6: Análisis multivariado ######
###### Profesor: Luis E. Nieto-Barajas ######
###### LIBRERIAS EN R ######
library(TeachingDemos)
library(mvtnorm)
library(cluster)
library(MASS)
library(tree)
###### COMANDOS DE R ######
# (1) Reading the data ------------------------------------------------
# The read.table() call used to be fused into the section header comment,
# so it never ran. The first row of the file holds the column names and
# the first column supplies the row names.
marg <- read.table(
  "http://allman.rhon.itam.mx/~lnieto/index_archivos/Marg90.txt",
  header = TRUE,
  row.names = 1
)
head(marg)
# (2) Descriptive statistics ------------------------------------------
# (2.1) Mean of every column, computed in two ways
apply(marg, MARGIN = 2, FUN = mean)
colMeans(marg)
# (2.2) Variance-covariance and correlation matrices
var(marg)
cor(marg)
# The same two matrices restricted to columns 1 to 9, stored for later use
marg.var <- var(marg[, 1:9])
marg.cor <- cor(marg[, 1:9])
# (2.3) Other summaries
apply(marg, MARGIN = 2, FUN = summary)
summary(marg)
# (3) Graphical analysis ----------------------------------------------
# (3.1) Two-dimensional scatter plots
plot(marg[, 1], marg[, 2], xlab = "ANALF", ylab = "S.PRI")
# Same axes on an empty canvas, then every point drawn as its row name
plot(marg[, 1], marg[, 2], type = "n", xlab = "ANALF", ylab = "S.PRI")
text(marg[, 1], marg[, 2], labels = dimnames(marg)[[1]])
# (3.2) Scatter-plot matrices
pairs(marg[, 1:9])
pairs(marg[, 1:4])
pairs(marg[, 5:9])
# (3.3) Three-dimensional scatter plots
# The next line is a GUI menu path from the original notes, not R code:
#   Graph > 3D Plot > 3D Scatter Plot
# NOTE(review): none of the packages loaded by this script visibly
# provides brush(); it looks like an S-PLUS function - confirm. It is
# called only when a function of that name exists; otherwise a static
# scatter-plot matrix of the same three columns is drawn.
if (exists("brush", mode = "function")) {
  brush(marg[, 1:3])
} else {
  pairs(marg[, 1:3])
}
# (3.4) Bubble plot: circle radii taken from column 3
symbols(
  marg[, 1], marg[, 2],
  circles = marg[, 3],
  xlab = "ANALF", ylab = "S.PRI"
)
text(marg[, 1], marg[, 2], labels = dimnames(marg)[[1]])
# (3.5) Chernoff faces
faces(marg[, 1:9])
faces2(marg[, 1:9])
# (3.6) Star plot
stars(marg[, 1:9])
# (3.7) Standardization of columns 1 to 9 with scale() defaults
marg.est <- scale(marg[, 1:9])
# (3.8) Andrews plot; st.andrews() is expected to be defined by the
# sourced file
source("http://allman.rhon.itam.mx/~lnieto/index_archivos/Standrews.txt")
st.andrews(marg.est[, 1:9])
# (4) Multivariate normal distribution --------------------------------
# (4.1) Sample: 100 draws, zero mean vector, 2 x 2 identity covariance
mu <- c(0, 0)
sigma <- matrix(c(1, 0, 0, 1), nrow = 2, ncol = 2)
x <- rmvnorm(100, mean = mu, sigma = sigma)
plot(x)
# (4.2) Bivariate density plot: 50-point grid on [-5, 5] for each axis
x1 <- seq(from = -5, to = 5, length.out = 50)
x2 <- seq(from = -5, to = 5, length.out = 50)
# Evaluate a bivariate normal density on the grid x1 x x2.
#
# Arguments:
#   x1, x2  numeric vectors with the grid coordinates on each axis; they
#           may have different lengths
#   mu      mean vector, passed to mvtnorm::dmvnorm() as `mean`
#   sigma   covariance matrix, passed to mvtnorm::dmvnorm() as `sigma`
# Returns:
#   a length(x1) x length(x2) matrix z with z[i, j] equal to the density
#   at the point c(x1[i], x2[j])
dnbg <- function(x1, x2, mu, sigma) {
  n1 <- length(x1)
  n2 <- length(x2)
  # An empty grid has nothing to evaluate
  if (n1 == 0L || n2 == 0L) {
    return(matrix(numeric(0), nrow = n1, ncol = n2))
  }
  # One row per grid point, with x1 varying fastest so that the density
  # vector fills the result matrix column by column
  grid <- cbind(rep(x1, times = n2), rep(x2, each = n1))
  dens <- mvtnorm::dmvnorm(grid, mean = mu, sigma = sigma)
  matrix(dens, nrow = n1, ncol = n2)
}
# Density surface on the grid, shown as a perspective plot and as contours
z <- dnbg(x1, x2, mu, sigma)
par(mfrow = c(1, 2))
persp(x1, x2, z, theta = 120, phi = 15)
contour(x1, x2, z)
# (4.3) Univariate normality check on column 1
par(mfrow = c(2, 2))
qqnorm(marg[, 1])
hist(marg[, 1])
boxplot(marg[, 1])
# (4.4) Bivariate normality check on (log of column 1, column 2)
x <- cbind(log(marg[, 1]), marg[, 2])
mu <- colMeans(x)
sigma <- var(x)
n <- nrow(x)
# Squared Mahalanobis distance of every row to the sample mean.
# mahalanobis() replaces the explicit loop, which inverted sigma again on
# every iteration.
d <- mahalanobis(x, center = mu, cov = sigma)
# Chi-square quantiles with 2 degrees of freedom at (i - 0.5) / n
d2 <- qchisq((seq_len(n) - 0.5) / n, df = 2)
par(mfrow = c(1, 1))
qqplot(d, d2)
# (5) Inference for rho -----------------------------------------------
# (5.1) Confidence interval for the correlation of columns 1 and 2
# Fisher z-transform: atanh(r) -/+ z / sqrt(n - 3), mapped back with tanh.
# z must be the normal QUANTILE qnorm(0.975); the previous pnorm(0.975)
# is a probability (about 0.835) and made the interval far too narrow.
tanh(
  atanh(marg.cor[1, 2]) + c(-1, 1) * qnorm(0.975) / sqrt(nrow(marg) - 3)
)
# (5.2) Hypothesis tests on the correlation
cor.test(marg[, 1], marg[, 2])
# p-values of all pairwise tests among the 13 columns, stored in the
# upper triangle; the diagonal and lower triangle stay at 0
marg.test <- matrix(0, nrow = 13, ncol = 13)
for (j in 2:13) {
  for (i in 1:(j - 1)) {
    marg.test[i, j] <- cor.test(marg[, i], marg[, j])$p.value
  }
}
marg.test
# (6) Matrix algebra --------------------------------------------------
# This assignment used to be fused into the section header comment
marg.var <- var(marg[, 1:9])
# (6.1) Eigenvalues and eigenvectors
eigen(marg.var)
# (6.2) Determinant, directly and as the product of the eigenvalues
det(marg.var)
prod(eigen(marg.var)$values)
# (6.3) Trace, as the sum of the eigenvalues
sum(eigen(marg.var)$values)
# (7) Principal component analysis ------------------------------------
# The princomp() call used to be fused into the section header comment,
# leaving marg.pc undefined for everything below.
marg.pc <- princomp(marg[, 1:9])
print(marg.pc, loadings = TRUE)
summary(marg.pc, loadings = TRUE)
screeplot(marg.pc)
# Proportion of variance per component
barplot(
  marg.pc$sdev^2 / sum(marg.pc$sdev^2),
  xlab = "Component", ylab = "Prop. Var."
)
biplot(marg.pc)
biplot(marg.pc, choices = c(1, 3))
biplot(marg.pc, choices = 2:3)
# Sign-flipped first component score against column 12
plot(-marg.pc$scores[, 1], marg[, 12])
# Same analysis with cor = TRUE (correlation matrix instead of covariance)
marg.pc.cor <- princomp(marg[, 1:9], cor = TRUE)
summary(marg.pc.cor, loadings = TRUE)
screeplot(marg.pc.cor)
# The axis label was split across two lines, which embedded a newline in
# the string; it now matches the label of the covariance-based plot.
barplot(
  marg.pc.cor$sdev^2 / sum(marg.pc.cor$sdev^2),
  xlab = "Component", ylab = "Prop. Var."
)
biplot(marg.pc.cor)
plot(-marg.pc.cor$scores[, 1], marg[, 12])
# (8) Cluster analysis ------------------------------------------------
# (8.1) Distance between rows over columns 1 to 9 (dist() defaults)
marg.dist <- dist(marg[, 1:9])
# (8.2) Linkage methods: complete linkage
marg.cl.com <- hclust(marg.dist, method = "complete")
# plot() on the hclust object replaces plclust(), which is defunct in
# current versions of R
plot(marg.cl.com, labels = dimnames(marg)[[1]])
rect.hclust(marg.cl.com, k = 5, border = "red")
# Membership when the tree is cut into 5 groups
cutree(marg.cl.com, k = 5)
marg.cl.com.gr <- cutree(marg.cl.com, k = 5)
# Rows assigned to group 1 and their summary
marg[marg.cl.com.gr == 1, ]
summary(marg[marg.cl.com.gr == 1, ])
# Summary of columns 1 to 9 within each group
by(marg[, 1:9], marg.cl.com.gr, summary)
# Group membership of the first 32 rows against column 13
plot(cutree(marg.cl.com, k = 5)[1:32], marg[, 13])
# NOTE(review): subtree() is not provided by any package loaded in this
# script; it looks like an S-PLUS function - confirm. The plot is drawn
# only when such a function exists, and assumes it returns an object
# that plot() can draw.
if (exists("subtree", mode = "function")) {
  plot(subtree(marg.cl.com, c(2, 9, 19)))
}
# Complete linkage again, now on the standardized data
marg.dist.est <- dist(marg.est[, 1:9])
marg.cl.com.est <- hclust(marg.dist.est, method = "complete")
plot(marg.cl.com.est, labels = dimnames(marg)[[1]])
rect.hclust(marg.cl.com.est, k = 5, border = "red")
# (8.3) Ward's method
# "ward.D" is the current name of the criterion formerly selected with
# "ward"; plot() replaces the defunct plclust().
marg.cl.ward <- hclust(marg.dist, method = "ward.D")
plot(marg.cl.ward)
rect.hclust(marg.cl.ward, k = 5, border = "red")
# (8.4) k-means with 5 centers on columns 1 to 9
marg.kmeans <- kmeans(marg[, 1:9], centers = 5)
# (8.5) Clustering of variables: dissimilarity is 1 - |correlation|
marg.cor <- cor(marg[, 1:9])
marg.vcl.com <- hclust(as.dist(1 - abs(marg.cor)), method = "complete")
# plot() replaces the defunct plclust(); labels are the column names
plot(marg.vcl.com, labels = dimnames(marg[, 1:9])[[2]])
rect.hclust(marg.vcl.com, k = 5, border = "red")
# (8.6) Divisive method
marg.cl.div <- diana(marg[, 1:9])
# `dc` component of the diana fit
marg.cl.div$dc
plot(marg.cl.div)
# (9) Multidimensional scaling ----------------------------------------
# The cmdscale() call used to be fused into the section header comment,
# leaving marg.scale undefined for the plot below.
marg.scale <- cmdscale(marg.dist)
# Empty canvas, then every point drawn as its row name
plot(marg.scale, type = "n")
text(marg.scale[, 1], marg.scale[, 2], labels = dimnames(marg)[[1]])
# cmdscale.gof() is expected to be defined by the sourced file
source("http://allman.rhon.itam.mx/~lnieto/index_archivos/Stress.txt")
marg.stress <- cmdscale.gof(marg.dist, k = 9)
par(mfrow = c(2, 1))
plot(marg.stress$gof1, main = "gof 1", ylab = "")
plot(marg.stress$gof2, main = "gof 2", ylab = "")
# (10) Factor analysis ------------------------------------------------
# The first factanal() call used to be fused into the section header
# comment, leaving marg.fa3 undefined for the lines below.
# Three factors, unrotated
marg.fa3 <- factanal(marg[, 1:9], factors = 3, rotation = "none")
summary(marg.fa3)
print(marg.fa3)
marg.fa3$loadings
# Rotations applied to the unrotated loadings
varimax(marg.fa3$loadings)
promax(marg.fa3$loadings)
# Refit with varimax rotation and regression scores
marg.fa3 <- factanal(
  marg[, 1:9],
  factors = 3, rotation = "varimax", scores = "regression"
)
par(mfrow = c(1, 1))
# Biplots for each pair of factors
biplot(marg.fa3$scores[, 1:2], marg.fa3$loadings[, 1:2])
biplot(marg.fa3$scores[, c(1, 3)], marg.fa3$loadings[, c(1, 3)])
biplot(marg.fa3$scores[, c(2, 3)], marg.fa3$loadings[, c(2, 3)])
# Four factors, unrotated
marg.fa4 <- factanal(marg[, 1:9], factors = 4, rotation = "none")
summary(marg.fa4)
print(marg.fa4)
# Refit with varimax rotation and regression scores
marg.fa4 <- factanal(
  marg[, 1:9],
  factors = 4, rotation = "varimax", scores = "regression"
)
marg.fa4$loadings
# Biplots for the six pairs of factors on a 2 x 3 panel
par(mfrow = c(2, 3))
biplot(marg.fa4$scores[, 1:2], marg.fa4$loadings[, 1:2])
biplot(marg.fa4$scores[, c(1, 3)], marg.fa4$loadings[, c(1, 3)])
biplot(marg.fa4$scores[, c(1, 4)], marg.fa4$loadings[, c(1, 4)])
biplot(marg.fa4$scores[, 2:3], marg.fa4$loadings[, 2:3])
biplot(marg.fa4$scores[, c(2, 4)], marg.fa4$loadings[, c(2, 4)])
biplot(marg.fa4$scores[, c(3, 4)], marg.fa4$loadings[, c(3, 4)])
# Complete-linkage clustering of the factor scores
marg.dist.fa4 <- dist(marg.fa4$scores)
marg.clust.fa4 <- hclust(marg.dist.fa4, method = "complete")
par(mfrow = c(1, 1))
# plot() replaces plclust(), which is defunct in current versions of R
plot(marg.clust.fa4)
rect.hclust(marg.clust.fa4, k = 4, border = "red")
# Summary of the scores within each of the 4 groups
marg.clust.fa4.cut <- cutree(marg.clust.fa4, k = 4)
by(marg.fa4$scores, marg.clust.fa4.cut, summary)
# (11) Discriminant analysis ------------------------------------------
# (11.1) Marginalization data
# (11.1.A) Linear discriminant: column 13 against columns 1 to 9
marg.lda <- lda(
  marg[, 13] ~ marg[, 1] + marg[, 2] + marg[, 3] + marg[, 4] + marg[, 5] +
    marg[, 6] + marg[, 7] + marg[, 8] + marg[, 9]
)
# Columns 1 to 9 multiplied by the discriminant coefficients
marg.dv <- as.matrix(marg[, 1:9]) %*% as.matrix(marg.lda$scaling)
# First two discriminant variables, every point drawn as its class label
plot(marg.dv[, 1], marg.dv[, 2], type = "n")
# Alternative labelling by row name:
# text(marg.dv[, 1], marg.dv[, 2], labels = dimnames(marg)[[1]])
text(marg.dv[, 1], marg.dv[, 2], labels = factor(marg[, 13]))
# Same model fitted with CV = TRUE, then observed vs. predicted classes
marg.lda.cv <- lda(
  marg[, 13] ~ marg[, 1] + marg[, 2] + marg[, 3] + marg[, 4] + marg[, 5] +
    marg[, 6] + marg[, 7] + marg[, 8] + marg[, 9],
  CV = TRUE
)
marg.lda.cv$class
table(marg[, 13], marg.lda.cv$class)
# (11.1.B) Classification tree: column 13 (as factor) against columns 1-9
marg.tree <- tree(
  factor(marg[, 13]) ~ marg[, 1] + marg[, 2] + marg[, 3] + marg[, 4] +
    marg[, 5] + marg[, 6] + marg[, 7] + marg[, 8] + marg[, 9]
)
summary(marg.tree)
plot(marg.tree)
text(marg.tree)
# Columns 1 and 8, every point drawn as its class label
plot(marg[, 1], marg[, 8], type = "n")
text(marg[, 1], marg[, 8], labels = factor(marg[, 13]))
# NOTE(review): these cut points are presumably the split thresholds read
# off the fitted tree - confirm against summary(marg.tree)
abline(h = 29.665)
abline(v = c(5.55, 12.48, 17.79))
marg.tree.p <- predict(marg.tree, marg[, 1:9])
# Predicted class per row = index of the column holding the largest value
# (the last entry of order()). The row and column counts are taken from
# the prediction matrix instead of the former hard-coded 32 and 5.
pclass <- vapply(
  seq_len(nrow(marg.tree.p)),
  function(i) order(marg.tree.p[i, ])[ncol(marg.tree.p)],
  integer(1)
)
table(marg[, 13], pclass)
# (11.2) Credit data
# The argument name `header` used to be split across two lines, which is
# a parse error; the first row of the file holds the column names.
credit <- read.table(
  "http://allman.rhon.itam.mx/~lnieto/index_archivos/Credit.txt",
  header = TRUE
)
head(credit)
# Standardized copy of every column (scale() defaults)
credit.est <- scale(credit)
# (11.2.A) Linear discriminant: column 1 against standardized columns 2-8
credit.lda <- lda(
  credit[, 1] ~ credit.est[, 2] + credit.est[, 3] + credit.est[, 4] +
    credit.est[, 5] + credit.est[, 6] + credit.est[, 7] + credit.est[, 8]
)
# Standardized predictors multiplied by the discriminant coefficients
credit.dv <- as.matrix(credit.est[, 2:8]) %*% as.matrix(credit.lda$scaling)
n <- nrow(credit.dv)
# Uniform random vertical positions for plotting the first discriminant
# variable, every point drawn as its class label
y <- runif(n)
plot(credit.dv[, 1], y, type = "n")
text(credit.dv[, 1], y, labels = factor(credit[, 1]))
# Same model fitted with CV = TRUE, then observed vs. predicted classes
credit.lda.cv <- lda(
  credit[, 1] ~ credit.est[, 2] + credit.est[, 3] + credit.est[, 4] +
    credit.est[, 5] + credit.est[, 6] + credit.est[, 7] + credit.est[, 8],
  CV = TRUE
)
credit.lda.cv$class
table(credit[, 1], credit.lda.cv$class)
# (11.2.B) Quadratic discriminant: column 1 against standardized
# columns 2 to 8
credit.qda <- qda(
  credit[, 1] ~ credit.est[, 2] + credit.est[, 3] + credit.est[, 4] +
    credit.est[, 5] + credit.est[, 6] + credit.est[, 7] + credit.est[, 8]
)
# Same model fitted with CV = TRUE, then observed vs. predicted classes
credit.qda.cv <- qda(
  credit[, 1] ~ credit.est[, 2] + credit.est[, 3] + credit.est[, 4] +
    credit.est[, 5] + credit.est[, 6] + credit.est[, 7] + credit.est[, 8],
  CV = TRUE
)
credit.qda.cv$class
table(credit[, 1], credit.qda.cv$class)
# (11.2.C) Classification tree: column 1 (as factor) against columns 2-8
# (this header previously repeated the label "(11.1.B)")
credit.tree <- tree(
  factor(credit[, 1]) ~ credit[, 2] + credit[, 3] + credit[, 4] +
    credit[, 5] + credit[, 6] + credit[, 7] + credit[, 8]
)
summary(credit.tree)
plot(credit.tree)
text(credit.tree)
credit.tree.p <- predict(credit.tree, credit[, 2:8])
# Predicted class per row = index of the column holding the largest value
# (the last entry of order()). The row and column counts are taken from
# the prediction matrix instead of the former hard-coded 113 and 2.
pclass <- vapply(
  seq_len(nrow(credit.tree.p)),
  function(i) order(credit.tree.p[i, ])[ncol(credit.tree.p)],
  integer(1)
)
table(credit[, 1], pclass)
# Página 6
# Descargar