Sample R-code for CA and MCA
[2006/11/11]

General information

Below you can find sample R-code for multiple and joint correspondence analysis. A detailed explaination of the steps is given in the appendix of Multiple Correspondence Analysis and Related Methods.

The example data set used here is 'wg93', which you can download below:

   wg93.zip (zipped .txt, ~3KB)

Make sure that the data are available in R as 'dat', i.e. by extracting the zip-file above to 'c:' and entering

dat <- read.table("C:/wg93.txt", header = TRUE, colClasses = "factor")

You can copy the code below and select 'paste commands only' in R.

Sample code for the computations in the Appendix of "Multiple Correspondence Analysis and Related Methods"

# Table 1 (response pattern matrix)

> sup.ind <- 5:7
> dat.act <- dat[,-sup.ind]
> dat.sup <- dat[,sup.ind]
> I       <- dim(dat.act)[1]
> Q       <- dim(dat.act)[2]
> dat[c(1:5,I),]
    A B C D sex age edu
1   2 3 4 3   2   2   3
2   3 4 2 3   1   3   4
3   2 3 2 4   2   3   2
4   2 2 2 2   1   2   3
5   3 3 3 3   1   5   2
871 1 2 2 2   2   3   6

# Table 2 (indicator matrix)

> lev.n  <- unlist(lapply(dat, nlevels))
> n      <- cumsum(lev.n)
> J.t    <- sum(lev.n)
> Q.t    <- dim(dat)[2]
> Z      <- matrix(0, nrow = I, ncol = J.t)
> newdat <- lapply(dat, as.numeric)
> offset <- (c(0, n[-length(n)]))
> for (i in 1:Q.t)
+   Z[1:I + (I * (offset[i] + newdat[[i]] - 1))] <- 1
> fn     <- rep(names(dat), unlist(lapply(dat, nlevels)))
> ln     <- unlist(lapply(dat, levels))
>
> dimnames(Z)[[2]] <- paste(fn, ln, sep = "")
> dimnames(Z)[[1]] <- as.character(1:I)
>
> ind.temp  <- range(n[sup.ind])
> Z.sup.ind <- (ind.temp[1]-1):ind.temp[2]
> Z.act     <- Z[,-Z.sup.ind]
> J         <- dim(Z.act)[2]
> Z.act[c(1:5,I),]
    A1 A2 A3 A4 A5 B1 B2 B3 B4 B5 C1 C2 C3 C4 C5 D1 D2 D3 D4 D5
1    0  1  0  0  0  0  0  1  0  0  0  0  0  1  0  0  0  1  0  0
2    0  0  1  0  0  0  0  0  1  0  0  1  0  0  0  0  0  1  0  0
3    0  1  0  0  0  0  0  1  0  0  0  1  0  0  0  0  0  0  1  0
4    0  1  0  0  0  0  1  0  0  0  0  1  0  0  0  0  1  0  0  0
5    0  0  1  0  0  0  0  1  0  0  0  0  1  0  0  0  0  1  0  0
871  1  0  0  0  0  0  1  0  0  0  0  1  0  0  0  0  1  0  0  0

# Table 3 (inertias of Z)

> P    <- Z.act / sum(Z.act)
> cm   <- apply(P, 2, sum)
> rm   <- apply(P, 1, sum)
> eP   <- rm %*% t(cm)
> S    <- (P - eP) / sqrt(eP)
> dec  <- svd(S)
> lam  <- dec$d[1:(J-Q)]^2
> expl <- 100*(lam / sum(lam))
> rbind(round(lam[c(1:4,(J-Q))], 3),round(expl[c(1:4,(J-Q))], 1))
       [,1]   [,2]  [,3]  [,4]  [,5]
[1,]  0.457  0.431 0.322 0.306 0.125
[2,] 11.400 10.800 8.000 7.700 3.100

# Table 4 (column standard / principal coordinates)

> b.s1 <- dec$v[,1] / sqrt(cm)
> b.s2 <- dec$v[,2] / sqrt(cm)
> g.s1 <- b.s1 * sqrt(lam[1])
> g.s2 <- b.s2 * sqrt(lam[2])
> round(rbind(b.s1,b.s2,g.s1,g.s2)[,c(1:5,16:20)], 3)
         A1    A2     A3     A4     A5     D1     D2     D3     D4     D5
b.s1  1.837 0.546 -0.447 -1.166 -1.995  1.204 -0.221 -0.385 -0.222  0.708
b.s2 -0.727 0.284  1.199 -0.737 -2.470 -1.822  0.007  1.159  0.211 -1.152
g.s1  1.242 0.369 -0.302 -0.788 -1.349  0.814 -0.150 -0.260 -0.150  0.479
g.s2 -0.478 0.187  0.787 -0.484 -1.622 -1.196  0.005  0.761  0.138 -0.756

# Table 5 (row principal coordinates)

> f.s1 <- dec$u[,1] * sqrt(lam[1]) / sqrt(rm)
> f.s2 <- dec$u[,2] * sqrt(lam[2]) / sqrt(rm)
> a.s1 <- f.s1 / sqrt(lam[1])
> a.s2 <- f.s2 / sqrt(lam[2])
> round(rbind(f.s1,f.s2)[,c(1:5,I)], 3)
          1      2     3     4      5   871
f.s1 -0.210 -0.325 0.229 0.303 -0.276 0.626
f.s2  0.443  0.807 0.513 0.387  1.092 0.135

# Table 6 (Burt matrix)

> B <- t(Z.act) %*% Z.act
> B[c(1:5,16:20), c(1:5,16:20)]
    A1  A2  A3  A4 A5 D1  D2  D3  D4  D5
A1 119   0   0   0  0 15  25  17  34  28
A2   0 322   0   0  0 22 102  76  68  54
A3   0   0 204   0  0 10  44  68  58  24
A4   0   0   0 178  0  9  52  28  54  35
A5   0   0   0   0 48  4   9  13  12  10
D1  15  22  10   9  4 60   0   0   0   0
D2  25 102  44  52  9  0 232   0   0   0
D3  17  76  68  28 13  0   0 202   0   0
D4  34  68  58  54 12  0   0   0 226   0
D5  28  54  24  35 10  0   0   0   0 151

# Table 7 (principal inertias of Burt matrix)

> P.2     <- B / sum(B)
> cm.2    <- apply(P.2, 2, sum)
> eP.2    <- cm.2 %*% t(cm.2)
> S.2     <- (P.2 - eP.2) / sqrt(eP.2)
> dec.2   <- eigen(S.2)
> delt.2  <- dec.2$values[1:(J-Q)]
> expl.2  <- 100*(delt.2 / sum(delt.2))
> lam.2   <- delt.2^2
> expl.2b <- 100*(lam.2 / sum(lam.2))
> rbind(round(lam.2, 3),round(expl.2b, 1))[,c(1:4,16)]
       [,1]   [,2]  [,3]  [,4]  [,5]
[1,]  0.209  0.186 0.104 0.094 0.016
[2,] 18.600 16.500 9.200 8.300 1.400

# Addendum: "check" that ds is equivalent to ls:

> rbind(round(delt.2, 3),round(expl.2, 1), round(lam, 3),round(expl, 1))
       [,1]   [,2]  [,3]  [,4]  [,5]  [,6]  [,7]  [,8]  [,9] [,10]
[1,]  0.457  0.431 0.322 0.306 0.276 0.252 0.243 0.235 0.225 0.221
[2,] 11.400 10.800 8.000 7.700 6.900 6.300 6.100 5.900 5.600 5.500
[3,]  0.457  0.431 0.322 0.306 0.276 0.252 0.243 0.235 0.225 0.221
[4,] 11.400 10.800 8.000 7.700 6.900 6.300 6.100 5.900 5.600 5.500
     [,11] [,12] [,13] [,14] [,15] [,16]
[1,]  0.21 0.197 0.178 0.169 0.153 0.125
[2,]  5.20 4.900 4.400 4.200 3.800 3.100
[3,]  0.21 0.197 0.178 0.169 0.153 0.125
[4,]  5.20 4.900 4.400 4.200 3.800 3.100

# Table 8 (eigenvectors, column masses, column sc/pc's)

> u.s1  <- dec.2$vectors[,1]
> u.s2  <- dec.2$vectors[,2]
> a2.s1 <- u.s1 / sqrt(cm.2)
> a2.s2 <- u.s2 / sqrt(cm.2)
> f2.s1 <- a2.s1 * sqrt(lam.2[1])
> f2.s2 <- a2.s2 * sqrt(lam.2[2])
> round(rbind(u.s1,u.s2,cm,a2.s1,a2.s2,f2.s1,f2.s2), 3)[,c(1:5,16:20)]
          A1    A2     A3     A4     A5     D1     D2     D3     D4     D5
u.s1   0.339 0.166 -0.108 -0.264 -0.234  0.158 -0.057 -0.093 -0.056  0.147
u.s2  -0.134 0.086  0.290 -0.167 -0.290 -0.239  0.002  0.279  0.054 -0.240
cm     0.034 0.092  0.059  0.051  0.014  0.017  0.067  0.058  0.065  0.043
a2.s1  1.837 0.546 -0.447 -1.166 -1.995  1.204 -0.221 -0.385 -0.222  0.708
a2.s2 -0.727 0.284  1.199 -0.737 -2.470 -1.822  0.007  1.159  0.211 -1.152
f2.s1  0.840 0.250 -0.204 -0.533 -0.913  0.551 -0.101 -0.176 -0.101  0.324
f2.s2 -0.314 0.123  0.517 -0.318 -1.064 -0.785  0.003  0.499  0.091 -0.496

# Table 9 (adjusted inertias)

> lam.adj   <- (Q/(Q-1))^2 * (delt.2[delt.2 >= 1/Q] - 1/Q) ^ 2
> total.adj <- (Q/(Q-1)) * (sum(delt.2^2) - ((J-Q)/Q^2))
> rbind(round(lam.adj, 5),100 * round(lam.adj / total.adj, 3))
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 0.07646 0.05822 0.0092 0.00567 0.00117 1e-05
[2,] 44.90000 34.20000 5.4000 3.30000 0.70000 0e+00

# Table 10

> nd      <- 2
> maxit   <- 1000
> epsilon <- 0.0001
> lev     <- lev.n[-sup.ind]
> n       <- sum(B)
> li      <- as.vector(c(0,cumsum(lev)))
> dummy   <- matrix(0, J, J)
> for (i in 1:(length(li)-1)) {
+   ind.lo <- li[i]+1
+   ind.up <- li[i+1]
+   ind.to <- diff(li)[i]
+   dummy[rep(ind.lo:ind.up, ind.to) +
+     (rep(ind.lo:ind.up, each = ind.to)-1) * J] <- 1
+   }
> iterate <- function(obj, dummy, nd, adj = FALSE) {
+   Bp     <- obj/n
+   cm     <- apply(Bp, 2, sum)
+   eP     <- cm %*% t(cm)
+   cm.mat <- diag(cm^(-0.5))
+   S      <- cm.mat %*% (Bp - eP) %*% cm.mat
+   dec    <- eigen(S)
+   lam    <- dec$values
+   u      <- dec$vectors
+   phi    <- u[, 1:nd] / matrix(rep(sqrt(cm), nd), ncol = nd)
+   if (adj)
+     lam <- (Q / (Q - 1))^2 * (lam[lam >= 1 / Q] - 1 / Q) ^ 2
+   for (s in 1:nd) {
+     if (exists("coord")) {
+       coord <- coord + lam[s] * (phi[,s] %*% t(phi[,s]))
+       } else {
+       coord <- lam[s] * (phi[,s] %*% t(phi[,s]))
+       }
+     }
+   obj * (1 - dummy) + n * eP * dummy * (1 + coord)
+   }
> # first iteration (adjusted lambda)
> B.star <- iterate(B, dummy, 2, adj = TRUE)
> # subsequent iterations
> k  <- 1
> it <- TRUE
> while (it) {
+   temp    <- iterate(B.star, dummy, 2)
+   delta.B <- max(abs(B.star - temp))
+   B.star  <- temp
+   if (delta.B <= epsilon | k >= maxit) it <- FALSE
+   k <- k + 1
+   }
> round(B.star [c(1:5,16:20), c(1:5, 16:20)], 2)
      A1     A2    A3    A4    A5    D1     D2    D3    D4    D5
A1 30.72  53.14 18.59 13.97  2.58 15.00  25.00 17.00 34.00 28.00
A2 53.14 130.55 76.80 51.80  9.71 22.00 102.00 76.00 68.00 54.00
A3 18.59  76.80 62.95 38.86  6.80 10.00  44.00 68.00 58.00 24.00
A4 13.97  51.80 38.86 53.51 19.85  9.00  52.00 28.00 54.00 35.00
A5  2.58   9.71  6.80 19.85  9.06  4.00   9.00 13.00 12.00 10.00
D1 15.00  22.00 10.00  9.00  4.00  9.02  14.67  5.03 13.27 18.01
D2 25.00 102.00 44.00 52.00  9.00 14.67  62.46 55.78 60.90 38.20
D3 17.00  76.00 68.00 28.00 13.00  5.03  55.78 63.56 56.49 21.14
D4 34.00  68.00 58.00 54.00 12.00 13.27  60.90 56.49 59.74 35.60
D5 28.00  54.00 24.00 35.00 10.00 18.01  38.20 21.14 35.60 38.04

# Table 11 (JCA inertias)

> P.3    <- B.star / sum(B.star)
> cm.3   <- apply(P.3, 2, sum)
> eP.3   <- cm.3 %*% t(cm.3)
> S.3    <- (P.3 - eP.3) / sqrt(eP.3)
> delt.3 <- eigen(S.3)$values
> lam.3  <- delt.3^2
> expl.3 <- 100*(lam.3 / sum(lam.3))
> rbind(round(lam.3, 3),round(expl.3, 1))[,1:2]
       [,1]   [,2]
[1,]  0.099  0.065
[2,] 54.300 35.600

# Table 12 (Inertia conributions of submatrices)

> subinr <- function(B, ind) {
+   nn   <- length(ind)
+   subi <- matrix(NA, nrow = nn, ncol = nn)
+   ind2 <- c(0,cumsum(ind))
+   for (i in 1:nn) {
+     for (j in 1:nn) {
+       tempmat <- B[(ind2[i]+1):(ind2[i+1]),
+                    (ind2[j]+1):(ind2[j+1])]
+       tempmat <- tempmat / sum(tempmat)
+       ec <- apply(tempmat, 2, sum)
+       ex <- ec%*%t(ec)
+       subi[i,j] <- sum((tempmat - ex)^2 / ex)
+       }
+     }
+   subi / nn^2
+   }
> si <- subinr(B.star, lev)
> round(si, 5)
        [,1]    [,2]    [,3]    [,4]
[1,] 0.00745 0.03160 0.01237 0.01670
[2,] 0.04997 0.02244 0.05218 0.00789
[3,] 0.01409 0.04598 0.02103 0.03404
[4,] 0.02474 0.00740 0.03427 0.00381

# Table 13 (supplementary principal coordinates as columns of Z)

> Z.star  <- Z[,Z.sup.ind]
> I.star  <- dim(Z.star)[1]
> cs.star <- apply(Z.star, 2, sum)
> base    <- Z.star / matrix(rep(cs.star, I.star), nrow = I.star, byrow = TRUE)
> b.star1 <- t(base) %*% cbind(a.s1, a.s2)
> round(t(b.star1), 3)
       sex1   sex2   age1   age2   age3   age4  age5  age6
a.s1 -0.143  0.137 -0.166 -0.087 -0.025 -0.031 0.016 0.281
a.s2  0.029 -0.028 -0.014 -0.081 -0.004  0.057 0.047 0.033
     edu1  edu2   edu3   edu4   edu5   edu6
a.s1 0.18 0.161 -0.068 -0.227 -0.172 -0.308
a.s2 0.06 0.093  0.090 -0.279 -0.263 -0.291

# Table 14 (supplementary principal coordinates via cross-tabulation)

> ct.star  <- t(Z.star)%*%Z.act
> I.star2  <- dim(ct.star)[2]
> cs.star2 <- apply(ct.star, 1,sum)
> base2    <- ct.star / matrix(rep(cs.star2, I.star2), ncol = I.star2)
> b.star2  <- base2 %*% cbind(a2.s1, a2.s2)
> round(t(b.star2), 3)
        sex1   sex2   age1   age2   age3   age4  age5  age6
a2.s1 -0.097  0.093 -0.112 -0.059 -0.017 -0.021 0.011 0.190
a2.s2  0.019 -0.018 -0.009 -0.053 -0.003  0.038 0.031 0.022
       edu1  edu2   edu3   edu4   edu5   edu6
a2.s1 0.122 0.109 -0.046 -0.154 -0.116 -0.209
a2.s2 0.039 0.061  0.059 -0.183 -0.172 -0.191

# Table 15 (principal inertias from subset analysis)

> sub.ind  <- c(3,8,13,18)
> P.4      <- B / sum(B)
> cm.4     <- apply(P.4, 2, sum)
> eP.4     <- cm.4 %*% t(cm.4)
> S.sub    <- ((P.4 - eP.4) / sqrt(eP.4)) [-sub.ind,-sub.ind]
> dec.sub  <- eigen(S.sub)
> lam.sub  <- dec.sub$values[1:(J-Q)]^2
> expl.sub <- 100*(lam.sub / sum(lam.sub))
> rbind(round(lam.sub, 4),round(expl.sub, 1))[,c(1:4,(J-Q))]
        [,1]    [,2]   [,3]   [,4]   [,5]
[1,]  0.2016  0.1489  0.098 0.0721 0.0017
[2,] 23.4000 17.3000 11.400 8.4000 0.2000

# Table 16 (column standard and principal coordinates from subset analysis)

> cm.sub   <- cm.4[-sub.ind]
> u.sub.s1 <- dec.sub$vectors[,1]
> u.sub.s2 <- dec.sub$vectors[,2]
> a.sub.s1 <- u.sub.s1 / sqrt(cm.sub)
> a.sub.s2 <- u.sub.s2 / sqrt(cm.sub)
> f.sub.s1 <- a.sub.s1 * sqrt(lam.sub[1])
> f.sub.s2 <- a.sub.s2 * sqrt(lam.sub[2])
> round(rbind(a.sub.s1,a.sub.s2,f.sub.s1,f.sub.s2), 3)[, c(1:4,13:16)]
            A1     A2     A4     A5    D1     D2     D4    D5
a.sub.s1 1.696  0.538 -1.316 -2.449 0.888 -0.273 -0.222 0.482
a.sub.s2 1.153 -0.517 -0.015  2.746 2.482 -0.462 -0.683 1.210
f.sub.s1 0.761  0.242 -0.591 -1.100 0.399 -0.123 -0.100 0.216
f.sub.s2 0.445 -0.200 -0.006  1.060 0.957 -0.178 -0.264 0.467