5 measures of socio–economic indicators on 47 French-speaking Swiss provinces (1888):
Agriculture – % of males involved in agriculture as occupation
Examination – % “draftees” receiving highest mark on army examination
Education – % education beyond primary school for “draftees”
Catholic – % catholic (as opposed to “protestant”)
Infant.Mortality – % live births who live less than 1 year
library(cluster)
attach(swiss)
par(mfrow=c(1,2))
swiss.x=swiss[,-1]
W=rep(0,10)
W[1]=kmeans(swiss.x,1,nstart=25)$tot.withinss
S=rep(0,9)
for (k in 2:10){
swiss.kmeans=kmeans(swiss.x,k,nstart=25)
W[k]=swiss.kmeans$tot.withinss
S[k]=mean(silhouette(swiss.kmeans$cluster,dist(swiss.x,method="euclidean"))[,3])
}
plot(1:10,W[1:10],xlab="number of clusters",ylab="Within-cluster sum of squares",col="blue")
plot(2:10,S[2:10],xlab="number of clusters",ylab="Silhouette coefficient",col="blue")
swiss.kmeans=kmeans(swiss.x,2,nstart=25)
print(swiss.kmeans)
## K-means clustering with 2 clusters of sizes 16, 31
##
## Cluster means:
## Agriculture Examination Education Catholic Infant.Mortality
## 1 65.51875 9.43750 6.62500 96.15000 20.7750
## 2 42.99032 20.12903 13.22581 12.75355 19.5129
##
## Clustering vector:
## Courtelary Delemont Franches-Mnt Moutier Neuveville Porrentruy
## 2 1 1 2 2 1
## Broye Glane Gruyere Sarine Veveyse Aigle
## 1 1 1 1 1 2
## Aubonne Avenches Cossonay Echallens Grandson Lausanne
## 2 2 2 2 2 2
## La Vallee Lavaux Morges Moudon Nyone Orbe
## 2 2 2 2 2 2
## Oron Payerne Paysd'enhaut Rolle Vevey Yverdon
## 2 2 2 2 2 2
## Conthey Entremont Herens Martigwy Monthey St Maurice
## 1 1 1 1 1 1
## Sierre Sion Boudry La Chauxdfnd Le Locle Neuchatel
## 1 1 2 2 2 2
## Val de Ruz ValdeTravers V. De Geneve Rive Droite Rive Gauche
## 2 2 2 2 2
##
## Within cluster sum of squares by cluster:
## [1] 5516.926 25352.259
## (between_SS / total_SS = 72.3 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
par(mfrow=c(1,1),pty="s")
swiss.pca=prcomp(swiss.x,scale=T)
swiss.centers=predict(swiss.pca,swiss.kmeans$centers)
plot(swiss.pca$x[,1],swiss.pca$x[,2],xlab="First pricniple component",ylab="Second principle component",type="n")
points(swiss.centers[,1:2],pch=19,cex=2,col=c("orange","blue"))
text(swiss.pca$x[,1],swiss.pca$x[,2],dimnames(swiss.x)[[1]],col=c("orange","blue")[swiss.kmeans$cluster],cex=.5)
title("2-means",col.main="red")
library(cluster)
par(mfrow=c(1,2))
S=rep(0,9)
for (k in 2:10){
swiss.kmedoids=pam(swiss.x,k,metric="euclidean")
S[k]=swiss.kmedoids$silinfo$avg.width
}
plot(2:10,S[2:10],xlab="number of clusters",ylab="Silhouette coefficient",col="blue")
title("Euclidean distance",col.main="red")
swiss.kmedoids=pam(swiss.x,2,metric="euclidean")
plot(swiss.pca$x[,1],swiss.pca$x[,2],xlab="First pricniple component",ylab="Second principle component",type="n")
swiss.medoids=predict(swiss.pca, swiss.kmedoids$medoids)
points(swiss.medoids,pch=19,cex=2,col=c("orange","blue"))
text(swiss.pca$x[,1],swiss.pca$x[,2],dimnames(swiss.x)[[1]],col=c("orange","blue")[swiss.kmeans$cluster],cex=.5)
title("2-medoids (Euclidean distance)",col.main="red")
print(swiss.kmedoids$medoids)
## Agriculture Examination Education Catholic Infant.Mortality
## Yverdon 49.5 15 8 6.10 22.5
## Glane 67.8 14 8 97.16 24.9
par(mfrow=c(1,1))
plot(swiss.kmedoids,which=2,main=NA)
title("Silhouette plot (Euclidean distance)",col.main="red")
par(mfrow=c(1,2))
S=rep(0,9)
for (k in 2:10){
swiss.kmedoids=pam(swiss.x,k,metric="manhattan")
S[k]=swiss.kmedoids$silinfo$avg.width
}
plot(2:10,S[2:10],xlab="number of clusters",ylab="Silhouette coefficient",col="blue")
title("Manhattan distance",col.main="red")
swiss.kmedoids=pam(swiss.x,2,metric="manhattan")
plot(swiss.pca$x[,1],swiss.pca$x[,2],xlab="First pricniple component",ylab="Second principle component",type="n")
swiss.medoids=predict(swiss.pca, swiss.kmedoids$medoids)
points(swiss.medoids,pch=19,cex=2,col=c("orange","blue"))
text(swiss.pca$x[,1],swiss.pca$x[,2],dimnames(swiss.x)[[1]],col=c("orange","blue")[swiss.kmeans$cluster],cex=.5)
title("2-medoids (Manhattan distance)",col.main="red")
print(swiss.kmedoids$medoids)
## Agriculture Examination Education Catholic Infant.Mortality
## Neuveville 43.5 17 15 5.16 20.6
## Monthey 64.9 7 3 98.22 20.2
par(mfrow=c(1,1))
plot(swiss.kmedoids,which=2,main=NA)
title("Silhouette plot (Manhattan distance)",col.main="red")
library(cluster)
swiss.agnes=agnes(swiss.x,method="single",metric="euclidean")
pltree(swiss.agnes,col.main="red")
cat("\nAgglomerative coefficient=",swiss.agnes$ac,"\n")
##
## Agglomerative coefficient= 0.7620692
swiss.agnes=agnes(swiss.x,method="complete",metric="euclidean")
pltree(swiss.agnes,col.main="red")
cat("\nAgglomerative coefficient=",swiss.agnes$ac,"\n")
##
## Agglomerative coefficient= 0.9085089
swiss.agnes=agnes(swiss.x,method="average",metric="euclidean")
pltree(swiss.agnes,col.main="red")
cat("\nAgglomerative coefficient=",swiss.agnes$ac,"\n")
##
## Agglomerative coefficient= 0.8774795
swiss.agnes=agnes(swiss.x,method="average",metric="manhattan")
pltree(swiss.agnes,col.main="red")
cat("\nAgglomerative coefficient=",swiss.agnes$ac,"\n")
##
## Agglomerative coefficient= 0.8539078
library(cluster)
swiss.diana=diana(swiss.x)
pltree(swiss.diana,col.main="red")
cat("\nDivisive coefficient=",swiss.agnes$ac,"\n")
##
## Divisive coefficient= 0.8539078