USairpollution = matrix(c(46,11,24,47,11,31,110,23,65,26,9,17,17,35,56,10,28,14,14,13,30,10,10,16,29,18,9,31,14,69,10,61,94,26,28,12,29,56,29,8,36,
47.6,56.8,61.5,55.0,47.1,55.2,50.6,54.0,49.7,51.5,66.2,51.9,49.0,49.9,49.1,68.9,52.3,68.4,54.5,61.0,55.6,61.6,75.5,45.7,43.5,59.4,68.3,59.3,51.5,54.6,70.3,50.4,50.0,57.8,51.0,56.7,51.1,55.9,57.3,56.6,54.0,
44,46,368,652,391,35,3344,462,1007,266,641,454,104,1064,412,721,361,136,381,91,291,337,207,569,699,275,204,96,181,1692,213,347,343,197,137,453,379,775,434,125,80,
116,244,497,905,463,71,3369,453,751,540,844,515,201,1513,158,1233,746,529,507,132,593,624,335,717,744,448,361,308,347,1950,582,520,179,299,176,716,531,622,757,277,80,
8.8,8.9,9.1,9.6,12.4,6.5,10.4,7.1,10.9,8.6,10.9,9.0,11.2,10.1,9.0,10.8,9.7,8.8,10.0,8.2,8.3,9.2,9.0,11.8,10.6,7.9,8.4,10.6,10.9,9.6,6.0,9.4,10.6,7.6,8.7,8.7,9.4,9.5,9.3,12.7,9.0,
33.36,7.77,48.34,41.31,36.11,40.75,34.44,39.04,34.99,37.01,35.94,12.95,30.85,30.96,43.37,48.19,38.74,54.47,37.00,48.52,43.11,49.10,59.80,29.07,25.94,46.00,56.77,44.68,30.18,39.93,7.05,36.22,42.75,42.59,15.17,20.66,38.79,35.89,38.89,30.58,40.25,
135,58,115,111,166,148,122,132,155,134,78,86,103,129,127,103,121,116,99,100,123,105,128,123,137,119,113,116,98,115,36,147,125,115,89,67,164,105,111,82,114),41)
cities = c("Albany", "Albuquerque","Atlanta","Baltimore","Buffalo","Charleston","Chicago", "Cincinnati","Cleveland","Columbus","Dallas","Denver","DesMoines","Detroit","Hartford","Houston","Indianapolis","Jacksonville","Kansas City","Little Rock","Louisville","Memphis","Miami", "Milwaukee","Minneapolis","Nashville","New Orleans","Norfolk","Omaha","Philadelphia", "Phoenix","Pittsburgh","Providence","Richmond","Salt Lake City","San Francisco","Seattle","St. Louis","Washington","Wichita","Wilmington")
variables = c("SO2","temp","manu","popul","wind", "precip","predays")
colnames(USairpollution) = variables
rownames(USairpollution)= cities
USairpollution
cor(USairpollution[,-1])
panel.hist <- function(x, ...) {
usr <- par("usr"); on.exit(par(usr))
par(usr = c(usr[1:2], 0, 1.5) )
h <- hist(x, plot = FALSE)
breaks <- h$breaks; nB <- length(breaks)
y <- h$counts; y <- y/max(y)
rect(breaks[-nB], 0, breaks[-1], y, col="grey", ...)
}
pairs(USairpollution[,-1], diag.panel = panel.hist)
usair_pca <- princomp(USairpollution[,-1], cor = TRUE)
summary(usair_pca, loadings = TRUE)
par(mfrow=c(3,2))
out <- sapply(1, function(i) {
plot(USairpollution[,1],usair_pca$scores[,i],
xlab = paste("PC", i, sep = ""),
ylab = "Sulphur dioxide concentration")
})
out <- sapply(2, function(i) {
plot(USairpollution[,1],usair_pca$scores[,i],
xlab = paste("PC", i, sep = ""),
ylab = "Sulphur dioxide concentration")
})
out <- sapply(3, function(i) {
plot(USairpollution[,1],usair_pca$scores[,i],
xlab = paste("PC", i, sep = ""),
ylab = "Sulphur dioxide concentration")
})
out <- sapply(4, function(i) {
plot(USairpollution[,1],usair_pca$scores[,i],
xlab = paste("PC", i, sep = ""),
ylab = "Sulphur dioxide concentration")
})
out <- sapply(5, function(i) {
plot(USairpollution[,1],usair_pca$scores[,i],
xlab = paste("PC", i, sep = ""),
ylab = "Sulphur dioxide concentration")
})
out <- sapply(6, function(i) {
plot(USairpollution[,1],usair_pca$scores[,i],
xlab = paste("PC", i, sep = ""),
ylab = "Sulphur dioxide concentration")
})
USDATA=data.frame(USairpollution)
usair_pca <- princomp(USairpollution[,-1], cor = TRUE)
summary(usair_pca, loadings = TRUE)
usair_reg <- lm(USairpollution[,1]~usair_pca$scores[,1])
summary(usair_reg)
x <- USairpollution
cm <- colMeans(x)
S <- cov(x)
d <- apply(x, 1, function(x) t(x - cm) %*% solve(S) %*% (x - cm))
plot(qc <- qchisq((1:nrow(x) - 1/2) / nrow(x), df = 6), sd <- sort(d),
xlab = expression(paste(chi[6]^2, " Quantile")),
ylab = "Ordered distances", xlim = range(qc) * c(1, 1.1))
oups <- which(rank(abs(qc - sd), ties = "random") > nrow(x) - 3)
text(qc[oups], sd[oups] - 1.5, names(oups))
abline(a = 0, b = 1)
USairpollution<-USairpollution[-oups,]
usair_pca2<- princomp(USairpollution[,-1], cor = TRUE)
summary(usair_pca2, loadings = TRUE)
usair_reg2<- lm(USairpollution[,1]~usair_pca2$scores)
summary(usair_reg2)