Principal Component AnalysisDataInput values must be separated by tabs. Copy and paste from Excel/Numbers.
Basic statisticsCorrelationScatter plot matrices Scree plotResults of principal component analysisPlotBiplotCluster analysis using the principal component scores(Ward method with the squared Euclidean distance technique) R session info
Note
This web application is developed with Shiny. List of Packages Used library(shiny)
library(shinyAce)
library(psych)
Code Source code for this application is mostly based on this website maintained by Dr. Tabata. The code for this web application is available at GitHub.
If you want to run this code on your computer (in a local R session), run the code below:
Citation in Publications Mizumoto, A. (2015). Langtest (Version 1.0) [Web application]. Retrieved from http://langtest.jp Article Mizumoto, A., & Plonsky, L. (2015). R as a lingua franca: Advantages of using R for quantitative research in applied linguistics. Applied Linguistics, Advance online publication. doi:10.1093/applin/amv025 Recommended The handbook of Research in Foreign Language Learning and Teaching(Takeuchi & Mizumoto, 2012) To learn more about R, I suggest this excellent and free e-book (pdf), A Guide to Doing Statistics in Second Language Research Using R, written by Dr. Jenifer Larson-Hall. Also, if you are a cool Mac user and want to use R with GUI, MacR is defenitely the way to go! Author
Atsushi MIZUMOTO,
Ph.D.
![]()
|
library(shiny)
library(shinyAce)
library(psych)
shinyServer(function(input, output) {
bs <- reactive({
if (input$rowname == 1) {
x <- read.csv(text=input$text, sep="\t")
x <- x[, -1]
}else{
x <- read.csv(text=input$text, sep="\t")
}
describe(x)[2:13]
})
correl <- reactive({
if (input$rowname == 1) {
x <- read.csv(text=input$text, sep="\t")
x <- x[, -1]
}else{
x <- read.csv(text=input$text, sep="\t")
}
round(cor(cbind(x), use = "complete"),3)
})
makecorPlot <- function(){
if (input$rowname == 1) {
x <- read.csv(text=input$text, sep="\t")
x <- x[, -1]
}else{
x <- read.csv(text=input$text, sep="\t")
}
pairs.panels(x)
}
output$corPlot <- renderPlot({
print(makecorPlot())
})
makesPlot <- function(){
if (input$rowname == 1) {
dat <- read.csv(text=input$text, sep="\t")
dat <- dat[, -1]
}else{
dat <- read.csv(text=input$text, sep="\t")
}
VSS.scree(dat, main = "")
}
output$sPlot <- renderPlot({
print(makesPlot())
})
pcaresult <- reactive({
options(digits=3)
if (input$rowname == 1) {
dat <- read.csv(text=input$text, sep="\t")
rowvar <- matrix(dat[,1])
rownames(dat) <- rowvar
datpca <- as.matrix(dat[,-1])
colvar <- colnames(datpca)
nr <- nrow(datpca)
nc <- ncol(datpca)
maxpc <- min(nr,nc)
respca <- prcomp(datpca, scale=TRUE)
info <- summary(respca)
eigen <- info[[1]]^2
newinfo <- rbind("Eigen values"=eigen, info$importance)
cat("Importance of components:", "\n")
print(newinfo)
pcloadings <- t(respca$sdev*t(respca$rotation))
cat("\n", "Principal component loadings:", "\n")
print(pcloadings)
pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))
cat("\n", "Principal component scores:", "\n")
print(pcscores)
}else{
dat <- read.csv(text=input$text, sep="\t")
rowvar <- rownames(dat)
datpca <- as.matrix(dat)
colvar <- colnames(datpca)
nr <- nrow(datpca)
nc <- ncol(datpca)
maxpc <- min(nr,nc)
respca <- prcomp(datpca, scale=TRUE)
info <- summary(respca)
eigen <- info[[1]]^2
newinfo <- rbind("Eigen values"=eigen, info$importance)
cat("Importance of components:", "\n")
print(newinfo)
pcloadings <- t(respca$sdev*t(respca$rotation))
cat("\n", "Principal component loadings:", "\n")
print(pcloadings)
pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))
cat("\n", "Principal component scores:", "\n")
print(pcscores)
}
})
makePlot1 <- function() {
if (input$rowname == 1) {
dat <- read.csv(text=input$text, sep="\t")
datpca <- dat[,-1]
colvar <- colnames(datpca)
nr <- nrow(datpca)
nc <- ncol(datpca)
maxpc <- min(nr,nc)
respca <- prcomp(datpca, scale=TRUE)
pcloadings <- t(respca$sdev*t(respca$rotation))
pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))
}else{
dat <- read.csv(text=input$text, sep="\t")
rowvar <- rownames(dat)
datpca <- as.matrix(dat)
colvar <- colnames(datpca)
nr <- nrow(datpca)
nc <- ncol(datpca)
maxpc <- min(nr,nc)
respca <- prcomp(datpca, scale=TRUE)
pcloadings <- t(respca$sdev*t(respca$rotation))
pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))
}
par(mfrow = c(2, 1)) # グラフを縦に2つ並べる
barplot(pcloadings[,1], main="PC 1", ylim=c(-1,1), cex.names=0.7)
barplot(pcloadings[,2], main="PC 2", ylim=c(-1,1), cex.names=0.7)
}
output$pcPlot1 <- renderPlot({
print(makePlot1())
})
makePlot2 <- function() {
if (input$rowname == 1) {
dat <- read.csv(text=input$text, sep="\t")
rowvar <- matrix(dat[,1])
rownames(dat) <- rowvar
datpca <- as.matrix(dat[,-1])
colvar <- colnames(datpca)
nr <- nrow(datpca)
nc <- ncol(datpca)
maxpc <- min(nr,nc)
respca <- prcomp(datpca, scale=TRUE)
pcloadings <- t(respca$sdev*t(respca$rotation))
pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))
}else{
dat <- read.csv(text=input$text, sep="\t")
rowvar <- rownames(dat)
datpca <- as.matrix(dat)
colvar <- colnames(datpca)
nr <- nrow(datpca)
nc <- ncol(datpca)
maxpc <- min(nr,nc)
respca <- prcomp(datpca, scale=TRUE)
pcloadings <- t(respca$sdev*t(respca$rotation))
pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))
}
plot(pcloadings[,1:2], type="n", xlab="PC 1", ylab="PC 2", cex.axis=0.8, cex.lab=0.8)
text(pcloadings[,1:2], labels=colvar, cex=0.9, adj=c(0.25,1.5))
abline(h=0,lty="dotted")
abline(v=0,lty="dotted")
title(main="Principal Component Analysis: PC Loadings")
}
output$pcPlot2 <- renderPlot({
print(makePlot2())
})
makePlot3 <- function() {
if (input$rowname == 1) {
dat <- read.csv(text=input$text, sep="\t")
rowvar <- matrix(dat[,1])
rownames(dat) <- rowvar
datpca <- as.matrix(dat[,-1])
colvar <- colnames(datpca)
nr <- nrow(datpca)
nc <- ncol(datpca)
maxpc <- min(nr,nc)
respca <- prcomp(datpca, scale=TRUE)
pcloadings <- t(respca$sdev*t(respca$rotation))
pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))
}else{
dat <- read.csv(text=input$text, sep="\t")
rowvar <- rownames(dat)
datpca <- as.matrix(dat)
colvar <- colnames(datpca)
nr <- nrow(datpca)
nc <- ncol(datpca)
maxpc <- min(nr,nc)
respca <- prcomp(datpca, scale=TRUE)
pcloadings <- t(respca$sdev*t(respca$rotation))
pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))
}
# PC1のminとmax(x軸の最大・最小)
PCSpc1min <- min(pcscores[,1:2][,1])
PCSpc1min <- PCSpc1min-(abs(PCSpc1min-PCSpc1min*1.25))
PCSpc1max <- max(pcscores[,1:2][,1])
PCSpc1max <- PCSpc1max*1.25
# PC2のminとmax(y軸の最大・最小)
PCSpc2min <- min(pcscores[,1:2][,2])
PCSpc2min <- PCSpc2min-(abs(PCSpc2min-PCSpc2min*1.25))
PCSpc2max <- max(pcscores[,1:2][,2])
PCSpc2max <- PCSpc2max*1.25
plot(pcscores[,1:2], xlab="PC 1", ylab="PC 2", type="n", xlim=c(PCSpc1min, PCSpc1max), ylim=c(PCSpc2min, PCSpc2max), cex.axis=0.8,cex.lab=0.8)
#points(pcscores[,1:2],col="red",pch="*",cex=2)
text(pcscores[,1:2], labels=rowvar, cex=0.9, adj=c(0.25,1.5))
abline(h=0,lty="dotted")
abline(v=0,lty="dotted")
title(main="Principal Component Analysis: PC Scores")
}
output$pcPlot3 <- renderPlot({
print(makePlot3())
})
makePlot4 <- function() {
if (input$rowname == 1) {
dat <- read.csv(text=input$text, sep="\t")
rowvar <- matrix(dat[,1])
rownames(dat) <- rowvar
datpca <- as.matrix(dat[,-1])
colvar <- colnames(datpca)
nr <- nrow(datpca)
nc <- ncol(datpca)
maxpc <- min(nr,nc)
respca <- prcomp(datpca, scale=TRUE)
pcloadings <- t(respca$sdev*t(respca$rotation))
pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))
}else{
dat <- read.csv(text=input$text, sep="\t")
rowvar <- rownames(dat)
datpca <- as.matrix(dat)
colvar <- colnames(datpca)
nr <- nrow(datpca)
nc <- ncol(datpca)
maxpc <- min(nr,nc)
respca <- prcomp(datpca, scale=TRUE)
pcloadings <- t(respca$sdev*t(respca$rotation))
pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))
}
# PC1のminとmax(x軸の最大・最小)
pc1min <- min(c(pcloadings[,1:2][,1], pcscores[,1:2][,1]))
pc1min <- pc1min*1.25
pc1max <- max(c(pcloadings[,1:2][,1], pcscores[,1:2][,1]))
pc1max <- pc1max*1.25
# PC2のminとmax(y軸の最大・最小)
pc2min <- min(c(pcloadings[,1:2][,2], pcscores[,1:2][,2]))
pc2min <- pc2min*1.25
pc2max <- max(c(pcloadings[,1:2][,2], pcscores[,1:2][,2]))
pc2max <- pc2max*1.25
# 主成分負荷量と主成分得点を同時にプロット
biplot(pcscores[,1:2], pcloadings[,1:2], var.axes = F, xlim=c(pc1min, pc1max), ylim=c(pc2min, pc2max))
abline(v=0, lty=3) #0で縦に線を引き,破線(lty=3)を引く
abline(h=0, lty=3) #0で横に線を引き,破線(lty=3)を引く
}
output$pcPlot4 <- renderPlot({
print(makePlot4())
})
makePlot5 <- function() {
if (input$rowname == 1) {
dat <- read.csv(text=input$text, sep="\t")
rowvar <- matrix(dat[,1])
rownames(dat) <- rowvar
datpca <- as.matrix(dat[,-1])
colvar <- colnames(datpca)
nr <- nrow(datpca)
nc <- ncol(datpca)
maxpc <- min(nr,nc)
respca <- prcomp(datpca, scale=TRUE)
pcloadings <- t(respca$sdev*t(respca$rotation))
pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))
}else{
dat <- read.csv(text=input$text, sep="\t")
rowvar <- rownames(dat)
datpca <- as.matrix(dat)
colvar <- colnames(datpca)
nr <- nrow(datpca)
nc <- ncol(datpca)
maxpc <- min(nr,nc)
respca <- prcomp(datpca, scale=TRUE)
pcloadings <- t(respca$sdev*t(respca$rotation))
pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))
}
dat$PCA1 <- pcscores[,1]
dat$PCA2 <- pcscores[,2]
z <- dat[, c("PCA1","PCA2")]
z.d <- dist(z)^2 # ユークリッド距離の平方(2乗)
result <- hclust(z.d, method="ward") # クラスター分析(Ward法と平方ユークリッド距離使用)
par(mar=c(1,6,3,1))
plot(result, xlab="", sub="") # デンドログラム作図
}
output$pcPlot5 <- renderPlot({
print(makePlot5())
})
check <- reactive({
if (input$rowname == 1) {
dat <- read.csv(text=input$text, sep="\t")
rowvar <- matrix(dat[,1])
rownames(dat) <- rowvar
datpca <- as.matrix(dat[,-1])
colvar <- colnames(datpca)
nr <- nrow(datpca)
nc <- ncol(datpca)
maxpc <- min(nr,nc)
respca <- prcomp(datpca, scale=TRUE)
pcloadings <- t(respca$sdev*t(respca$rotation))
pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))
}else{
dat <- read.csv(text=input$text, sep="\t")
rowvar <- rownames(dat)
datpca <- as.matrix(dat)
colvar <- colnames(datpca)
nr <- nrow(datpca)
nc <- ncol(datpca)
maxpc <- min(nr,nc)
respca <- prcomp(datpca, scale=TRUE)
pcloadings <- t(respca$sdev*t(respca$rotation))
pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))
}
dat$PCA1 <- pcscores[,1]
dat$PCA2 <- pcscores[,2]
dat$PCA3 <- pcscores[,3]
dat
})
info <- reactive({
info1 <- paste("This analysis was conducted with ", strsplit(R.version$version.string, " \\(")[[1]][1], ".", sep = "")# バージョン情報
info2 <- paste("It was executed on ", date(), ".", sep = "")# 実行日時
cat(sprintf(info1), "\n")
cat(sprintf(info2), "\n")
})
output$info.out <- renderPrint({
info()
})
output$textarea.out <- renderPrint({
bs()
})
output$correl.out <- renderPrint({
correl()
})
output$pcaresult.out <- renderPrint({
pcaresult()
})
})
library(shiny)
library(shinyAce)
shinyUI(bootstrapPage(
headerPanel("Principal Component Analysis"),
mainPanel(
tabsetPanel(
tabPanel("Main",
h3("Data"),
p('Input values must be separated by tabs. Copy and paste from Excel/Numbers.'),
p(HTML("<b><div style='background-color:#FADDF2;border:1px solid black;'>Please make sure that your data includes the header (variable names) in the first row.</div></b>")),
strong('Option:'),
checkboxInput("rowname", label = strong("The first column contains case names."), value = T),
aceEditor("text", value="Unit\tWordsPerSentence\tFleschKincaidGrade\tNarrativity\tSyntacticSimplicity\tWordConcreteness\tReferentialCohesion\nU01\t13.5\t6.2\t79.1\t70.19\t60.64\t24.51\nU02\t16.6\t8.1\t57.53\t68.79\t95.91\t59.87\nU03\t15.9\t9.4\t31.21\t54.38\t29.81\t6.43\nU04\t17.5\t9.8\t42.07\t56.36\t47.21\t27.76\nU05\t14.9\t8.8\t53.19\t72.24\t58.32\t13.57\nU06\t16.9\t10\t34.83\t63.31\t57.93\t5.82\nU07\t20\t7.7\t41.29\t33.72\t84.38\t93.57\nU08\t14.3\t7.1\t69.85\t82.38\t35.2\t3.22\nU09\t16.7\t7.6\t89.07\t34.46\t89.62\t32.64\nU10\t14\t6.5\t44.04\t75.49\t71.9\t19.49\nU11\t17.4\t6\t93.82\t50.8\t57.53\t47.61\nU12\t21.5\t11.1\t37.83\t35.57\t43.64\t20.9\nU13\t18.5\t10.2\t28.77\t59.48\t32.28\t18.41\nU14\t17.6\t9.5\t64.8\t42.47\t65.91\t50.4\nU15\t20.2\t9.9\t28.43\t34.46\t84.61\t15.15",
mode="r", theme="cobalt"),
br(),
h3("Basic statistics"),
verbatimTextOutput("textarea.out"),
br(),
h3("Correlation"),
verbatimTextOutput("correl.out"),
br(),
strong("Scatter plot matrices"),
br(),
plotOutput("corPlot"),
br(),
h3("Scree plot"),
plotOutput("sPlot", width="80%"),
br(),
br(),
h3("Results of principal component analysis"),
verbatimTextOutput("pcaresult.out"),
br(),
br(),
h3("Plot"),
plotOutput("pcPlot1", height = "600px"),
br(),
plotOutput("pcPlot2", height = "500px"),
br(),
plotOutput("pcPlot3", height = "500px"),
br(),
h4("Biplot"),
plotOutput("pcPlot4", height = "700px"),
br(),
h4("Cluster analysis using the principal component scores"),
p("(Ward method with the squared Euclidean distance technique)"),
plotOutput("pcPlot5"),
br(),
br(),
strong('R session info'),
verbatimTextOutput("info.out")
),
tabPanel("About",
strong('Note'),
p('This web application is developed with',
a("Shiny.", href="http://www.rstudio.com/shiny/", target="_blank"),
''),
br(),
strong('List of Packages Used'), br(),
code('library(shiny)'),br(),
code('library(shinyAce)'),br(),
code('library(psych)'),br(),
br(),
strong('Code'),
p('Source code for this application is mostly based on',
a("this website maintained by Dr. Tabata.", href='http://www.lang.osaka-u.ac.jp/~tabata/JAECS2004/multi.html', target="_blank")),
p('The code for this web application is available at',
a('GitHub.', href='https://github.com/mizumot/pca', target="_blank")),
p('If you want to run this code on your computer (in a local R session), run the code below:',
br(),
code('library(shiny)'),br(),
code('runGitHub("pca","mizumot")')
),
br(),
strong('Citation in Publications'),
p('Mizumoto, A. (2015). Langtest (Version 1.0) [Web application]. Retrieved from http://langtest.jp'),
br(),
strong('Article'),
p('Mizumoto, A., & Plonsky, L. (2015).', a("R as a lingua franca: Advantages of using R for quantitative research in applied linguistics.", href='http://applij.oxfordjournals.org/content/early/2015/06/24/applin.amv025.abstract', target="_blank"), em('Applied Linguistics,'), 'Advance online publication. doi:10.1093/applin/amv025'),
br(),
strong('Recommended'),
br(),
a("The handbook of Research in Foreign Language Learning and Teaching(Takeuchi & Mizumoto, 2012)", href='http://mizumot.com/handbook/', target="_blank"),
br(),
br(),
p('To learn more about R, I suggest this excellent and free e-book (pdf),',
a("A Guide to Doing Statistics in Second Language Research Using R,", href="http://cw.routledge.com/textbooks/9780805861853/guide-to-R.asp", target="_blank"),
'written by Dr. Jenifer Larson-Hall.'),
p('Also, if you are a cool Mac user and want to use R with GUI,',
a("MacR", href="https://sites.google.com/site/casualmacr/", target="_blank"),
'is defenitely the way to go!'),
br(),
strong('Author'),
p(a("Atsushi MIZUMOTO,", href="http://mizumot.com", target="_blank"),' Ph.D.',br(),
'Professor of Applied Linguistics',br(),
'Faculty of Foreign Language Studies /',br(),
'Graduate School of Foreign Language Education and Research,',br(),
'Kansai University, Osaka, Japan'),
br(),
a(img(src="http://i.creativecommons.org/p/mark/1.0/80x15.png"), target="_blank", href="http://creativecommons.org/publicdomain/mark/1.0/"),
p(br())
)
))
))