Principal Component Analysis

Data

Input values must be separated by tabs. Copy and paste from Excel/Numbers.

Please make sure that your data includes the header (variable names) in the first row.

Option:

                
                

Basic statistics


                

Correlation


                
Scatter plot matrices

Scree plot



Results of principal component analysis


                

Plot




Biplot


Cluster analysis using the principal component scores

(Ward method with the squared Euclidean distance technique)



R session info

              
Note

This web application is developed with Shiny.


List of Packages Used
library(shiny)
library(shinyAce)
library(psych)

Code

Source code for this application is mostly based on this website maintained by Dr. Tabata.

The code for this web application is available at GitHub.

If you want to run this code on your computer (in a local R session), run the code below:
library(shiny)
runGitHub("pca","mizumot")


Citation in Publications

Mizumoto, A. (2015). Langtest (Version 1.0) [Web application]. Retrieved from http://langtest.jp


Article

Mizumoto, A., & Plonsky, L. (2015). R as a lingua franca: Advantages of using R for quantitative research in applied linguistics. Applied Linguistics, Advance online publication. doi:10.1093/applin/amv025


Recommended
The handbook of Research in Foreign Language Learning and Teaching(Takeuchi & Mizumoto, 2012)

To learn more about R, I suggest this excellent and free e-book (pdf), A Guide to Doing Statistics in Second Language Research Using R, written by Dr. Jenifer Larson-Hall.

Also, if you are a cool Mac user and want to use R with GUI, MacR is defenitely the way to go!


Author

Atsushi MIZUMOTO, Ph.D.
Associate Professor of Applied Linguistics
Faculty of Foreign Language Studies /
Graduate School of Foreign Language Education and Research,
Kansai University, Osaka, Japan



Code for "Principal Component Analysis"
by Atsushi Mizumoto

show with app
library(shiny)
library(shinyAce)
library(psych)



shinyServer(function(input, output) {
    
    
    
    bs <- reactive({
        if (input$rowname == 1) {
            x <- read.csv(text=input$text, sep="\t")
            x <- x[, -1]
        }else{
            x <- read.csv(text=input$text, sep="\t")
        }
        describe(x)[2:13]
    })
    
    
    
    correl <- reactive({
        if (input$rowname == 1) {
            x <- read.csv(text=input$text, sep="\t")
            x <- x[, -1]
        }else{
            x <- read.csv(text=input$text, sep="\t")
        }
        round(cor(cbind(x), use = "complete"),3)
    })
    
    
    
    makecorPlot <- function(){
        if (input$rowname == 1) {
            x <- read.csv(text=input$text, sep="\t")
            x <- x[, -1]
        }else{
            x <- read.csv(text=input$text, sep="\t")
        }
        pairs.panels(x)
    }
    
    output$corPlot <- renderPlot({
        print(makecorPlot())
    })


    
    makesPlot <- function(){
        if (input$rowname == 1) {
            dat <- read.csv(text=input$text, sep="\t")
            dat <- dat[, -1]
        }else{
            dat <- read.csv(text=input$text, sep="\t")
        }
        VSS.scree(dat, main = "")
    }
    
    output$sPlot <- renderPlot({
        print(makesPlot())
    })
    
    
    
    
    
    
    pcaresult <- reactive({
        options(digits=3)
        
        if (input$rowname == 1) {
            dat <- read.csv(text=input$text, sep="\t")
            rowvar <- matrix(dat[,1])
            rownames(dat) <- rowvar
            datpca <- as.matrix(dat[,-1])
            colvar <- colnames(datpca)
            nr <- nrow(datpca)
            nc <- ncol(datpca)
            maxpc <- min(nr,nc)
            respca <- prcomp(datpca, scale=TRUE)

            info <- summary(respca)
            eigen <- info[[1]]^2
            newinfo <- rbind("Eigen values"=eigen, info$importance)
            cat("Importance of components:", "\n")
            print(newinfo)
            
            pcloadings <- t(respca$sdev*t(respca$rotation))
            cat("\n", "Principal component loadings:", "\n")
            print(pcloadings)
            
            pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))
            cat("\n", "Principal component scores:", "\n")
            print(pcscores)
        
        }else{
            dat <- read.csv(text=input$text, sep="\t")
            rowvar <- rownames(dat)
            datpca <- as.matrix(dat)
            colvar <- colnames(datpca)
            nr <- nrow(datpca)
            nc <- ncol(datpca)
            maxpc <- min(nr,nc)
            respca <- prcomp(datpca, scale=TRUE)
            
            info <- summary(respca)
            eigen <- info[[1]]^2
            newinfo <- rbind("Eigen values"=eigen, info$importance)
            cat("Importance of components:", "\n")
            print(newinfo)
            
            pcloadings <- t(respca$sdev*t(respca$rotation))
            cat("\n", "Principal component loadings:", "\n")
            print(pcloadings)
            
            pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))
            cat("\n", "Principal component scores:", "\n")
            print(pcscores)
        }
    })
    
    
    
    makePlot1 <- function() {
        if (input$rowname == 1) {
            dat <- read.csv(text=input$text, sep="\t")
            datpca <- dat[,-1]
            colvar <- colnames(datpca)
            nr <- nrow(datpca)
            nc <- ncol(datpca)
            maxpc <- min(nr,nc)
            respca <- prcomp(datpca, scale=TRUE)
            pcloadings <- t(respca$sdev*t(respca$rotation))
            pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))

        }else{
            dat <- read.csv(text=input$text, sep="\t")
            rowvar <- rownames(dat)
            datpca <- as.matrix(dat)
            colvar <- colnames(datpca)
            nr <- nrow(datpca)
            nc <- ncol(datpca)
            maxpc <- min(nr,nc)
            respca <- prcomp(datpca, scale=TRUE)
            pcloadings <- t(respca$sdev*t(respca$rotation))
            pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))

        }
        
        par(mfrow = c(2, 1))   # グラフを縦に2つ並べる
        barplot(pcloadings[,1], main="PC 1", ylim=c(-1,1), cex.names=0.7)
        barplot(pcloadings[,2], main="PC 2", ylim=c(-1,1), cex.names=0.7)
    }



    output$pcPlot1 <- renderPlot({
        print(makePlot1())
    })
    
    
    
    makePlot2 <- function() {
        if (input$rowname == 1) {
            dat <- read.csv(text=input$text, sep="\t")
            rowvar <- matrix(dat[,1])
            rownames(dat) <- rowvar
            datpca <- as.matrix(dat[,-1])
            colvar <- colnames(datpca)
            nr <- nrow(datpca)
            nc <- ncol(datpca)
            maxpc <- min(nr,nc)
            respca <- prcomp(datpca, scale=TRUE)
            pcloadings <- t(respca$sdev*t(respca$rotation))
            pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))

        }else{
            dat <- read.csv(text=input$text, sep="\t")
            rowvar <- rownames(dat)
            datpca <- as.matrix(dat)
            colvar <- colnames(datpca)
            nr <- nrow(datpca)
            nc <- ncol(datpca)
            maxpc <- min(nr,nc)
            respca <- prcomp(datpca, scale=TRUE)
            pcloadings <- t(respca$sdev*t(respca$rotation))
            pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))
        }


        plot(pcloadings[,1:2], type="n", xlab="PC 1", ylab="PC 2", cex.axis=0.8, cex.lab=0.8)
        text(pcloadings[,1:2], labels=colvar, cex=0.9, adj=c(0.25,1.5))
        abline(h=0,lty="dotted")
        abline(v=0,lty="dotted")
        title(main="Principal Component Analysis: PC Loadings")
    }
    
    
    output$pcPlot2 <- renderPlot({
        print(makePlot2())
    })
    
    
    
    makePlot3 <- function() {
        if (input$rowname == 1) {
            dat <- read.csv(text=input$text, sep="\t")
            rowvar <- matrix(dat[,1])
            rownames(dat) <- rowvar
            datpca <- as.matrix(dat[,-1])
            colvar <- colnames(datpca)
            nr <- nrow(datpca)
            nc <- ncol(datpca)
            maxpc <- min(nr,nc)
            respca <- prcomp(datpca, scale=TRUE)
            pcloadings <- t(respca$sdev*t(respca$rotation))
            pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))

        }else{
            dat <- read.csv(text=input$text, sep="\t")
            rowvar <- rownames(dat)
            datpca <- as.matrix(dat)
            colvar <- colnames(datpca)
            nr <- nrow(datpca)
            nc <- ncol(datpca)
            maxpc <- min(nr,nc)
            respca <- prcomp(datpca, scale=TRUE)
            pcloadings <- t(respca$sdev*t(respca$rotation))
            pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))

        }
        
        # PC1のminとmax(x軸の最大・最小)
        PCSpc1min <- min(pcscores[,1:2][,1])
        PCSpc1min <- PCSpc1min-(abs(PCSpc1min-PCSpc1min*1.25))
        PCSpc1max <- max(pcscores[,1:2][,1])
        PCSpc1max <- PCSpc1max*1.25
        # PC2のminとmax(y軸の最大・最小)
        PCSpc2min <- min(pcscores[,1:2][,2])
        PCSpc2min <- PCSpc2min-(abs(PCSpc2min-PCSpc2min*1.25))
        PCSpc2max <- max(pcscores[,1:2][,2])
        PCSpc2max <- PCSpc2max*1.25
        
        plot(pcscores[,1:2], xlab="PC 1", ylab="PC 2", type="n", xlim=c(PCSpc1min, PCSpc1max), ylim=c(PCSpc2min, PCSpc2max), cex.axis=0.8,cex.lab=0.8)
        #points(pcscores[,1:2],col="red",pch="*",cex=2)
        text(pcscores[,1:2], labels=rowvar, cex=0.9, adj=c(0.25,1.5))
        abline(h=0,lty="dotted")
        abline(v=0,lty="dotted")
        title(main="Principal Component Analysis: PC Scores")
    }
    
    
    output$pcPlot3 <- renderPlot({
        print(makePlot3())
    })
    
    
    
    makePlot4 <- function() {
        if (input$rowname == 1) {
            dat <- read.csv(text=input$text, sep="\t")
            rowvar <- matrix(dat[,1])
            rownames(dat) <- rowvar
            datpca <- as.matrix(dat[,-1])
            colvar <- colnames(datpca)
            nr <- nrow(datpca)
            nc <- ncol(datpca)
            maxpc <- min(nr,nc)
            respca <- prcomp(datpca, scale=TRUE)
            pcloadings <- t(respca$sdev*t(respca$rotation))
            pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))

        }else{
            dat <- read.csv(text=input$text, sep="\t")
            rowvar <- rownames(dat)
            datpca <- as.matrix(dat)
            colvar <- colnames(datpca)
            nr <- nrow(datpca)
            nc <- ncol(datpca)
            maxpc <- min(nr,nc)
            respca <- prcomp(datpca, scale=TRUE)
            pcloadings <- t(respca$sdev*t(respca$rotation))
            pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))

        }
        
        # PC1のminとmax(x軸の最大・最小)
        pc1min <- min(c(pcloadings[,1:2][,1], pcscores[,1:2][,1]))
        pc1min <- pc1min*1.25
        pc1max <- max(c(pcloadings[,1:2][,1], pcscores[,1:2][,1]))
        pc1max <- pc1max*1.25
        # PC2のminとmax(y軸の最大・最小)
        pc2min <- min(c(pcloadings[,1:2][,2], pcscores[,1:2][,2]))
        pc2min <- pc2min*1.25
        pc2max <- max(c(pcloadings[,1:2][,2], pcscores[,1:2][,2]))
        pc2max <- pc2max*1.25
        
        # 主成分負荷量と主成分得点を同時にプロット
        biplot(pcscores[,1:2], pcloadings[,1:2], var.axes = F, xlim=c(pc1min, pc1max), ylim=c(pc2min, pc2max))
        abline(v=0, lty=3) #0で縦に線を引き,破線(lty=3)を引く
        abline(h=0, lty=3) #0で横に線を引き,破線(lty=3)を引く
    }
    
    
    output$pcPlot4 <- renderPlot({
        print(makePlot4())
    })
    
    
    
    makePlot5 <- function() {
        if (input$rowname == 1) {
            dat <- read.csv(text=input$text, sep="\t")
            rowvar <- matrix(dat[,1])
            rownames(dat) <- rowvar
            datpca <- as.matrix(dat[,-1])
            colvar <- colnames(datpca)
            nr <- nrow(datpca)
            nc <- ncol(datpca)
            maxpc <- min(nr,nc)
            respca <- prcomp(datpca, scale=TRUE)
            pcloadings <- t(respca$sdev*t(respca$rotation))
            pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))

        }else{
            dat <- read.csv(text=input$text, sep="\t")
            rowvar <- rownames(dat)
            datpca <- as.matrix(dat)
            colvar <- colnames(datpca)
            nr <- nrow(datpca)
            nc <- ncol(datpca)
            maxpc <- min(nr,nc)
            respca <- prcomp(datpca, scale=TRUE)
            pcloadings <- t(respca$sdev*t(respca$rotation))
            pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))

        }
        
        dat$PCA1 <- pcscores[,1]
        dat$PCA2 <- pcscores[,2]
        
        z <- dat[, c("PCA1","PCA2")]
        z.d <- dist(z)^2    # ユークリッド距離の平方(2乗)
        result <- hclust(z.d, method="ward") # クラスター分析(Ward法と平方ユークリッド距離使用)
        par(mar=c(1,6,3,1))
        plot(result, xlab="", sub="") # デンドログラム作図
    }
    
    
    output$pcPlot5 <- renderPlot({
        print(makePlot5())
    })
    
    
    
    check <- reactive({
        if (input$rowname == 1) {
            dat <- read.csv(text=input$text, sep="\t")
            rowvar <- matrix(dat[,1])
            rownames(dat) <- rowvar
            datpca <- as.matrix(dat[,-1])
            colvar <- colnames(datpca)
            nr <- nrow(datpca)
            nc <- ncol(datpca)
            maxpc <- min(nr,nc)
            respca <- prcomp(datpca, scale=TRUE)
            pcloadings <- t(respca$sdev*t(respca$rotation))
            pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))

        }else{
            dat <- read.csv(text=input$text, sep="\t")
            rowvar <- rownames(dat)
            datpca <- as.matrix(dat)
            colvar <- colnames(datpca)
            nr <- nrow(datpca)
            nc <- ncol(datpca)
            maxpc <- min(nr,nc)
            respca <- prcomp(datpca, scale=TRUE)
            pcloadings <- t(respca$sdev*t(respca$rotation))
            pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))

        }
        
        dat$PCA1 <- pcscores[,1]
        dat$PCA2 <- pcscores[,2]
        dat$PCA3 <- pcscores[,3]
        dat
    })




    info <- reactive({
        info1 <- paste("This analysis was conducted with ", strsplit(R.version$version.string, " \\(")[[1]][1], ".", sep = "")# バージョン情報
        info2 <- paste("It was executed on ", date(), ".", sep = "")# 実行日時
        cat(sprintf(info1), "\n")
        cat(sprintf(info2), "\n")
    })
    
    output$info.out <- renderPrint({
        info()
    })
    




    output$textarea.out <- renderPrint({
        bs()
    })

    output$correl.out <- renderPrint({
        correl()
    })
    
    output$pcaresult.out <- renderPrint({
        pcaresult()
    })
    

})
library(shiny)
library(shinyAce)


shinyUI(bootstrapPage(


    headerPanel("Principal Component Analysis"),

    mainPanel(
        tabsetPanel(

        tabPanel("Main",

            h3("Data"),
            p('Input values must be separated by tabs. Copy and paste from Excel/Numbers.'),
            p(HTML("<b><div style='background-color:#FADDF2;border:1px solid black;'>Please make sure that your data includes the header (variable names) in the first row.</div></b>")),

            strong('Option:'),
            checkboxInput("rowname", label = strong("The first column contains case names."), value = T),

            aceEditor("text", value="Unit\tWordsPerSentence\tFleschKincaidGrade\tNarrativity\tSyntacticSimplicity\tWordConcreteness\tReferentialCohesion\nU01\t13.5\t6.2\t79.1\t70.19\t60.64\t24.51\nU02\t16.6\t8.1\t57.53\t68.79\t95.91\t59.87\nU03\t15.9\t9.4\t31.21\t54.38\t29.81\t6.43\nU04\t17.5\t9.8\t42.07\t56.36\t47.21\t27.76\nU05\t14.9\t8.8\t53.19\t72.24\t58.32\t13.57\nU06\t16.9\t10\t34.83\t63.31\t57.93\t5.82\nU07\t20\t7.7\t41.29\t33.72\t84.38\t93.57\nU08\t14.3\t7.1\t69.85\t82.38\t35.2\t3.22\nU09\t16.7\t7.6\t89.07\t34.46\t89.62\t32.64\nU10\t14\t6.5\t44.04\t75.49\t71.9\t19.49\nU11\t17.4\t6\t93.82\t50.8\t57.53\t47.61\nU12\t21.5\t11.1\t37.83\t35.57\t43.64\t20.9\nU13\t18.5\t10.2\t28.77\t59.48\t32.28\t18.41\nU14\t17.6\t9.5\t64.8\t42.47\t65.91\t50.4\nU15\t20.2\t9.9\t28.43\t34.46\t84.61\t15.15",
                mode="r", theme="cobalt"),

            br(),

            h3("Basic statistics"),
            verbatimTextOutput("textarea.out"),

            br(),

            h3("Correlation"),
            verbatimTextOutput("correl.out"),

            br(),

            strong("Scatter plot matrices"),

            br(),


            plotOutput("corPlot"),

            br(),

            h3("Scree plot"),


            plotOutput("sPlot", width="80%"),

            br(),
            br(),

            h3("Results of principal component analysis"),
            verbatimTextOutput("pcaresult.out"),

            br(),
            br(),

            h3("Plot"),

            plotOutput("pcPlot1", height = "600px"),

            br(),


            plotOutput("pcPlot2", height = "500px"),

            br(),


            plotOutput("pcPlot3", height = "500px"),

            br(),

            h4("Biplot"),

            plotOutput("pcPlot4", height = "700px"),

            br(),

            h4("Cluster analysis using the principal component scores"),
            p("(Ward method with the squared Euclidean distance technique)"),

            plotOutput("pcPlot5"),

            br(),
            br(),

            strong('R session info'),
            verbatimTextOutput("info.out")

            ),


        tabPanel("About",

            strong('Note'),
            p('This web application is developed with',
            a("Shiny.", href="http://www.rstudio.com/shiny/", target="_blank"),
            ''),

            br(),

            strong('List of Packages Used'), br(),
            code('library(shiny)'),br(),
            code('library(shinyAce)'),br(),
            code('library(psych)'),br(),

            br(),

            strong('Code'),

            p('Source code for this application is mostly based on',
            a("this website maintained by Dr. Tabata.", href='http://www.lang.osaka-u.ac.jp/~tabata/JAECS2004/multi.html', target="_blank")),

            p('The code for this web application is available at',
            a('GitHub.', href='https://github.com/mizumot/pca', target="_blank")),

            p('If you want to run this code on your computer (in a local R session), run the code below:',
            br(),
            code('library(shiny)'),br(),
            code('runGitHub("pca","mizumot")')
            ),

            br(),

            strong('Citation in Publications'),
            p('Mizumoto, A. (2015). Langtest (Version 1.0) [Web application]. Retrieved from http://langtest.jp'),

            br(),

            strong('Article'),
            p('Mizumoto, A., & Plonsky, L. (2015).', a("R as a lingua franca: Advantages of using R for quantitative research in applied linguistics.", href='http://applij.oxfordjournals.org/content/early/2015/06/24/applin.amv025.abstract', target="_blank"), em('Applied Linguistics,'), 'Advance online publication. doi:10.1093/applin/amv025'),

            br(),

            strong('Recommended'),
            br(),

            a("The handbook of Research in Foreign Language Learning and Teaching(Takeuchi & Mizumoto, 2012)", href='http://mizumot.com/handbook/', target="_blank"),

            br(),
            br(),

            p('To learn more about R, I suggest this excellent and free e-book (pdf),',
            a("A Guide to Doing Statistics in Second Language Research Using R,", href="http://cw.routledge.com/textbooks/9780805861853/guide-to-R.asp", target="_blank"),
            'written by Dr. Jenifer Larson-Hall.'),

            p('Also, if you are a cool Mac user and want to use R with GUI,',
            a("MacR", href="https://sites.google.com/site/casualmacr/", target="_blank"),
            'is defenitely the way to go!'),

            br(),

            strong('Author'),
            p(a("Atsushi MIZUMOTO,", href="http://mizumot.com", target="_blank"),' Ph.D.',br(),
            'Associate Professor of Applied Linguistics',br(),
            'Faculty of Foreign Language Studies /',br(),
            'Graduate School of Foreign Language Education and Research,',br(),
            'Kansai University, Osaka, Japan'),

            br(),

            a(img(src="http://i.creativecommons.org/p/mark/1.0/80x15.png"), target="_blank", href="http://creativecommons.org/publicdomain/mark/1.0/"),

            p(br())

            )

))
))
Code license: GPL-3