Principal Component Analysis

Code for "Principal Component Analysis"
by Atsushi Mizumoto

server.R
ui.R

library(shiny)
library(shinyAce)
library(psych)



shinyServer(function(input, output) {
    
    
    
    bs <- reactive({
        if (input$rowname == 1) {
            x <- read.csv(text=input$text, sep="\t")
            x <- x[, -1]
        }else{
            x <- read.csv(text=input$text, sep="\t")
        }
        describe(x)[2:13]
    })
    
    
    
    correl <- reactive({
        if (input$rowname == 1) {
            x <- read.csv(text=input$text, sep="\t")
            x <- x[, -1]
        }else{
            x <- read.csv(text=input$text, sep="\t")
        }
        round(cor(cbind(x), use = "complete"),3)
    })
    
    
    
    makecorPlot <- function(){
        if (input$rowname == 1) {
            x <- read.csv(text=input$text, sep="\t")
            x <- x[, -1]
        }else{
            x <- read.csv(text=input$text, sep="\t")
        }
        pairs.panels(x)
    }
    
    output$corPlot <- renderPlot({
        print(makecorPlot())
    })


    
    makesPlot <- function(){
        if (input$rowname == 1) {
            dat <- read.csv(text=input$text, sep="\t")
            dat <- dat[, -1]
        }else{
            dat <- read.csv(text=input$text, sep="\t")
        }
        VSS.scree(dat, main = "")
    }
    
    output$sPlot <- renderPlot({
        print(makesPlot())
    })
    
    
    
    
    
    
    pcaresult <- reactive({
        options(digits=3)
        
        if (input$rowname == 1) {
            dat <- read.csv(text=input$text, sep="\t")
            rowvar <- matrix(dat[,1])
            rownames(dat) <- rowvar
            datpca <- as.matrix(dat[,-1])
            colvar <- colnames(datpca)
            nr <- nrow(datpca)
            nc <- ncol(datpca)
            maxpc <- min(nr,nc)
            respca <- prcomp(datpca, scale=TRUE)

            info <- summary(respca)
            eigen <- info[[1]]^2
            newinfo <- rbind("Eigen values"=eigen, info$importance)
            cat("Importance of components:", "\n")
            print(newinfo)
            
            pcloadings <- t(respca$sdev*t(respca$rotation))
            cat("\n", "Principal component loadings:", "\n")
            print(pcloadings)
            
            pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))
            cat("\n", "Principal component scores:", "\n")
            print(pcscores)
        
        }else{
            dat <- read.csv(text=input$text, sep="\t")
            rowvar <- rownames(dat)
            datpca <- as.matrix(dat)
            colvar <- colnames(datpca)
            nr <- nrow(datpca)
            nc <- ncol(datpca)
            maxpc <- min(nr,nc)
            respca <- prcomp(datpca, scale=TRUE)
            
            info <- summary(respca)
            eigen <- info[[1]]^2
            newinfo <- rbind("Eigen values"=eigen, info$importance)
            cat("Importance of components:", "\n")
            print(newinfo)
            
            pcloadings <- t(respca$sdev*t(respca$rotation))
            cat("\n", "Principal component loadings:", "\n")
            print(pcloadings)
            
            pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))
            cat("\n", "Principal component scores:", "\n")
            print(pcscores)
        }
    })
    
    
    
    makePlot1 <- function() {
        if (input$rowname == 1) {
            dat <- read.csv(text=input$text, sep="\t")
            datpca <- dat[,-1]
            colvar <- colnames(datpca)
            nr <- nrow(datpca)
            nc <- ncol(datpca)
            maxpc <- min(nr,nc)
            respca <- prcomp(datpca, scale=TRUE)
            pcloadings <- t(respca$sdev*t(respca$rotation))
            pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))

        }else{
            dat <- read.csv(text=input$text, sep="\t")
            rowvar <- rownames(dat)
            datpca <- as.matrix(dat)
            colvar <- colnames(datpca)
            nr <- nrow(datpca)
            nc <- ncol(datpca)
            maxpc <- min(nr,nc)
            respca <- prcomp(datpca, scale=TRUE)
            pcloadings <- t(respca$sdev*t(respca$rotation))
            pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))

        }
        
        par(mfrow = c(2, 1))   # グラフを縦に2つ並べる
        barplot(pcloadings[,1], main="PC 1", ylim=c(-1,1), cex.names=0.7)
        barplot(pcloadings[,2], main="PC 2", ylim=c(-1,1), cex.names=0.7)
    }



    output$pcPlot1 <- renderPlot({
        print(makePlot1())
    })
    
    
    
    makePlot2 <- function() {
        if (input$rowname == 1) {
            dat <- read.csv(text=input$text, sep="\t")
            rowvar <- matrix(dat[,1])
            rownames(dat) <- rowvar
            datpca <- as.matrix(dat[,-1])
            colvar <- colnames(datpca)
            nr <- nrow(datpca)
            nc <- ncol(datpca)
            maxpc <- min(nr,nc)
            respca <- prcomp(datpca, scale=TRUE)
            pcloadings <- t(respca$sdev*t(respca$rotation))
            pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))

        }else{
            dat <- read.csv(text=input$text, sep="\t")
            rowvar <- rownames(dat)
            datpca <- as.matrix(dat)
            colvar <- colnames(datpca)
            nr <- nrow(datpca)
            nc <- ncol(datpca)
            maxpc <- min(nr,nc)
            respca <- prcomp(datpca, scale=TRUE)
            pcloadings <- t(respca$sdev*t(respca$rotation))
            pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))
        }


        plot(pcloadings[,1:2], type="n", xlab="PC 1", ylab="PC 2", cex.axis=0.8, cex.lab=0.8)
        text(pcloadings[,1:2], labels=colvar, cex=0.9, adj=c(0.25,1.5))
        abline(h=0,lty="dotted")
        abline(v=0,lty="dotted")
        title(main="Principal Component Analysis: PC Loadings")
    }
    
    
    output$pcPlot2 <- renderPlot({
        print(makePlot2())
    })
    
    
    
    makePlot3 <- function() {
        if (input$rowname == 1) {
            dat <- read.csv(text=input$text, sep="\t")
            rowvar <- matrix(dat[,1])
            rownames(dat) <- rowvar
            datpca <- as.matrix(dat[,-1])
            colvar <- colnames(datpca)
            nr <- nrow(datpca)
            nc <- ncol(datpca)
            maxpc <- min(nr,nc)
            respca <- prcomp(datpca, scale=TRUE)
            pcloadings <- t(respca$sdev*t(respca$rotation))
            pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))

        }else{
            dat <- read.csv(text=input$text, sep="\t")
            rowvar <- rownames(dat)
            datpca <- as.matrix(dat)
            colvar <- colnames(datpca)
            nr <- nrow(datpca)
            nc <- ncol(datpca)
            maxpc <- min(nr,nc)
            respca <- prcomp(datpca, scale=TRUE)
            pcloadings <- t(respca$sdev*t(respca$rotation))
            pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))

        }
        
        # PC1のminとmax（x軸の最大・最小）
        PCSpc1min <- min(pcscores[,1:2][,1])
        PCSpc1min <- PCSpc1min-(abs(PCSpc1min-PCSpc1min*1.25))
        PCSpc1max <- max(pcscores[,1:2][,1])
        PCSpc1max <- PCSpc1max*1.25
        # PC2のminとmax（y軸の最大・最小）
        PCSpc2min <- min(pcscores[,1:2][,2])
        PCSpc2min <- PCSpc2min-(abs(PCSpc2min-PCSpc2min*1.25))
        PCSpc2max <- max(pcscores[,1:2][,2])
        PCSpc2max <- PCSpc2max*1.25
        
        plot(pcscores[,1:2], xlab="PC 1", ylab="PC 2", type="n", xlim=c(PCSpc1min, PCSpc1max), ylim=c(PCSpc2min, PCSpc2max), cex.axis=0.8,cex.lab=0.8)
        #points(pcscores[,1:2],col="red",pch="*",cex=2)
        text(pcscores[,1:2], labels=rowvar, cex=0.9, adj=c(0.25,1.5))
        abline(h=0,lty="dotted")
        abline(v=0,lty="dotted")
        title(main="Principal Component Analysis: PC Scores")
    }
    
    
    output$pcPlot3 <- renderPlot({
        print(makePlot3())
    })
    
    
    
    makePlot4 <- function() {
        if (input$rowname == 1) {
            dat <- read.csv(text=input$text, sep="\t")
            rowvar <- matrix(dat[,1])
            rownames(dat) <- rowvar
            datpca <- as.matrix(dat[,-1])
            colvar <- colnames(datpca)
            nr <- nrow(datpca)
            nc <- ncol(datpca)
            maxpc <- min(nr,nc)
            respca <- prcomp(datpca, scale=TRUE)
            pcloadings <- t(respca$sdev*t(respca$rotation))
            pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))

        }else{
            dat <- read.csv(text=input$text, sep="\t")
            rowvar <- rownames(dat)
            datpca <- as.matrix(dat)
            colvar <- colnames(datpca)
            nr <- nrow(datpca)
            nc <- ncol(datpca)
            maxpc <- min(nr,nc)
            respca <- prcomp(datpca, scale=TRUE)
            pcloadings <- t(respca$sdev*t(respca$rotation))
            pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))

        }
        
        # PC1のminとmax（x軸の最大・最小）
        pc1min <- min(c(pcloadings[,1:2][,1], pcscores[,1:2][,1]))
        pc1min <- pc1min*1.25
        pc1max <- max(c(pcloadings[,1:2][,1], pcscores[,1:2][,1]))
        pc1max <- pc1max*1.25
        # PC2のminとmax（y軸の最大・最小）
        pc2min <- min(c(pcloadings[,1:2][,2], pcscores[,1:2][,2]))
        pc2min <- pc2min*1.25
        pc2max <- max(c(pcloadings[,1:2][,2], pcscores[,1:2][,2]))
        pc2max <- pc2max*1.25
        
        # 主成分負荷量と主成分得点を同時にプロット
        biplot(pcscores[,1:2], pcloadings[,1:2], var.axes = F, xlim=c(pc1min, pc1max), ylim=c(pc2min, pc2max))
        abline(v=0, lty=3) #0で縦に線を引き，破線（lty=3）を引く
        abline(h=0, lty=3) #0で横に線を引き，破線（lty=3）を引く
    }
    
    
    output$pcPlot4 <- renderPlot({
        print(makePlot4())
    })
    
    
    
    makePlot5 <- function() {
        if (input$rowname == 1) {
            dat <- read.csv(text=input$text, sep="\t")
            rowvar <- matrix(dat[,1])
            rownames(dat) <- rowvar
            datpca <- as.matrix(dat[,-1])
            colvar <- colnames(datpca)
            nr <- nrow(datpca)
            nc <- ncol(datpca)
            maxpc <- min(nr,nc)
            respca <- prcomp(datpca, scale=TRUE)
            pcloadings <- t(respca$sdev*t(respca$rotation))
            pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))

        }else{
            dat <- read.csv(text=input$text, sep="\t")
            rowvar <- rownames(dat)
            datpca <- as.matrix(dat)
            colvar <- colnames(datpca)
            nr <- nrow(datpca)
            nc <- ncol(datpca)
            maxpc <- min(nr,nc)
            respca <- prcomp(datpca, scale=TRUE)
            pcloadings <- t(respca$sdev*t(respca$rotation))
            pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))

        }
        
        dat$PCA1 <- pcscores[,1]
        dat$PCA2 <- pcscores[,2]
        
        z <- dat[, c("PCA1","PCA2")]
        z.d <- dist(z)^2    # ユークリッド距離の平方（2乗)
        result <- hclust(z.d, method="ward") # クラスター分析（Ward法と平方ユークリッド距離使用）
        par(mar=c(1,6,3,1))
        plot(result, xlab="", sub="") # デンドログラム作図
    }
    
    
    output$pcPlot5 <- renderPlot({
        print(makePlot5())
    })
    
    
    
    check <- reactive({
        if (input$rowname == 1) {
            dat <- read.csv(text=input$text, sep="\t")
            rowvar <- matrix(dat[,1])
            rownames(dat) <- rowvar
            datpca <- as.matrix(dat[,-1])
            colvar <- colnames(datpca)
            nr <- nrow(datpca)
            nc <- ncol(datpca)
            maxpc <- min(nr,nc)
            respca <- prcomp(datpca, scale=TRUE)
            pcloadings <- t(respca$sdev*t(respca$rotation))
            pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))

        }else{
            dat <- read.csv(text=input$text, sep="\t")
            rowvar <- rownames(dat)
            datpca <- as.matrix(dat)
            colvar <- colnames(datpca)
            nr <- nrow(datpca)
            nc <- ncol(datpca)
            maxpc <- min(nr,nc)
            respca <- prcomp(datpca, scale=TRUE)
            pcloadings <- t(respca$sdev*t(respca$rotation))
            pcscores<-scale(datpca)%*%respca$rotation*sqrt(nr/(nr-1))

        }
        
        dat$PCA1 <- pcscores[,1]
        dat$PCA2 <- pcscores[,2]
        dat$PCA3 <- pcscores[,3]
        dat
    })




    info <- reactive({
        info1 <- paste("This analysis was conducted with ", strsplit(R.version$version.string, " \\(")[[1]][1], ".", sep = "")# バージョン情報
        info2 <- paste("It was executed on ", date(), ".", sep = "")# 実行日時
        cat(sprintf(info1), "\n")
        cat(sprintf(info2), "\n")
    })
    
    output$info.out <- renderPrint({
        info()
    })
    




    output$textarea.out <- renderPrint({
        bs()
    })

    output$correl.out <- renderPrint({
        correl()
    })
    
    output$pcaresult.out <- renderPrint({
        pcaresult()
    })
    

})

library(shiny)
library(shinyAce)


shinyUI(bootstrapPage(


    headerPanel("Principal Component Analysis"),

    mainPanel(
        tabsetPanel(

        tabPanel("Main",

            h3("Data"),
            p('Input values must be separated by tabs. Copy and paste from Excel/Numbers.'),
            p(HTML("<b><div style='background-color:#FADDF2;border:1px solid black;'>Please make sure that your data includes the header (variable names) in the first row.</div></b>")),

            strong('Option:'),
            checkboxInput("rowname", label = strong("The first column contains case names."), value = T),

            aceEditor("text", value="Unit\tWordsPerSentence\tFleschKincaidGrade\tNarrativity\tSyntacticSimplicity\tWordConcreteness\tReferentialCohesion\nU01\t13.5\t6.2\t79.1\t70.19\t60.64\t24.51\nU02\t16.6\t8.1\t57.53\t68.79\t95.91\t59.87\nU03\t15.9\t9.4\t31.21\t54.38\t29.81\t6.43\nU04\t17.5\t9.8\t42.07\t56.36\t47.21\t27.76\nU05\t14.9\t8.8\t53.19\t72.24\t58.32\t13.57\nU06\t16.9\t10\t34.83\t63.31\t57.93\t5.82\nU07\t20\t7.7\t41.29\t33.72\t84.38\t93.57\nU08\t14.3\t7.1\t69.85\t82.38\t35.2\t3.22\nU09\t16.7\t7.6\t89.07\t34.46\t89.62\t32.64\nU10\t14\t6.5\t44.04\t75.49\t71.9\t19.49\nU11\t17.4\t6\t93.82\t50.8\t57.53\t47.61\nU12\t21.5\t11.1\t37.83\t35.57\t43.64\t20.9\nU13\t18.5\t10.2\t28.77\t59.48\t32.28\t18.41\nU14\t17.6\t9.5\t64.8\t42.47\t65.91\t50.4\nU15\t20.2\t9.9\t28.43\t34.46\t84.61\t15.15",
                mode="r", theme="cobalt"),

            br(),

            h3("Basic statistics"),
            verbatimTextOutput("textarea.out"),

            br(),

            h3("Correlation"),
            verbatimTextOutput("correl.out"),

            br(),

            strong("Scatter plot matrices"),

            br(),


            plotOutput("corPlot"),

            br(),

            h3("Scree plot"),


            plotOutput("sPlot", width="80%"),

            br(),
            br(),

            h3("Results of principal component analysis"),
            verbatimTextOutput("pcaresult.out"),

            br(),
            br(),

            h3("Plot"),

            plotOutput("pcPlot1", height = "600px"),

            br(),


            plotOutput("pcPlot2", height = "500px"),

            br(),


            plotOutput("pcPlot3", height = "500px"),

            br(),

            h4("Biplot"),

            plotOutput("pcPlot4", height = "700px"),

            br(),

            h4("Cluster analysis using the principal component scores"),
            p("(Ward method with the squared Euclidean distance technique)"),

            plotOutput("pcPlot5"),

            br(),
            br(),

            strong('R session info'),
            verbatimTextOutput("info.out")

            ),


        tabPanel("About",

            strong('Note'),
            p('This web application is developed with',
            a("Shiny.", href="http://www.rstudio.com/shiny/", target="_blank"),
            ''),

            br(),

            strong('List of Packages Used'), br(),
            code('library(shiny)'),br(),
            code('library(shinyAce)'),br(),
            code('library(psych)'),br(),

            br(),

            strong('Code'),

            p('Source code for this application is mostly based on',
            a("this website maintained by Dr. Tabata.", href='http://www.lang.osaka-u.ac.jp/~tabata/JAECS2004/multi.html', target="_blank")),

            p('The code for this web application is available at',
            a('GitHub.', href='https://github.com/mizumot/pca', target="_blank")),

            p('If you want to run this code on your computer (in a local R session), run the code below:',
            br(),
            code('library(shiny)'),br(),
            code('runGitHub("pca","mizumot")')
            ),

            br(),

            strong('Citation in Publications'),
            p('Mizumoto, A. (2015). Langtest (Version 1.0) [Web application]. Retrieved from http://langtest.jp'),

            br(),

            strong('Article'),
            p('Mizumoto, A., & Plonsky, L. (2015).', a("R as a lingua franca: Advantages of using R for quantitative research in applied linguistics.", href='http://applij.oxfordjournals.org/content/early/2015/06/24/applin.amv025.abstract', target="_blank"), em('Applied Linguistics,'), 'Advance online publication. doi:10.1093/applin/amv025'),

            br(),

            strong('Recommended'),
            br(),

            a("The handbook of Research in Foreign Language Learning and Teaching(Takeuchi & Mizumoto, 2012)", href='http://mizumot.com/handbook/', target="_blank"),

            br(),
            br(),

            p('To learn more about R, I suggest this excellent and free e-book (pdf),',
            a("A Guide to Doing Statistics in Second Language Research Using R,", href="http://cw.routledge.com/textbooks/9780805861853/guide-to-R.asp", target="_blank"),
            'written by Dr. Jenifer Larson-Hall.'),

            p('Also, if you are a cool Mac user and want to use R with GUI,',
            a("MacR", href="https://sites.google.com/site/casualmacr/", target="_blank"),
            'is defenitely the way to go!'),

            br(),

            strong('Author'),
            p(a("Atsushi MIZUMOTO,", href="http://mizumot.com", target="_blank"),' Ph.D.',br(),
            'Professor of Applied Linguistics',br(),
            'Faculty of Foreign Language Studies /',br(),
            'Graduate School of Foreign Language Education and Research,',br(),
            'Kansai University, Osaka, Japan'),

            br(),

            a(img(src="http://i.creativecommons.org/p/mark/1.0/80x15.png"), target="_blank", href="http://creativecommons.org/publicdomain/mark/1.0/"),

            p(br())

            )

))
))

Code license: GPL-3

Principal Component Analysis

Data

Basic statistics

Correlation

Scree plot

Results of principal component analysis

Plot

Biplot

Cluster analysis using the principal component scores

Code for "Principal Component Analysis" by Atsushi Mizumoto

Code for "Principal Component Analysis"
by Atsushi Mizumoto