Cluster Analysis

Loading...
Scores:



Data

Input values must be separated by tabs. Copy and paste from Excel/Numbers.

Please make sure that your data includes the header (variable names) in the first row.

Option:

                
                

Basic statistics


                

Correlation


                
Scatter plot matrices

Cluster analysis


                

Specifying the number of clusters



Basic statistics of each cluster (Applicable only for case clustering)

                
Profile plot (Applicable only for case clustering)


R session info

              
Note

This web application is developed with Shiny.


List of Packages Used
library(shiny)
library(shinyAce)
library(psych)
library(amap)

Code

Source code for this application is based on "The handbook of Research in Foreign Language Learning and Teaching" (Takeuchi & Mizumoto, 2012).

The code for this web application is available at GitHub.

If you want to run this code on your computer (in a local R session), run the code below:
library(shiny)
runGitHub("cluster","mizumot")


Citation in Publications

Mizumoto, A. (2015). Langtest (Version 1.0) [Web application]. Retrieved from http://langtest.jp


Article

Mizumoto, A., & Plonsky, L. (2015). R as a lingua franca: Advantages of using R for quantitative research in applied linguistics. Applied Linguistics, Advance online publication. doi:10.1093/applin/amv025


Recommended

To learn more about R, I suggest this excellent and free e-book (pdf), A Guide to Doing Statistics in Second Language Research Using R, written by Dr. Jenifer Larson-Hall.

Also, if you are a cool Mac user and want to use R with GUI, MacR is defenitely the way to go!


Author

Atsushi MIZUMOTO, Ph.D.
Professor of Applied Linguistics
Faculty of Foreign Language Studies /
Graduate School of Foreign Language Education and Research,
Kansai University, Osaka, Japan



Code for "Cluster Analysis"
by Atsushi Mizumoto

show with app
library(shiny)
library(shinyAce)
library(psych)
library(amap)


shinyServer(function(input, output) {



    bs <- reactive({
        if (input$rowname == 1) {
            x <- read.csv(text=input$text, sep="\t")
            x <- x[, -1]
        }else{
            x <- read.csv(text=input$text, sep="\t")
        }
        describe(x)[2:13]
    })
    
    
    
    correl <- reactive({
        
        if (input$rowname == 1) {
            x <- read.csv(text=input$text, sep="\t")
            x <- x[, -1]
        }else{
            x <- read.csv(text=input$text, sep="\t")
        }
        
        type <- switch(input$method,
                       Pearson = "pearson",
                       Spearman = "spearman",
                       Kendall = "kendall")
            
        round(cor(cbind(x), method = type, use = "complete"),3)
    })
    
    
    
    
    
    makecorPlot <- function(){
       
       if (input$rowname == 1) {
           x <- read.csv(text=input$text, sep="\t")
           x <- x[, -1]
       }else{
           x <- read.csv(text=input$text, sep="\t")
       }
       
       type <- switch(input$method,
                      Pearson = "pearson",
                      Spearman = "spearman",
                      Kendall = "kendall")
           
       pairs.panels(x, method = type)
    }
    
    
    output$corPlot <- renderPlot({
        print(makecorPlot())
    })

    
    
    
    
    clusteranalysis <- reactive({
        
        
        if (input$stdz == 0) { # 標準化なし
            
            if (input$rowname == 1) {
                dat <- read.csv(text=input$text, sep="\t")
                x <- dat[, -1]
            } else {
                x <- read.csv(text=input$text, sep="\t")
            }
            
                if (input$type == "case") { # ケースクラスターと変数クラスターの違い
                    z <- as.matrix(x)
                    z <- data.frame(z)
                    rownames(z) <- dat[,1]
                    
                } else {
                    z <- as.matrix(x)
                    z <- data.frame(z)
                    z <- t(z)
                }
            
                if (input$distancce == "squared.euclidean") { # 平方ユークリッド距離
                    
                    linkage <- switch(input$linkage, ward = "ward", single = "single", complete = "complete", average = "average",
                        mcquitty = "mcquitty", median = "median", centroid = "centroid")
                    
                    z.d <- dist(z)^2
                    hc <- hclust(z.d, method=linkage)
                
                } else { # 平方ユークリッド距離「以外」
                    
                    linkage <- switch(input$linkage, ward = "ward", single = "single", complete = "complete", average = "average",
                         mcquitty = "mcquitty", median = "median", centroid = "centroid")
                    
                    distanceM <- switch(input$distancce, euclidean = "euclidean", maximum = "maximum",
                         manhattan = "manhattan", canberra = "canberra", binary = "binary", pearson ="pearson",
                         abspearson = "abspearson", correlation = "correlation", abscorrelation = "abscorrelation",
                         spearman = "spearman", kendall = "kendall")
                    
                    hc <- hcluster(z, method = distanceM, link = linkage)

                }
        
        } else { # 標準化あり
            
            if (input$rowname == 1) {
                dat <- read.csv(text=input$text, sep="\t")
                x <- dat[, -1]
            }else{
                x <- read.csv(text=input$text, sep="\t")
            }
            
                if (input$type == "case") { # ケースクラスターと変数クラスターの違い
                    x <- as.matrix(x)
                    z <- scale(x) # ここで標準化
                    z <- data.frame(z)
                    rownames(z) <- dat[,1]

                } else {
                    x <- as.matrix(x)
                    z <- scale(x) # ここで標準化
                    z <- data.frame(z)
                    z <- t(z)
                }
            
            
                if (input$distancce == "squared.euclidean") { # 平方ユークリッド距離
                   
                   linkage <- switch(input$linkage, ward = "ward", single = "single", complete = "complete", average = "average",
                   mcquitty = "mcquitty", median = "median", centroid = "centroid")
                   
                   z.d <- dist(z)^2
                   hc <- hclust(z.d, method=linkage)
                   
                } else { # 平方ユークリッド距離「以外」
                   
                   linkage <- switch(input$linkage, ward = "ward", single = "single", complete = "complete", average = "average", mcquitty = "mcquitty", median = "median", centroid = "centroid")
                   
                   distanceM <- switch(input$distancce, euclidean = "euclidean", maximum = "maximum", manhattan = "manhattan", canberra = "canberra", binary = "binary", pearson ="pearson",
                        abspearson = "abspearson", correlation = "correlation", abscorrelation = "abscorrelation",
                        spearman = "spearman", kendall = "kendall")
                   
                   hc <- hcluster(z, method = distanceM, link = linkage)
                }
        }
        
        list(hc = hc) #他で使用するため
    })





    makeclusterPlot <- function(){
        
        res <- clusteranalysis()$hc
        plot(res, las = 1, hang = -1, xlab="", sub="")
    
    }
    
    output$clusterPlot <- renderPlot({
        print(makeclusterPlot()) # 上の function を参照する指定
    })


    
    
    
    specifiedCluster <- reactive({
        
        res <- clusteranalysis()$hc
        
        nclust <- input$numspec # クラスター数
        cluster <- cutree(res, k=nclust)
        cluster <- factor(cluster)
        
        
        if (input$type == "case") { # ケースクラスターの場合
            if (input$rowname == 1) {
                x <- read.csv(text=input$text, sep="\t")
                x <- x[, -1]
            } else {
                x <- read.csv(text=input$text, sep="\t")
            }
                x <- as.matrix(x)
                x <- data.frame(x)
                # z <- scale(x) # ここで標準化
                # z <- data.frame(z)
        
            xx <- cbind(x, cluster)        # 素点のデータフレーム
            # zz <- cbind(z, cluster)
        
            describeBy(xx[1:ncol(xx)-1], group = cluster)
        
        } else { # 変数クラスターの場合は何も表示しない
            cat("\n")
        }
    
    })





    makespecPlot <- function(){
    
        res <- clusteranalysis()$hc
        plot(res, las = 1, hang = -1, xlab="", sub="")
        
        nclust <- input$numspec # クラスター数
        rect.hclust(res, k=nclust, border="red") # rect.hclustでクラスターごとにいろをつける
    }
    
    output$specPlot <- renderPlot({
        
        print(makespecPlot()) # 上の function を参照する指定

    })


    
    
    
    makeProfilePlot <- function(){
        
        res <- clusteranalysis()$hc
        
        nclust <- input$numspec # クラスター数
        cluster <- cutree(res, k=nclust)
        cluster <- factor(cluster)
        
        
        if (input$type == "case") { # ケースクラスターの場合
            if (input$rowname == 1) {
                x <- read.csv(text=input$text, sep="\t")
                x <- x[, -1]
            } else {
                x <- read.csv(text=input$text, sep="\t")
            }
            x <- as.matrix(x)
            x <- data.frame(x)
            z <- scale(x) # ここで標準化
            z <- data.frame(z)
            
            x <- cbind(x, cluster)  # 素点のデータフレーム
            z <- cbind(z, cluster)  # 標準化得点のデータフレーム
            
            means <- aggregate(z[, 1:ncol(x)-1], by=list(z$cluster), FUN=mean)
            means <- means[,-1]
            
            minmax <- unlist(means)
            minimum <- min(minmax, na.rm = TRUE)
            maximum <- max(minmax, na.rm = TRUE)
            
            lbls <- c()
            for (i in 1:nclust) {
                lbls[i] <- paste("Cluster",i)
            }
            
            par(xaxt="n")
            
            lim <- max(abs(c(minimum, maximum)))+0.5
            
            plot(c(0,0), xlim=c(1, ncol(x)-1), ylim=c(-lim,lim), type="n", xlab="Variables", ylab="z-scores")
            
            par(xaxt="s")  #横軸に座標を再度書く指定
            axis(1, c(1:(ncol(x)-1)), colnames(x)[1:(ncol(x)-1)])
            
            legend("topright", legend = lbls, lty = c(1:nclust), pch = c(1:nclust), col=c(1:nclust), cex=.8, angle = 45)
            
            abline(h=0, lwd=0.2, lty=1)
            
            means <- t(means)
            
            for (i in 1:nclust) {
                points(means[,i], pch=i, col=i)
                lines(means[,i], pch=i, col=i, lty=i)
            }
        }
    }
    
    output$profilePlot <- renderPlot({
        print(makeProfilePlot()) # 上の function を参照する指定
    })
    
    
    
    
    
    info <- reactive({
        info1 <- paste("This analysis was conducted with ", strsplit(R.version$version.string, " \\(")[[1]][1], ".", sep = "")# バージョン情報
        info2 <- paste("It was executed on ", date(), ".", sep = "")# 実行日時
        cat(sprintf(info1), "\n")
        cat(sprintf(info2), "\n")
    })
    
    output$info.out <- renderPrint({
        info()
    })





    output$textarea.out <- renderPrint({
        bs()
    })

    output$correl.out <- renderPrint({
        correl()
    })
    
    output$clusteranalysis.out <- renderPrint({
        clusteranalysis()
    })
    
    output$specifiedCluster.out <- renderPrint({
        specifiedCluster()
    })
    
})
library(shiny)
library(shinyAce)


shinyUI(bootstrapPage(


    headerPanel("Cluster Analysis"),


########## Adding loading message #########

tags$head(tags$style(type="text/css", "
#loadmessage {
position: fixed;
top: 0px;
left: 0px;
width: 100%;
padding: 10px 0px 10px 0px;
text-align: center;
font-weight: bold;
font-size: 100%;
color: #000000;
background-color: #CCFF66;
z-index: 105;
}
")),

conditionalPanel(condition="$('html').hasClass('shiny-busy')",
tags$div("Loading...",id="loadmessage")),

#-----------------------------------------#

    sidebarPanel(

    strong("Scores:"),

    checkboxInput("stdz", label = strong("Standardized"), value = T),

    br(),

    radioButtons("type", strong("Clustering:"),
            list("Cases" = "case",
                 "Variables" = "variable"), selected = "case"),

    br(),

    radioButtons("linkage", strong("Linkage method:"),
        list("ward" = "ward", "single" = "single", "complete" = "complete", "average" = "average",
             "mcquitty" = "mcquitty", "median" = "median", "centroid" = "centroid"), selected = "ward"),

    br(),

    radioButtons("distancce", strong("Distance measure:"),
        list("squared euclidean" = "squared.euclidean", "euclidean"="euclidean", "maximum"="maximum",
             "manhattan"="manhattan", "canberra"="canberra", "binary"="binary", "pearson"="pearson",
             "abspearson"="abspearson", "correlation"="correlation", "abscorrelation"="abscorrelation",
             "spearman"="spearman", "kendall"="kendall"),selected = "squared.euclidean")
    ),

    mainPanel(
        tabsetPanel(

        tabPanel("Main",

            h3("Data"),
            p('Input values must be separated by tabs. Copy and paste from Excel/Numbers.'),
            p(HTML("<b><div style='background-color:#FADDF2;border:1px solid black;'>Please make sure that your data includes the header (variable names) in the first row.</div></b>")),

            strong('Option:'),
            checkboxInput("rowname", label = strong("The first column contains case names."), value = T),

            aceEditor("text", value="ID\tTOEIC\tOral.Rehearsal\tAssociate\tEx.Motiv\tIntr.Motiv\tTime\n1\t390\t2.7\t3.7\t3.33\t3.17\t6.92\n2\t360\t4.3\t3\t3.33\t3.17\t5.83\n3\t410\t2\t2\t2.67\t2.5\t2.36\n4\t390\t3.3\t2.7\t4.67\t4.83\t4.36\n5\t365\t3.7\t1\t3.67\t4.5\t3.78\n6\t415\t4.3\t2.7\t4.33\t3.5\t6.55\n7\t415\t3\t2.7\t4\t2.67\t4.56\n8\t340\t2.7\t1.7\t4.33\t2.83\t4\n9\t370\t3.3\t3\t4.33\t3.67\t4.73\n10\t360\t1.3\t1.7\t4.33\t2.5\t4.5\n11\t410\t3.3\t2.7\t4\t4.17\t8.3\n12\t430\t4\t2\t4\t4\t8.58\n13\t340\t3.3\t3.3\t3.67\t3.67\t4.9\n14\t305\t3.7\t2.3\t3.33\t3.83\t7\n15\t380\t1.7\t2.3\t4\t4\t6.92\n16\t390\t3.7\t1\t3.67\t3.33\t5.18\n17\t300\t2.7\t1.7\t3\t2.5\t3.73\n18\t370\t3\t3.7\t3.33\t1.83\t6.2\n19\t315\t2\t2\t2.67\t2.67\t3.22\n20\t370\t3.3\t2.3\t4\t4.17\t4\n21\t315\t2.7\t3.3\t3\t3.33\t2.75\n22\t385\t3\t2.7\t4\t2.83\t7.25\n23\t405\t4\t3.3\t4.33\t3.17\t2.56\n24\t380\t5\t1.7\t5\t4.17\t5.6\n25\t355\t1.3\t2.3\t4.33\t4.83\t6.36\n26\t310\t1\t2.3\t4.67\t2.83\t3.18\n27\t345\t5\t4.3\t3.67\t2.5\t5\n28\t390\t3.3\t4\t5\t5\t4.83\n29\t340\t1\t1.7\t4.67\t3.33\t5.75\n30\t365\t1.3\t2\t3.33\t3.17\t6.42",
                mode="r", theme="cobalt"),

            br(),

            h3("Basic statistics"),
            verbatimTextOutput("textarea.out"),

            br(),

            h3("Correlation"),

            radioButtons("method", "Choose the type of correlation coefficients:",
                        list("Pearson product-moment correlation coefficient" = "Pearson",
                             "Spearman's rank correlation coefficient (Spearman's rho)" = "Spearman",
                             "Kendall tau rank correlation coefficient (Kendall's tau)" = "Kendall")),

            verbatimTextOutput("correl.out"),

            br(),

            strong("Scatter plot matrices"),
            br(),

            plotOutput("corPlot"),

            br(),


            h3("Cluster analysis"),

            verbatimTextOutput("clusteranalysis.out"),

            plotOutput("clusterPlot"),

            h3("Specifying the number of clusters"),

            numericInput("numspec", "Number of clusters:", 3),

            br(),
            br(),

            plotOutput("specPlot"),

            strong("Basic statistics of each cluster (Applicable only for case clustering)"),
            verbatimTextOutput("specifiedCluster.out"),

            br(),
            strong("Profile plot (Applicable only for case clustering)"),
            br(),

            plotOutput("profilePlot", height = "600px", width="80%"),

            br(),
            br(),

            strong('R session info'),
            verbatimTextOutput("info.out")

            ),


        tabPanel("About",

            strong('Note'),
            p('This web application is developed with',
            a("Shiny.", href="http://www.rstudio.com/shiny/", target="_blank"),
            ''),

            br(),

            strong('List of Packages Used'), br(),
            code('library(shiny)'),br(),
            code('library(shinyAce)'),br(),
            code('library(psych)'),br(),
            code('library(amap)'),br(),

            br(),

            strong('Code'),
            p('Source code for this application is based on',
            a('"The handbook of Research in Foreign Language Learning and Teaching" (Takeuchi & Mizumoto, 2012).', href='http://mizumot.com/handbook/', target="_blank")),

            p('The code for this web application is available at',
            a('GitHub.', href='https://github.com/mizumot/cluster', target="_blank")),

            p('If you want to run this code on your computer (in a local R session), run the code below:',
            br(),
            code('library(shiny)'),br(),
            code('runGitHub("cluster","mizumot")')
            ),

            br(),

            strong('Citation in Publications'),
            p('Mizumoto, A. (2015). Langtest (Version 1.0) [Web application]. Retrieved from http://langtest.jp'),

            br(),

            strong('Article'),
            p('Mizumoto, A., & Plonsky, L. (2015).', a("R as a lingua franca: Advantages of using R for quantitative research in applied linguistics.", href='http://applij.oxfordjournals.org/content/early/2015/06/24/applin.amv025.abstract', target="_blank"), em('Applied Linguistics,'), 'Advance online publication. doi:10.1093/applin/amv025'),

            br(),

            strong('Recommended'),
            p('To learn more about R, I suggest this excellent and free e-book (pdf),',
            a("A Guide to Doing Statistics in Second Language Research Using R,", href="http://cw.routledge.com/textbooks/9780805861853/guide-to-R.asp", target="_blank"),
            'written by Dr. Jenifer Larson-Hall.'),

            p('Also, if you are a cool Mac user and want to use R with GUI,',
            a("MacR", href="https://sites.google.com/site/casualmacr/", target="_blank"),
            'is defenitely the way to go!'),

            br(),

            strong('Author'),
            p(a("Atsushi MIZUMOTO,", href="http://mizumot.com", target="_blank"),' Ph.D.',br(),
            'Professor of Applied Linguistics',br(),
            'Faculty of Foreign Language Studies /',br(),
            'Graduate School of Foreign Language Education and Research,',br(),
            'Kansai University, Osaka, Japan'),

            br(),

            a(img(src="http://i.creativecommons.org/p/mark/1.0/80x15.png"), target="_blank", href="http://creativecommons.org/publicdomain/mark/1.0/"),

            p(br())

            )

))
))
Code license: GPL-3