Text Analysis | Word length (letters) | Word length (syllables) | Summary | Details | Summary | Details
Note
This web application is developed with Shiny. List of Packages Used library(shiny)
library(koRpus)
Code Source code for this application is mostly from koRpus: An R package for text analysis. The code for this web application is available at GitHub.
If you want to run this code on your computer (in a local R session), run the code below: library(shiny); runGitHub("corpus","mizumot")
Citation in Publications Mizumoto, A. (2015). Langtest (Version 1.0) [Web application]. Retrieved from http://langtest.jp Article Mizumoto, A., & Plonsky, L. (2015). R as a lingua franca: Advantages of using R for quantitative research in applied linguistics. Applied Linguistics, Advance online publication. doi:10.1093/applin/amv025 Recommended To learn more about R, I suggest this excellent and free e-book (pdf), A Guide to Doing Statistics in Second Language Research Using R, written by Dr. Jenifer Larson-Hall. Also, if you are a cool Mac user and want to use R with GUI, MacR is definitely the way to go! Author
Atsushi MIZUMOTO,
Ph.D.
|
library(shiny)
library(koRpus)
# Server logic: tokenizes the submitted text once and derives every table,
# plot and printed result from that shared reactive.
shinyServer(function(input, output) {

  # Tokenized form of the text box contents (English tokenizer).
  tagged.text <- reactive({
    tokenize(input$text, format = "obj", lang = "en")
  })

  # Hyphenated form of the tokenized text; feeds the syllable figures and
  # the readability measures below.
  hyphenated.text <- reactive({
    # set the next line to activate caching, if this application is run on a shiny server
    #set.kRp.env(hyph.cache.file=file.path("/var","shiny-server","cache","koRpus",paste("hyph.cache.",input$lang,".rdata", sep="")))
    hyphen(tagged.text(), quiet = TRUE)
  })

  # Word frequency table: lower-case the text, split on runs of whitespace
  # or punctuation, drop empty pieces, count, and sort by frequency.
  output$word.list <- renderTable({
    pieces <- unlist(strsplit(tolower(input$text),
                              split = "[[:space:]]+|[[:punct:]]+"))
    # Naming the argument keeps the first column labelled "Word".
    counts <- as.data.frame(table(Word = pieces[pieces != ""]))
    counts[order(counts$Freq, decreasing = TRUE), ]
  })

  output$letter.plot <- renderPlot({
    plot(tagged.text(), what = "letters")
  })

  # Descriptive statistics, shown as one labelled row per measure.
  output$desc <- renderTable({
    # describe() field name -> label shown in the table
    basic.labels <- c(
      all.chars        = "All characters",
      normalized.space = "Normalized space",
      chars.no.space   = "Characters (no space)",
      letters.only     = "Characters (letters only)",
      lines            = "Lines",
      punct            = "Punctuation",
      digits           = "Digits",
      words            = "Words",
      sentences        = "Sentences",
      avg.sentc.length = "Avg. sentence length",
      avg.word.length  = "Avg. word length"
    )
    syll.labels <- c(
      num.syll      = "Syllables",
      avg.syll.word = "Avg. syllable per word"
    )

    basic.part <- as.data.frame(describe(tagged.text())[names(basic.labels)])
    syll.part <- as.data.frame(describe(hyphenated.text())[names(syll.labels)])
    colnames(basic.part) <- unname(basic.labels)
    colnames(syll.part) <- unname(syll.labels)

    combined <- cbind(basic.part, syll.part)
    rownames(combined) <- "Value"
    # Transpose so that measures run down the rows.
    t(combined)
  })

  # Distribution of word lengths in letters (transposed for display).
  output$desc.lttr.disrib <- renderTable({
    letter.dist <- describe(tagged.text())[["lttr.distrib"]]
    t(letter.dist)
  })

  # Distribution of word lengths in syllables (transposed for display).
  output$syll.disrib <- renderTable({
    syllable.dist <- describe(hyphenated.text())[["syll.distrib"]]
    t(syllable.dist)
  })

  # Lexical diversity, parameterized by the sidebar's LD.* inputs.
  LD.results <- reactive({
    lex.div(
      tagged.text(),
      segment = input$LD.segment,
      factor.size = input$LD.factor,
      min.tokens = input$LD.minTokens,
      rand.sample = input$LD.random,
      window = input$LD.window,
      case.sens = input$LD.caseSens,
      detailed = FALSE,
      char = c(),
      quiet = TRUE
    )
  })
  output$lexdiv.sum <- renderTable(summary(LD.results()))
  output$lexdiv.res <- renderPrint(LD.results())

  # Readability for the measures ticked in the sidebar (RD.indices).
  RD.results <- reactive({
    readability(
      tagged.text(),
      hyphen = hyphenated.text(),
      index = input$RD.indices,
      quiet = TRUE
    )
  })
  output$readability.sum <- renderTable(summary(RD.results()))
  output$readability.res <- renderPrint(RD.results())
})
library(shiny)
# UI definition: a sidebar with the text box and per-analysis options, and a
# main panel with one tab per analysis plus an "About" tab.
shinyUI(
  pageWithSidebar(
    headerPanel("Text Analysis"),
    sidebarPanel(
      # limit the maximum amount of text to be analyzed
      includeHTML("./maxlength.html"),
      h4("Text to analyze:"),
      # onblur/onfocus emulate a placeholder: the hint text is cleared when
      # the box gains focus and restored when it is left empty.
      tags$textarea(id="text", rows=30, cols=35, maxlength=10000,
        onblur="if(this.value==\"\") this.value=\"(Paste your text here. Text limit is 10000 characters, but should at least have 100 words.)\";",
        onfocus="if(this.value==\"(Paste your text here. Text limit is 10000 characters, but should at least have 100 words.)\") this.value=\"\";",
        "(Paste your text here. Text limit is 10000 characters, but should at least have 100 words.)"),
      # Visible only while the "Lexical diversity" tab is selected; depends
      # on the tabsetPanel below exposing the active tab as input.tab.
      conditionalPanel("input.tab == 'chkLexdiv'",
        h4("Lexical diversity options:"),
        numericInput("LD.segment", "MSTTR segment size:", 100),
        sliderInput("LD.factor", "MTLD/MTLD-MA factor size:", min=0, max=1, value=0.72),
        numericInput("LD.minTokens", "MTLD-MA min. tokens/factor:", 9),
        numericInput("LD.random", "HD-D sample size:", 42),
        numericInput("LD.window", "MATTR moving window:", 100),
        checkboxInput("LD.caseSens", "Case sensitive", FALSE)
      ),
      # Visible only while the "Readability" tab is selected.
      conditionalPanel("input.tab == 'chkReadability'",
        h4("Readability options:"),
        checkboxGroupInput("RD.indices", label="Measures to calculate",
          # display label = index name passed to readability() by the server
          choices=c("ARI"="ARI",
            "ARI (NRI)"="ARI.NRI",
            "ARI (simplified)"="ARI.simple",
            "Coleman-Liau"="Coleman.Liau",
            "Danielson-Bryan"="Danielson.Bryan",
            "Dickes-Steiwer"="Dickes.Steiwer",
            "ELF"="ELF",
            "Farr-Jenkins-Paterson"="Farr.Jenkins.Paterson",
            "Farr-Jenkins-Paterson (Powers-Sumner-Kearl)"="Farr.Jenkins.Paterson.PSK",
            "Flesch"="Flesch",
            "Flesch (Powers-Sumner-Kearl)"="Flesch.PSK",
            "Flesch (DE, Amstad)"="Flesch.de",
            "Flesch (ES, Fernandez-Huerta)"="Flesch.es",
            "Flesch (FR, Kandel-Moles)"="Flesch.fr",
            "Flesch (NL, Douma)"="Flesch.nl",
            "Flesch-Kincaid"="Flesch.Kincaid",
            "FOG"="FOG",
            "FOG (Powers-Sumner-Kearl)"="FOG.PSK",
            "FOG (NRI)"="FOG.NRI",
            "FORCAST"="FORCAST",
            "FORCAST (reading grade level)"="FORCAST.RGL",
            "Fucks Stilcharakteristik"="Fucks",
            "Linsear-Write"="Linsear.Write",
            "LIX"="LIX",
            "Neue Wiener Sachtextformeln"="nWS",
            "RIX"="RIX",
            "SMOG"="SMOG",
            "SMOG (DE, Bamberger-Vanecek)"="SMOG.de",
            "SMOG (formula C)"="SMOG.C",
            "SMOG (simplified)"="SMOG.simple",
            "Strain"="Strain",
            "TRI"="TRI",
            "Wheeler-Smith"="Wheeler.Smith",
            "Wheeler-Smith (DE, Bamberger-Vanecek)"="Wheeler.Smith.de"),
          # measures ticked by default
          selected=c("ARI",
            "Coleman.Liau",
            "Danielson.Bryan",
            "Dickes.Steiwer",
            "ELF",
            "Farr.Jenkins.Paterson",
            "Flesch",
            "Flesch.Kincaid",
            "FOG",
            "FORCAST",
            "Fucks",
            "Linsear.Write",
            "LIX",
            "RIX",
            "SMOG",
            "Strain",
            "Wheeler.Smith"))
      )
      # submitButton("Update View")
    ),
    mainPanel(
      tabsetPanel(
        # The id publishes the selected tab's `value` as input.tab, which the
        # conditionalPanel() conditions in the sidebar test. Without it those
        # option panels never become visible.
        id="tab",
        tabPanel("Descriptive statistics",
          tableOutput("desc"),
          h5("Word length (letters)"),
          tableOutput("desc.lttr.disrib"),
          h5("Word length (syllables)"),
          tableOutput("syll.disrib"),
          plotOutput("letter.plot")
        ),
        tabPanel("Word list",
          tableOutput("word.list")
        ),
        tabPanel("Lexical diversity",
          h5("Summary"),
          tableOutput("lexdiv.sum"),
          h5("Details"),
          pre(textOutput("lexdiv.res")),
          value="chkLexdiv"
        ),
        tabPanel("Readability",
          h5("Summary"),
          tableOutput("readability.sum"),
          h5("Details"),
          pre(textOutput("readability.res")),
          value="chkReadability"
        ),
        tabPanel("About",
          strong('Note'),
          p('This web application is developed with',
            a("Shiny.", href="http://www.rstudio.com/shiny/", target="_blank"),
            ''),
          br(),
          strong('List of Packages Used'), br(),
          code('library(shiny)'),br(),
          code('library(koRpus)'),br(),
          br(),
          strong('Code'),
          p('Source code for this application is mostly from',
            a('koRpus: An R package for text analysis.', href='http://reaktanz.de/?c=hacking&s=koRpus', target="_blank")),
          p('The code for this web application is available at',
            a('GitHub.', href='https://github.com/mizumot/corpus', target="_blank")),
          p('If you want to run this code on your computer (in a local R session), run the code below:',
            br(),
            code('library(shiny)'),br(),
            code('runGitHub("corpus","mizumot")')
          ),
          br(),
          strong('Citation in Publications'),
          p('Mizumoto, A. (2015). Langtest (Version 1.0) [Web application]. Retrieved from http://langtest.jp'),
          br(),
          strong('Article'),
          p('Mizumoto, A., & Plonsky, L. (2015).',
            a("R as a lingua franca: Advantages of using R for quantitative research in applied linguistics.", href='http://applij.oxfordjournals.org/content/early/2015/06/24/applin.amv025.abstract', target="_blank"),
            em('Applied Linguistics,'),
            'Advance online publication. doi:10.1093/applin/amv025'),
          br(),
          strong('Recommended'),
          p('To learn more about R, I suggest this excellent and free e-book (pdf),',
            a("A Guide to Doing Statistics in Second Language Research Using R,", href="http://cw.routledge.com/textbooks/9780805861853/guide-to-R.asp", target="_blank"),
            'written by Dr. Jenifer Larson-Hall.'),
          p('Also, if you are a cool Mac user and want to use R with GUI,',
            a("MacR", href="https://sites.google.com/site/casualmacr/", target="_blank"),
            'is definitely the way to go!'),
          br(),
          strong('Author'),
          p(a("Atsushi MIZUMOTO,", href="http://mizumot.com", target="_blank"),' Ph.D.',br(),
            'Professor of Applied Linguistics',br(),
            'Faculty of Foreign Language Studies /',br(),
            'Graduate School of Foreign Language Education and Research,',br(),
            'Kansai University, Osaka, Japan'),
          br(),
          a(img(src="http://i.creativecommons.org/p/mark/1.0/80x15.png"), target="_blank", href="http://creativecommons.org/publicdomain/mark/1.0/"),
          p(br())
        )
      )
    )
  )
)