Title: | Log File Analysis in International Large-Scale Assessments |
---|---|
Description: | Enables users to handle the dataset cleaning for conducting specific analyses with the log files from two international educational assessments: the Programme for International Student Assessment (PISA, <http://www.oecd.org/pisa/>) and the Programme for the International Assessment of Adult Competencies (PIAAC, <http://www.oecd.org/skills/piaac/>). An illustration of the analyses can be found on the LOGAN Shiny app (<https://loganpackage.shinyapps.io/shiny/>) in your browser. |
Authors: | Denise Reis Costa [aut, cre], Waldir Leoncio [aut] |
Maintainer: | Denise Reis Costa <[email protected]> |
License: | GPL-3 |
Version: | 1.0.0 |
Built: | 2024-10-27 03:18:42 UTC |
Source: | https://github.com/derecost/logan |
This function allows you to clean events in the 'event.type' variable
CleanActions(data, event.type, clear.events)
CleanActions(data, event.type, clear.events)
data |
A data frame |
event.type |
a vector with concatenated events. See ConcatActions. |
clear.events |
a named vector where all the events to be cleaned are listed.
Each name is the text to be cleaned and each value is its replacement, e.g., c("ACER_EVENT_" = "") as in the example. |
This function returns a data.frame
with a "new.event.type"
variable containing the cleaned events from the "event.type" variable.
# Data preparation df <- cp025q01 df$id <- paste(df[, 1], df[, 2], df[, 3], sep = "-") df <- m0$TrimVar(df, c("event", "event_type", "diag_state")) df <- m0$ConcatActions(df, c(rlang::quo(event), rlang::quo(event_type))) # Function demonstration df.clean <- m0$CleanActions(df, event_type, c("ACER_EVENT_" = "")) table(df$event.type) table(df.clean$new.event.type) # cleaned version
# Data preparation df <- cp025q01 df$id <- paste(df[, 1], df[, 2], df[, 3], sep = "-") df <- m0$TrimVar(df, c("event", "event_type", "diag_state")) df <- m0$ConcatActions(df, c(rlang::quo(event), rlang::quo(event_type))) # Function demonstration df.clean <- m0$CleanActions(df, event_type, c("ACER_EVENT_" = "")) table(df$event.type) table(df.clean$new.event.type) # cleaned version
This function allows you to concatenate event actions from different variables into a single vector.
ConcatActions(data, concat.events)
ConcatActions(data, concat.events)
data |
A data frame |
concat.events |
a vector where all the events are listed. Each element
of this vector needs to be of a quo() type (see the example). |
The output dataset will be identical to the input dataset, except for the addition of one column at the end, called "event.type". Each row of event.type contains the concatenated values of the concat.events variables for that row.
This function returns a data.frame
with the concatenated
events in the 'event.type' variable.
# Data preparation df <- cp025q01 df$id <- paste(df[, 1], df[, 2], df[, 3], sep = "-") df <- m0$TrimVar(df, c("event", "event_type", "diag_state")) # Function demonstration df.conc <- m0$ConcatActions(df, c(rlang::quo(event), rlang::quo(event_type))) names(df) names(df.conc) # notice the extra variable in the end table(df.conc$event.type)
# Data preparation df <- cp025q01 df$id <- paste(df[, 1], df[, 2], df[, 3], sep = "-") df <- m0$TrimVar(df, c("event", "event_type", "diag_state")) # Function demonstration df.conc <- m0$ConcatActions(df, c(rlang::quo(event), rlang::quo(event_type))) names(df) names(df.conc) # notice the extra variable in the end table(df.conc$event.type)
Log file for PISA 2012, CP025, Q01 (selected countries)
Treated log file and microdata for PISA 2012, CP025, Q01 (selected countries)
This is a function that converts a dataset from long to wide format.
DataActionsbyID(data, id.var, event.var, name.var.action)
DataActionsbyID(data, id.var, event.var, name.var.action)
data |
A data frame |
id.var |
a vector with the individuals' identification. It is
given as an unquoted variable name (see the example). |
event.var |
a vector with the cleaned concatenated events. See
CleanActions. |
name.var.action |
A character string that will name the new variable of events |
This function returns a data.frame
with only one entry per
individual identification and a new variable of events named by 'name.var.action'.
# Data preparation df <- cp025q01 df$id <- paste(df[, 1], df[, 2], df[, 3], sep = "-") df <- m0$TrimVar(df, c("event", "event_type", "diag_state")) df <- m0$ConcatActions(df, c(rlang::quo(event), rlang::quo(event_type))) df <- m0$CleanActions(df, event.type, c("ACER_EVENT_" = "")) # Function demonstration m0$DataActionsbyID(df, id, new.event.type, "actions")
# Data preparation df <- cp025q01 df$id <- paste(df[, 1], df[, 2], df[, 3], sep = "-") df <- m0$TrimVar(df, c("event", "event_type", "diag_state")) df <- m0$ConcatActions(df, c(rlang::quo(event), rlang::quo(event_type))) df <- m0$CleanActions(df, event.type, c("ACER_EVENT_" = "")) # Function demonstration m0$DataActionsbyID(df, id, new.event.type, "actions")
This is a function that reports a descriptive analysis of the strategy and students' performance.
DescriptiveStrategy(data, strategy.var, performance.item, performance.test, PartialCredit = FALSE)
DescriptiveStrategy(data, strategy.var, performance.item, performance.test, PartialCredit = FALSE)
data |
A data frame |
strategy.var |
A character string with the name of the strategy variable |
performance.item |
A character string with the name of the item performance variable |
performance.test |
A character string with the name of the test performance variable |
PartialCredit |
Logical. Set it to TRUE when the item has a partial credit score. Defaults to FALSE. |
This function returns a report with a descriptive analysis of the strategy and students' performance.
m2$DescriptiveStrategy(cp025q01.treated, "votat", "CP025Q01", "PV1CPRO")
m2$DescriptiveStrategy(cp025q01.treated, "votat", "CP025Q01", "PV1CPRO")
This is a simple function that, by default, reads an SPSS data file and saves it as a data frame. It is essentially a wrapper for foreign::read.spss with arguments common to log file datasets.
ImportSPSS(filename)
ImportSPSS(filename)
filename |
character string: the name of the file or URL to read. |
This function returns a data frame.
This package enables users to handle the dataset cleaning for conducting specific analyses with the log files from two international educational assessments: the Programme for International Student Assessment (PISA, <http://www.oecd.org/pisa/>) and the Programme for the International Assessment of Adult Competencies (PIAAC, <http://www.oecd.org/skills/piaac/>). An illustration of the analyses can be found on the LOGAN Shiny app (<https://loganpackage.shinyapps.io/shiny/>) in your browser.
The LOGAN functions
The LOGAN functions are organized in modules, so to call a function you must prefix it with its module, e.g., 'm0$', where "m0" is the module to which that function pertains.
What follows is a list of Functions organized per module:
Module 0:
CleanActions
ConcatActions
DataActionsbyID
ImportSPSS
RangeNumberActionsbyVar
TrimVar
Module 1:
NumericTimeVar
PlotTimeonTaskbyVar
SummaryTOTbyVar
TOTVar
VarTimebyID
Module 2:
DescriptiveStrategy
PlotStrategybyCatPerformance
VarActionSearch
Denise Reis Costa [aut, cre],
Waldir Leoncio Netto [aut]
Module 0: Data preparation
m0
m0
An object of class module (inherits from list) of length 6.
This module contains the following functions, which should be called by issuing "m0$<function_name>()": CleanActions, ConcatActions, DataActionsbyID, ImportSPSS, RangeNumberActionsbyVar, TrimVar
Module 1: Time
m1
m1
An object of class module (inherits from list) of length 5.
This module contains the following functions, which should be called by issuing "m1$<function_name>()": NumericTimeVar, PlotTimeonTaskbyVar, SummaryTOTbyVar, TOTVar, VarTimebyID
Module 2: Actions (cognitive related)
m2
m2
An object of class module (inherits from list) of length 3.
This module contains the following functions, which should be called by issuing "m2$<function_name>()": DescriptiveStrategy, PlotStrategybyCatPerformance, VarActionSearch.
This is a function that converts a factor time variable to numeric.
NumericTimeVar(data, vector.time)
NumericTimeVar(data, vector.time)
data |
A data frame |
vector.time |
variable containing the time |
This function returns a data.frame
with the time variables listed in 'vector.time'
converted from factor to numeric.
vector.time <- c("CP025Q01.END", "CP025Q01.START") m1$NumericTimeVar(cp025q01.treated, vector.time)
vector.time <- c("CP025Q01.END", "CP025Q01.START") m1$NumericTimeVar(cp025q01.treated, vector.time)
Microdata for PISA 2012 (selected countries)
This is a function that plots a strategy variable by a categorizing variable (e.g., categories of performance).
PlotStrategybyCatPerformance(data, strategy.var, categ.var, namexlab, nameylab)
PlotStrategybyCatPerformance(data, strategy.var, categ.var, namexlab, nameylab)
data |
A data frame |
strategy.var |
strategy variable |
categ.var |
categorizing variable |
namexlab |
name of the variable in the x-axis |
nameylab |
name of the variable in the y-axis |
This function returns a plot
of the strategy variable
by the categorizing variable, with the axes labelled by 'namexlab' and 'nameylab'.
# Data preparation df <- cp025q01.treated df$categ <- cut(df$PV1CPRO, c(0, 423, 488, 553, 900)) df.dataplot <- df[, c("top", "categ")] df.dataplot[,1] <- as.factor(df.dataplot[,1]) df.dataplot[,2] <- as.factor(df.dataplot[,2]) # Function demonstration m2$PlotStrategybyCatPerformance(df.dataplot, top, categ, "Proficiency levels", "Percentage")
# Data preparation df <- cp025q01.treated df$categ <- cut(df$PV1CPRO, c(0, 423, 488, 553, 900)) df.dataplot <- df[, c("top", "categ")] df.dataplot[,1] <- as.factor(df.dataplot[,1]) df.dataplot[,2] <- as.factor(df.dataplot[,2]) # Function demonstration m2$PlotStrategybyCatPerformance(df.dataplot, top, categ, "Proficiency levels", "Percentage")
This is a function that plots the time on task by an item performance variable.
PlotTimeonTaskbyVar(data, tot.var, performance.item, namexlab, nameylab = "Density")
PlotTimeonTaskbyVar(data, tot.var, performance.item, namexlab, nameylab = "Density")
data |
A data frame |
tot.var |
a vector with the total time. It is given as a character string with the variable name (see the example). |
performance.item |
name of the item variable |
namexlab |
name of the plot's x-axis |
nameylab |
name of the plot's y-axis. Defaults to "Density" |
This function returns a plot
of the time on task
by the item performance variable, with the axes labelled by 'namexlab' and 'nameylab'.
m1$PlotTimeonTaskbyVar(cp025q01.treated, "CP025Q01.TOT", "CP025Q01", namexlab = "Time on task (minutes)")
m1$PlotTimeonTaskbyVar(cp025q01.treated, "CP025Q01.TOT", "CP025Q01", namexlab = "Time on task (minutes)")
This is a function that reports the number of students and number of actions (min-max) aggregated by a specific variable.
RangeNumberActionsbyVar(data, id.var, var.group, save.table = TRUE)
RangeNumberActionsbyVar(data, id.var, var.group, save.table = TRUE)
data |
A data frame |
id.var |
a vector with the individuals' identification. It is
given as an unquoted variable name (see the example). |
var.group |
a vector with the group variable. It is given as an unquoted variable name (see the example). |
save.table |
logical flag controlling whether the resulting table is saved. Defaults to TRUE |
This function returns a data.frame
with the number of students
and number of actions (min-max) aggregated by a specific variable.
m0$RangeNumberActionsbyVar(cp025q01.treated, NewID, CNT, save.table = FALSE)
m0$RangeNumberActionsbyVar(cp025q01.treated, NewID, CNT, save.table = FALSE)
This is a function that reports the number of students and a summary of time on task aggregated by a specific variable.
SummaryTOTbyVar(data, tot.var, performance.item, na.rm = FALSE)
SummaryTOTbyVar(data, tot.var, performance.item, na.rm = FALSE)
data |
A data frame |
tot.var |
a vector with the time on task. |
performance.item |
a vector with the group variable. It is given as
a character string with the variable name (see the example). |
na.rm |
remove missing data in 'performance.item'? Default is 'FALSE' |
This function returns a data.frame
with the number of students
and a summary of time on task aggregated by a specific variable.
m1$SummaryTOTbyVar(cp025q01.treated, "CP025Q01.TOT", "CP025Q01", TRUE)
m1$SummaryTOTbyVar(cp025q01.treated, "CP025Q01.TOT", "CP025Q01", TRUE)
This is a function that calculates the time on task from the start time and end time variables.
TOTVar(data, starttime.vec, endtime.vec, divBy = NA, tot.var)
TOTVar(data, starttime.vec, endtime.vec, divBy = NA, tot.var)
data |
A data frame |
starttime.vec |
a character string with the name of the start time variable
(see the example). |
endtime.vec |
a character string with the name of the end time variable (see the example). |
divBy |
a number by which the time on task is divided (60 in the example). Defaults to NA |
tot.var |
string containing the name of the output variable |
This function returns a data.frame
with the time on task
stored in the output variable named by 'tot.var'.
m1$TOTVar(cp025q01.treated, "CP025Q01.START", "CP025Q01.END", divBy = 60, tot.var = "CP025Q01.TOT")
m1$TOTVar(cp025q01.treated, "CP025Q01.START", "CP025Q01.END", divBy = 60, tot.var = "CP025Q01.TOT")
TrimVar() is a function that allows you to remove leading and trailing whitespace from the strings of a vector.
TrimVar(data, trim.vector)
TrimVar(data, trim.vector)
data |
dataset |
trim.vector |
vector of variables on the dataset to be trimmed |
This function returns a vector with the leading and trailing spaces of the original vector removed.
head(m0$TrimVar(cp025q01, "event"))
head(m0$TrimVar(cp025q01, "event"))
This is a function that locates specific events (using the actions.search argument) and creates new variables associated with this strategy.
VarActionSearch(data, action.var, actions.search)
VarActionSearch(data, action.var, actions.search)
data |
A data frame |
action.var |
a vector with actions. See DataActionsbyID. |
actions.search |
A character vector with the actions to be searched. |
This function returns a data.frame with the frequency of each specific event from the actions.search argument and a "Freq.Actions.Search" summary.
# Counting the instances of top_setting == 1 df <- m2$VarActionSearch(cp025q01.treated, "CP025Q01.ACTIONS", "1_apply") table(df$freq.1_apply) # checking results
# Counting the instances of top_setting == 1 df <- m2$VarActionSearch(cp025q01.treated, "CP025Q01.ACTIONS", "1_apply") table(df$freq.1_apply) # checking results
Extracting the start or end time
VarTimebyID(data, id.var, time.var, event.var, name.var.time, new.name)
VarTimebyID(data, id.var, time.var, event.var, name.var.time, new.name)
data |
data frame |
id.var |
vector of unique identification |
time.var |
vector with the time variable |
event.var |
vector with the events |
name.var.time |
name of the time string to filter (ex.: "START_ITEM" or "END_ITEM") |
new.name |
name of the output variable |
a data frame with 'time' replaced with 'new.name'. The variable 'event.var' is dropped.
# Data preparation df <- cp025q01 df$id <- paste(df[, 1], df[, 2], df[, 3], sep = "-") df <- m0$TrimVar(df, c("event", "event_type", "diag_state")) df <- m0$ConcatActions(df, c(rlang::quo(event), rlang::quo(event_type))) df <- m0$CleanActions(df, event.type, c("ACER_EVENT_" = "")) # Function demonstration m1$VarTimebyID(df, id, time, new.event.type, "START_ITEM", "start")
# Data preparation df <- cp025q01 df$id <- paste(df[, 1], df[, 2], df[, 3], sep = "-") df <- m0$TrimVar(df, c("event", "event_type", "diag_state")) df <- m0$ConcatActions(df, c(rlang::quo(event), rlang::quo(event_type))) df <- m0$CleanActions(df, event.type, c("ACER_EVENT_" = "")) # Function demonstration m1$VarTimebyID(df, id, time, new.event.type, "START_ITEM", "start")