Load packages
# Packages
library(tidyverse)
library(codebook)
library(papaja)
library(kableExtra)
October 9, 2024
This page contains the codebook (i.e., description of each variable) of the data set used in the manuscript #Knowledge: Improving food-related knowledge via seeding implemented as a social media intervention. The data file (named data_insta_seeding.csv
) can be downloaded from the OSF or with the command osfr::osf_download(osfr::osf_retrieve_file("gva9p"))
.
# make_df_readme() builds a codebook-style summary of a data frame.
#
# The result has one row per column of `df` with the column name ("Variable"),
# the first element of its class vector ("Type") and the matching entry of
# `desc` ("Description"). If `add_examples` is TRUE (the default), up to
# `n_examples` randomly sampled rows (default: 4) are collapsed into one
# comma-separated "Example" string per column.
#
# Arguments:
#   df           A data frame (or tibble).
#   desc         Character vector with one description per column of `df`.
#   add_examples Should the "Example" column be added?
#   digits       Number of decimals used for numeric columns that contain
#                non-integer values (passed to printnum()).
#   n_examples   Number of rows to sample; capped at nrow(df).
#
# Returns a data frame with the columns Variable, Type, Description and,
# optionally, Example. Stops with an error if an argument is invalid.
make_df_readme <- function(df, desc, add_examples = TRUE, digits = 2, n_examples = 4) {
  # Input validation
  if (!is.data.frame(df)) stop("The input 'df' must be a data frame.")
  if (!is.character(desc) || length(desc) != ncol(df)) {
    stop("The 'desc' argument must be a character vector with the same length as the number of columns in 'df'.")
  }
  if (!is.numeric(digits) || digits < 0) stop("The 'digits' argument must be a non-negative integer.")
  if (!is.numeric(n_examples) || n_examples <= 0) stop("The 'n_examples' argument must be a positive integer.")

  # Create the initial summary data frame. vapply() guarantees a character
  # vector; class(x)[1] keeps only the first class of multi-class objects.
  temp0 <- data.frame(
    "Variable" = names(df),
    "Type" = vapply(df, function(x) class(x)[1], character(1), USE.NAMES = FALSE),
    "Description" = desc,
    stringsAsFactors = FALSE
  )
  row.names(temp0) <- NULL

  # Optionally add examples
  if (add_examples) {
    # Never ask for more rows than exist; sample.int() also copes with 0 rows.
    n_rows <- min(n_examples, nrow(df))
    sampled_rows <- df[sample.int(nrow(df), size = n_rows), , drop = FALSE]

    # Iterate over the columns themselves. apply() would first coerce a
    # mixed-type data frame to a character matrix, so numeric columns would
    # never be detected and `digits` would be ignored.
    temp0$Example <- vapply(sampled_rows, function(col) {
      if (is.numeric(col) && length(col) > 0) {
        # Whole numbers (IDs, counts, ...) are shown without decimals.
        is_whole <- all(col == round(col), na.rm = TRUE)
        col <- printnum(col, digits = if (is_whole) 0 else digits)
      }
      paste(col, collapse = ", ")
    }, character(1), USE.NAMES = FALSE)
  }

  temp0
}
The data file data_insta_seeding.csv
contains 44 variables and 10506 rows.
# Descriptions of all variables, in the same order as the columns of `df`
# (make_df_readme() stops if the lengths do not match).
desc <- c(
  "Unique person identifier",
  "Criterion shown during 15 day training/seeding phase",
  "Estimated criterion in the final questionnaire",
  "Phase of the experiment [2AFC, estimation]",
  "Number of trial in the corresponding phase",
  "In which position was the old item shown during the 2AFC test",
  "File name of the old item",
  "File name of the new item",
  "Image selected as old by the participant",
  "Unique item identifier [1-60]",
  "Name of the item",
  "Category of the item",
  "Was this item a seeding item in the CO2 account? [1 = yes, 0 = no]",
  "Was this item a seeding item in the Kcal account? [1 = yes, 0 = no]",
  "Actual CO2 criterion value of the item",
  "Actual kcal criterion value of the item",
  "Estimate/judgment of the participant",
  "Individual keys pressed by the participants to type in their judgment",
  "Overall reaction time for each judgment (from stimulus shown until confirmed button clicked)",
  "Internal experimental clock time when the stimulus was shown",
  "Internal experimental clock time when the first input was made",
  "Internal experimental clock time for all inputs",
  "Day when the item was posted on the corresponding account (only applicable if it was a seeding item)",
  "Total time (in min) taken by the participant for all trials in the corresponding phase (2AFC or estimation)",
  "Age of the participant",
  "Gender of the participant [female, male, non_binary]",
  "Educational level of participants",
  "Self-reported knowledge of CO2 footprint of food items [1-7]",
  "Self-reported knowledge of calorie content of food items [1-7]",
  "Item 1 of the Short Subjective Numeracy Scale",
  "Item 2 of the Short Subjective Numeracy Scale",
  "Item 3 of the Short Subjective Numeracy Scale",
  "Control question: ~ Should your data be used for analysis? [yes, no]",
  "Control question: ~ Did you use any form of aid? [yes, no]",
  "Control question: ~ Were there any technical problems? [yes, no]",
  "Control question: ~ Describe the problem",
  "Control question: ~ Were there any problems on instagram? [yes, no]",
  "Control question: ~ Describe the problem",
  "Control question: ~ How many posts of the 30 possible posts did you see on instagram?",
  "Control question: ~ How did you make your estimates? Did you use a specific strategy?",
  "Control question: ~ Do you have any other comments?",
  "On which day after the first invitation to the final questionnaire did the participants actually participate?",
  "Number of posts liked by the participant [0-30]",
  "Number of unique days on which the participant liked the posts [0-15]"
)

# Build the codebook table (variable, type, description, random examples)
tbl <- make_df_readme(df, desc)

# Render the table with the backend that matches the output format
if (knitr::is_latex_output()) {
  tbl %>%
    kable(format = "latex", booktabs = TRUE) %>%
    # The piped kable is already the first argument; passing `tbl` again
    # would hand the data frame to `bootstrap_options`.
    kableExtra::kable_styling(latex_options = "scale_down")
} else {
  tbl %>%
    kable(
      format = "html",
      table.attr = 'class="table table-striped table-hover"',
      # The examples contain free text typed by participants, so special
      # characters (&, <, >) are escaped instead of being emitted as raw HTML.
      escape = TRUE
    ) %>%
    kable_styling(fixed_thead = TRUE)
}
Variable | Type | Description | Example |
---|---|---|---|
ID | character | Unique person identifier | 18vk5qdusp, cq3j9z9i3n, unizrjefu9, l0ks4o8jmo |
trained_criterion | character | Criterion shown during 15 day training/seeding phase | CO2, CO2, CO2, CO2 |
est_criterion | character | Estimated criterion in the final questionnaire | Kcal, Kcal, CO2, Kcal |
phase | character | Phase of the experiment [2AFC, estimation] | estimation, estimation, estimation, estimation |
trial | numeric | Number of trial in the corresponding phase | 9, 19, 8, 23 |
position_old | character | In which position was the old item shown during the 2AFC test | NA, NA, NA, NA |
old | character | File name of the old item | NA, NA, NA, NA |
new | character | File name of the new item | NA, NA, NA, NA |
img_clicked | character | Image selected as old by the participant | NA, NA, NA, NA |
ID_item | numeric | Unique item identifier [1-60] | 9, 6, 4, 23 |
item | character | Name of the item | Banana, Lettuce (Iceberg), Carrots, Shrimp (frozen) |
category | character | Category of the item | fruit & vegetables, fruit & vegetables, fruit & vegetables, meat & fish |
seeding_CO2 | numeric | Was this item a seeding item in the CO2 account? [1 = yes, 0 = no] | 0, 0, 0, 0 |
seeding_Kcal | numeric | Was this item a seeding item in the Kcal account? [1 = yes, 0 = no] | 0, 0, 1, 0 |
crit_CO2 | numeric | Actual CO2 criterion value of the item | 990, 590, 310, 12100 |
crit_kcal | numeric | Actual kcal criterion value of the item | 93, 16, 39, 60 |
judgment | numeric | Estimate/judgment of the participant | 90, 5, 2, 150 |
input_values | character | Individual keys pressed by the participants to type in their judgment | 5,0,NA,NA,6,8,NA,NA,9,0, 5, 2, 1,5,0 |
rt_ms | numeric | Overall reaction time for each judgment (from stimulus shown until confirmed button clicked) | 17739.700, 2675.222, 1672.000, 3864.000 |
time_stim_shown | numeric | Internal experimental clock time when the stimulus was shown | 303769.3, 190463.1, 1326320.0, 278878.0 |
first_input_time | logical | Internal experimental clock time when the first input was made | NA, NA, NA, NA |
input_times | numeric | Internal experimental clock time for all inputs | NA, 192186, 1327703, NA |
day_posted | numeric | Day when the item was posted on the corresponding account (only applicable if it was a seeding item) | NA, NA, NA, NA |
est_phase_time_min | numeric | Total time (in min) taken by the participant for all trials in the corresponding phase (2AFC or estimation) | 10.112658, 3.092435, 3.341000, 6.117700 |
age | numeric | Age of the participant | 39, 35, 19, 27 |
gender | character | Gender of the participant [female, male, non_binary] | female, male, female, female |
education_level | character | Educational level of participants | Abitur, Bachelor, Abitur, Bachelor |
CO2_knowledge | numeric | Self-reported knowledge of CO2 footprint of food items [1-7] | 1, 4, 2, 1 |
Kcal_knowledge | numeric | Self-reported knowledge of calorie content of food items [1-7] | 4, 5, 7, 1 |
SNS_1 | numeric | Item 1 of the Short Subjective Numeracy Scale | 3, 5, 5, 3 |
SNS_2 | numeric | Item 2 of the Short Subjective Numeracy Scale | 4, 5, 5, 5 |
SNS_3 | numeric | Item 3 of the Short Subjective Numeracy Scale | 1, 5, 4, 4 |
quality | character | Control question: ~ Should your data be used for analysis? [yes, no] | yes, yes, yes, yes |
cheated | character | Control question: ~ Did you use any form of aid? [yes, no] | no, no, no, no |
technical_problems | character | Control question: ~ Were there any technical problems? [yes, no] | no, no, no, no |
technical_problems_specific | character | Control question: ~ Describe the problem | NA, NA, NA, NA |
technical_problems_insta | character | Control question: ~ Were there any problems on instagram? [yes, no] | no, no, no, no |
technical_problems_insta_specific | character | Control question: ~ Describe the problem | NA, NA, NA, NA |
est_number_insta_posts | numeric | Control question: ~ How many posts of the 30 possible posts did you see on instagram? | 30, 28, 30, 29 |
strategy | character | Control question: ~ How did you make your estimates? Did you use a specific strategy? | Bauchgefühl, NA, NA, NA |
other_comments | character | Control question: ~ Do you have any other comments? | NA, Aufbereitung der Posts und Informationen war nicht sehr einprägsam/ ansprechend, NA, NA |
participated_day | numeric | On which day after the first invitation to the final questionnaire did the participants actually participate? | 0, 0, 0, 1 |
n_posts_liked | numeric | Number of posts liked by the participant [0-30] | 30, 28, 30, 30 |
n_dates_liked | numeric | Number of unique days on which the participant liked the posts [0-15] | 14, 10, 15, 12 |
---
title: "Codebook"
date: "`r Sys.Date()`"
format:
html:
code-fold: true
code-copy: true
code-tools: true
embed-resources: true
df-print: kable
toc: true
toc-location: right
fig-format: svg
page-layout: full
fig-height: 4
fig-width: 9
fig-align: center
pdf:
toc: true
toc-depth: 2
fig-height: 4
fig-width: 9
fig-align: center
df-print: kable
execute:
echo: false
execute:
warning: false
messages: false
editor_options:
chunk_output_type: console
---
```{r setup}
#| code-summary: "Load packages"
# Packages
library(tidyverse)
library(codebook)
library(papaja)
library(kableExtra)
```
This page contains the **codebook** (i.e., description of each variable) of the data set used in the manuscript *#Knowledge: Improving food-related knowledge via seeding implemented as a social media intervention*. The data file (named `data_insta_seeding.csv`) can be downloaded from the [OSF](https://osf.io/vztjq/) or with the command `osfr::osf_download(osfr::osf_retrieve_file("gva9p"))`.
```{r function}
#| code-summary: "Helper function"
# make_df_readme() builds a codebook-style summary of a data frame.
#
# The result has one row per column of `df` with the column name ("Variable"),
# the first element of its class vector ("Type") and the matching entry of
# `desc` ("Description"). If `add_examples` is TRUE (the default), up to
# `n_examples` randomly sampled rows (default: 4) are collapsed into one
# comma-separated "Example" string per column.
#
# Arguments:
#   df           A data frame (or tibble).
#   desc         Character vector with one description per column of `df`.
#   add_examples Should the "Example" column be added?
#   digits       Number of decimals used for numeric columns that contain
#                non-integer values (passed to printnum()).
#   n_examples   Number of rows to sample; capped at nrow(df).
#
# Returns a data frame with the columns Variable, Type, Description and,
# optionally, Example. Stops with an error if an argument is invalid.
make_df_readme <- function(df, desc, add_examples = TRUE, digits = 2, n_examples = 4) {
  # Input validation
  if (!is.data.frame(df)) stop("The input 'df' must be a data frame.")
  if (!is.character(desc) || length(desc) != ncol(df)) {
    stop("The 'desc' argument must be a character vector with the same length as the number of columns in 'df'.")
  }
  if (!is.numeric(digits) || digits < 0) stop("The 'digits' argument must be a non-negative integer.")
  if (!is.numeric(n_examples) || n_examples <= 0) stop("The 'n_examples' argument must be a positive integer.")

  # Create the initial summary data frame. vapply() guarantees a character
  # vector; class(x)[1] keeps only the first class of multi-class objects.
  temp0 <- data.frame(
    "Variable" = names(df),
    "Type" = vapply(df, function(x) class(x)[1], character(1), USE.NAMES = FALSE),
    "Description" = desc,
    stringsAsFactors = FALSE
  )
  row.names(temp0) <- NULL

  # Optionally add examples
  if (add_examples) {
    # Never ask for more rows than exist; sample.int() also copes with 0 rows.
    n_rows <- min(n_examples, nrow(df))
    sampled_rows <- df[sample.int(nrow(df), size = n_rows), , drop = FALSE]

    # Iterate over the columns themselves. apply() would first coerce a
    # mixed-type data frame to a character matrix, so numeric columns would
    # never be detected and `digits` would be ignored.
    temp0$Example <- vapply(sampled_rows, function(col) {
      if (is.numeric(col) && length(col) > 0) {
        # Whole numbers (IDs, counts, ...) are shown without decimals.
        is_whole <- all(col == round(col), na.rm = TRUE)
        col <- printnum(col, digits = if (is_whole) 0 else digits)
      }
      paste(col, collapse = ", ")
    }, character(1), USE.NAMES = FALSE)
  }

  temp0
}
```
```{r items}
#| code-summary: "Download & read in data"
# Download the data file from the OSF (kept disabled)
# osf_download(osf_retrieve_file("gva9p"),path = "Data")
# Import the data file, then coerce `input_times` to numeric
df <- read_csv2("../Data/data_insta_seeding.csv")
df <- mutate(df, input_times = as.numeric(input_times))
```
The data file `data_insta_seeding.csv` contains `r ncol(df)` variables and `r nrow(df)` rows.
```{r}
# Descriptions of all variables, in the same order as the columns of `df`
# (make_df_readme() stops if the lengths do not match).
desc <- c(
  "Unique person identifier",
  "Criterion shown during 15 day training/seeding phase",
  "Estimated criterion in the final questionnaire",
  "Phase of the experiment [2AFC, estimation]",
  "Number of trial in the corresponding phase",
  "In which position was the old item shown during the 2AFC test",
  "File name of the old item",
  "File name of the new item",
  "Image selected as old by the participant",
  "Unique item identifier [1-60]",
  "Name of the item",
  "Category of the item",
  "Was this item a seeding item in the CO2 account? [1 = yes, 0 = no]",
  "Was this item a seeding item in the Kcal account? [1 = yes, 0 = no]",
  "Actual CO2 criterion value of the item",
  "Actual kcal criterion value of the item",
  "Estimate/judgment of the participant",
  "Individual keys pressed by the participants to type in their judgment",
  "Overall reaction time for each judgment (from stimulus shown until confirmed button clicked)",
  "Internal experimental clock time when the stimulus was shown",
  "Internal experimental clock time when the first input was made",
  "Internal experimental clock time for all inputs",
  "Day when the item was posted on the corresponding account (only applicable if it was a seeding item)",
  "Total time (in min) taken by the participant for all trials in the corresponding phase (2AFC or estimation)",
  "Age of the participant",
  "Gender of the participant [female, male, non_binary]",
  "Educational level of participants",
  "Self-reported knowledge of CO2 footprint of food items [1-7]",
  "Self-reported knowledge of calorie content of food items [1-7]",
  "Item 1 of the Short Subjective Numeracy Scale",
  "Item 2 of the Short Subjective Numeracy Scale",
  "Item 3 of the Short Subjective Numeracy Scale",
  "Control question: ~ Should your data be used for analysis? [yes, no]",
  "Control question: ~ Did you use any form of aid? [yes, no]",
  "Control question: ~ Were there any technical problems? [yes, no]",
  "Control question: ~ Describe the problem",
  "Control question: ~ Were there any problems on instagram? [yes, no]",
  "Control question: ~ Describe the problem",
  "Control question: ~ How many posts of the 30 possible posts did you see on instagram?",
  "Control question: ~ How did you make your estimates? Did you use a specific strategy?",
  "Control question: ~ Do you have any other comments?",
  "On which day after the first invitation to the final questionnaire did the participants actually participate?",
  "Number of posts liked by the participant [0-30]",
  "Number of unique days on which the participant liked the posts [0-15]"
)

# Build the codebook table (variable, type, description, random examples)
tbl <- make_df_readme(df, desc)

# Render the table with the backend that matches the output format
if (knitr::is_latex_output()) {
  tbl %>%
    kable(format = "latex", booktabs = TRUE) %>%
    # The piped kable is already the first argument; passing `tbl` again
    # would hand the data frame to `bootstrap_options`.
    kableExtra::kable_styling(latex_options = "scale_down")
} else {
  tbl %>%
    kable(
      format = "html",
      table.attr = 'class="table table-striped table-hover"',
      # The examples contain free text typed by participants, so special
      # characters (&, <, >) are escaped instead of being emitted as raw HTML.
      escape = TRUE
    ) %>%
    kable_styling(fixed_thead = TRUE)
}
```