prelim_analysis
```{r importing packages, message=FALSE, warning=FALSE}
library(knitr)
library(tidyverse)
library(gridExtra)
library(ggridges)
opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE, results = 'show',
               tidy.opts = list(width.cutoff = 60), tidy = TRUE)
```
```{r importing data and defining functions}
surveydata = read.csv("data/surveydata.csv")
files = list.files(path ="data", pattern="^10.*.csv$", full.names = TRUE)
expdata_full = do.call(bind_rows,
lapply(files, function(x) read.csv(x, stringsAsFactors = FALSE)))
expdata <- expdata_full %>%
  select(participant,
         words,
         mask,
         corr_ans,
         category,
         nonwords,
         tweets,
         topic,
         task,
         keypress,
         RT,
         corr,
         AMP_nonword_rating.response,
         AMP_word_rating.response,
         AMP_random_rating.response,
         LDT_rating.response,
         AMP_loop.thisN,
         AMP_trials.thisN)
# This function will check to see whether an observation is
# an outlier based on median absolute deviation (MAD)
out_mad <- function(x, thres = 3, na.rm = TRUE) {
abs(x - median(x, na.rm = na.rm)) >= thres * mad(x, na.rm = na.rm)
}
# Replace flagged observations with NA in place.
# Note: set() with this signature is data.table::set(), which expects a
# data.table; data.table isn't attached above, so this helper assumes it is
# loaded wherever out_replace is actually called.
out_replace = function(dataframe, cols, rows, newValue = NA) {
  if (any(rows)) {
    set(dataframe, rows, cols, newValue)
  }
}
```

Recoding and Data Checks
Let's do some recoding and variable manipulation.
For some reason PsychoPy records correct as 0 and incorrect as 1, so let's switch those.
We'll add a new variable called "wordcat" that tells us whether the word is moral, nonmoral, or a nonword.
We'll split up the category variable into "foundation" and "valence" in case we are interested in the two things separately at any point.
Looks like I accidentally gave the AMP loop different names in condition 1 and condition 2. To remedy this, we'll coalesce the `AMP_loop.thisN` and `AMP_trials.thisN` variables into one column.
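The recoding code itself isn't included in this export, so here is a minimal sketch of what it might look like. The output name `expdata_w` is borrowed from the later chunks, and the `category` level names and the "foundation.valence" format are assumptions.

```{r recoding sketch, eval=FALSE}
# A possible version of the recoding described above (sketch only)
expdata_w <- expdata %>%
  mutate(
    # PsychoPy codes correct as 0 and incorrect as 1, so flip it
    corr = 1 - corr,
    # combine the two differently named AMP trial counters into one column
    trialnum = coalesce(AMP_loop.thisN, AMP_trials.thisN),
    # classify each item; the level names on the right are illustrative
    wordcat = case_when(
      category == "nonword"  ~ "nonword",
      category == "nonmoral" ~ "nonmoralword",
      TRUE                   ~ "moralword"
    )
  ) %>%
  # split e.g. "care.virtue" into separate foundation and valence columns
  separate(category, into = c("foundation", "valence"),
           sep = "\\.", remove = FALSE, fill = "right")
```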
First, let's make sure that we have all of the data and that we don't have duplicate subject numbers. For this we need to get the LDT trials and the AMP trials specifically, and then check to see how many trials we have per subject.
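The counting code isn't shown here; a quick check along these lines would do it.

```{r trial count check, eval=FALSE}
# Count LDT and AMP trials per participant; duplicate subject numbers or a
# missing session would show up as an unexpected n here
expdata_w %>%
  filter(task %in% c("LDT", "AMP")) %>%
  count(participant, task) %>%
  arrange(n)
```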
Looks like it's all there.
Now we need to do a little bit of cleaning to try our best to get rid of outlying RTs as well as participants who didn't actually try in the task(s). I'm going to look at a few things: a) whether they self-reported responding randomly, b) chance accuracy in the LDT, c) distributions of response times, and d) the participants that the RAs called out as not paying attention.
First, let's look at the self-reported ratings. These are going to be fairly interesting.
```{r rating responses}
df <- expdata_w %>%
  select(participant, AMP_nonword_rating.response, AMP_word_rating.response,
         AMP_random_rating.response, LDT_rating.response) %>%
  group_by(participant) %>%
  summarize(AMP_word = mean(AMP_word_rating.response, na.rm = TRUE),
            AMP_nonword = mean(AMP_nonword_rating.response, na.rm = TRUE),
            AMP_random = mean(AMP_random_rating.response, na.rm = TRUE),
            LDT_random = mean(LDT_rating.response, na.rm = TRUE))

plot_data <- df %>% gather("task", "response", -participant)

ggplot(plot_data, aes(x = response, y = ..count..)) +
  geom_bar(position = "dodge") +
  scale_x_continuous(breaks = 1:7,
                     labels = c("not at all", "", "", "somewhat", "", "", "completely")) +
  scale_fill_discrete(labels = c("AMP judgment based on targets",
                                 "AMP judgment based on primes",
                                 "AMP responded randomly",
                                 "LDT responded randomly")) +
  labs(title = "Post-task Responses") +
  facet_wrap(. ~ task)

df %>%
  select(participant, LDT_random, AMP_random) %>%
  filter(AMP_random == 7 | LDT_random == 7)
```
There are a few clear outliers here: 1031 and 1074. 1074 was one of the participants called out by the RAs as not trying at all. It would probably be worth filtering them out if they pop up again.
```{r LDT accuracy}
LDT_trials <- expdata_w %>%
  group_by(participant) %>%
  mutate(LDT_random = mean(LDT_rating.response, na.rm = TRUE)) %>%
  ungroup() %>%
  filter(task == "LDT") %>%
  group_by(participant) %>%
  summarize(corr = mean(corr, na.rm = TRUE))
```

Let's see if there are any participants who are correctness outliers.

```{r LDT accuracy outliers}
df_indouts <- LDT_trials %>%
  select(participant, corr) %>%
  mutate(mad = out_mad(corr))
table(df_indouts$mad)
```
Only two words are outliers: the nonword "ammessment" and the nonword "tove." Only 28% of people got "ammessment" correct and only 27% got "tove" correct.
Moral words were responded to as words more accurately than nonwords were. It looks like a few people hover around chance in their responses, but they aren't technically outliers.
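The per-word accuracy check referenced above isn't shown; roughly, it would look something like this (a sketch that reuses `out_mad` on item-level means).

```{r word accuracy sketch, eval=FALSE}
# Mean LDT accuracy per item, flagging items whose accuracy is a MAD outlier
word_acc <- expdata_w %>%
  filter(task == "LDT") %>%
  group_by(words) %>%
  summarize(corr = mean(corr, na.rm = TRUE)) %>%
  mutate(out = out_mad(corr))
word_acc %>% filter(out)
```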
Now let's look at reaction times in the AMP and the LDT. I'm not sure that we'll be able to pick out people who are just clicking through based on reaction time alone, but I'll do some cross-referencing with the people the RAs said were clicking through without looking at the screen and see if I can get an idea.
```{r reaction times}
df <- expdata_w %>% filter(task == "AMP" | task == "LDT")
ggplot(df, aes(x = RT)) + geom_histogram()
```
Looks a little better. Let's see if we have any participant outliers. Here I'll go back to the unfiltered dataset and run the same procedure on the participant means.
Looks like we have a few outliers, but I'm hesitant to toss them since they're on the long end, suggesting that they are thinking about their answers rather than just thumbing through mindlessly. We can do some thinking on this.
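The participant-level code isn't included above; a sketch of it would apply the same MAD rule to participant mean RTs.

```{r participant RT outliers sketch, eval=FALSE}
# Participant mean RTs across the AMP and LDT, flagged with the same MAD rule
rt_outs <- expdata_w %>%
  filter(task %in% c("AMP", "LDT")) %>%
  group_by(participant) %>%
  summarize(meanRT = mean(RT, na.rm = TRUE)) %>%
  mutate(out = out_mad(meanRT))
rt_outs %>% filter(out)
```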
So the only cleaning that we've done so far concerns reaction times. We can perhaps do some more thoughtful cleaning in the future. Now that we have filtered, we can go on to some preliminary analysis. Let's do some EDA plotting.
LDT EDA
AMP EDA
Before we do anything with the AMP, we need to reshape the data into a form that can be analyzed (turning it from wide to long).
```{r AMP gathering}
# Build a combined subject-by-trial id so trials can be ordered below
AMP_trials <- AMP_trials %>%
  mutate(subnum = participant) %>%
  unite("sub_trial", "subnum", "trialnum", sep = "")

AMP_trials_long <- AMP_trials %>%
  rename("prime_cat" = "wordcat",
         "prime_foundation" = "foundation",
         "prime_val" = "valence") %>%
  gather("prime_target", "words", "words", "nonwords") %>%
  mutate(prime_target = ifelse(prime_target == "words", "prime",
                        ifelse(prime_target == "nonwords", "target", NA)),
         val_response = ifelse(keypress == "left", -1, 1),
         prime_val = ifelse(is.na(prime_val), "nonword", prime_val)) %>%
  arrange(sub_trial)
```
Interesting! It looks like the procedure works, but we need to test statistically to be sure. Vice words elicit negative valence, virtue words elicit positive valence, and nonwords are somewhere in the middle. Nothing really interesting for RTs. Let's look at the foundations and the weights.
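The plot being described isn't reproduced in this export; here is a sketch of how the category-level means could be computed from the long data built above.

```{r AMP category means sketch, eval=FALSE}
# Mean reported valence and RT by prime valence category
# (negative / positive / nonword, per the coding above)
AMP_trials_long %>%
  filter(prime_target == "prime") %>%
  group_by(prime_val) %>%
  summarize(meanval = mean(val_response, na.rm = TRUE),
            meanRT = mean(RT, na.rm = TRUE)) %>%
  ggplot(aes(x = prime_val, y = meanval)) +
  geom_col() +
  labs(x = "Prime valence category", y = "Mean reported valence")
```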
Very neat. It looks like our valences line up quite nicely with the words in the moral categories. Note that the controls aren't plotted because there is only one observation per category (the word "pleasant" and the word "unpleasant"). The mean for "pleasant" is $.201$ and the mean for "unpleasant" is $-.34$, so they align roughly as expected. Some of the nonword primes have low numbers of observations (n ~= 4), but as we recruit more participants this should even out.
```{r AMP weights}
# `weights` maps each word to its E-MFD weight (built in a chunk not shown here)
AMP_weights <- left_join(AMP_trials_long, weights, by = "words")

plot_data <- AMP_weights %>%
  filter(prime_cat == "moralword" & prime_target == "prime" & !is.na(weight)) %>%
  group_by(participant) %>%
  mutate(meanrt = mean(RT)) %>%
  ungroup() %>%
  group_by(participant, words, prime_val) %>%
  summarize(meanval = mean(val_response),
            weights = mean(weight, na.rm = TRUE),
            rt = mean(RT),
            meanrt = mean(meanrt)) %>%
  mutate(phi = (1 / (rt / meanrt)) * meanval) %>%
  ungroup() %>%
  group_by(words, prime_val) %>%
  summarize(phi = mean(phi), meanval = mean(meanval), weights = mean(weights))

ggplot(plot_data, aes(x = weights, y = meanval, color = prime_val)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x) +
  labs(title = "Correlation Between E-MFD Weighting and MF-AMP Responses",
       x = "E-MFD Weights", y = "Mean Reported Valence") +
  theme_minimal() +
  theme(legend.title = element_blank())

df <- plot_data %>% filter(prime_val == "negative")
Hmisc::rcorr(df$meanval, df$weights)

df <- plot_data %>% filter(prime_val == "positive")
Hmisc::rcorr(df$meanval, df$weights)
```
Looks like we have a few participants that did not actually fill out the MFQ (1003, 1020, 1021, 1035, 1040, 1044, 1045, 1060). Let's drop them from further analysis.
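A minimal sketch of that exclusion (the IDs come from the list above, and it assumes `surveydata` also has a `participant` column).

```{r drop missing MFQ sketch, eval=FALSE}
# Participants with no MFQ responses; drop them before the survey analyses
no_mfq <- c(1003, 1020, 1021, 1035, 1040, 1044, 1045, 1060)
surveydata <- surveydata %>% filter(!(participant %in% no_mfq))
expdata_w  <- expdata_w %>% filter(!(participant %in% no_mfq))
```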
Survey Data EDA
Let's do some EDA on the survey data.
Descriptives of MFQ domains
Sex differences in MFQ
Native speaker differences in MFQ domains
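The code for these summaries isn't included in this export; below is a rough sketch of the domain descriptives, with the MFQ column names as placeholders for whatever the survey columns are actually called.

```{r MFQ descriptives sketch, eval=FALSE}
# Domain-level descriptives; the column names below are placeholders
mfq_domains <- c("MFQ_care", "MFQ_fairness", "MFQ_loyalty",
                 "MFQ_authority", "MFQ_sanctity")
surveydata %>%
  select(one_of(mfq_domains)) %>%
  gather("domain", "score") %>%
  group_by(domain) %>%
  summarize(mean = mean(score, na.rm = TRUE),
            sd = sd(score, na.rm = TRUE),
            n = sum(!is.na(score)))
```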
Looks like there are no real differences across domains. Let's look at sex differences.
I don't see anything interesting right off the bat. Let's combine the survey data with the experimental data to do some more analyses.
Combining Survey with Exp Data
Now we can do some more analyses.
Influence of moral salience on response times to moral words in the LDT
RT differences between native and non-native speakers
Influence of moral salience on pos/neg ratings in the AMP
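Before any of these, the survey and experimental data need to be merged; here is a sketch, assuming `participant` is the ID column in both data frames.

```{r combining sketch, eval=FALSE}
# Join participant-level survey responses onto the trial-level data
combined <- expdata_w %>%
  left_join(surveydata, by = "participant")
```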
LDT Survey EDA
It looks like people might respond slightly faster to moral words if they have high moral salience, but the effect is small if it's anything. Let's look at within-domain salience.
Let's look at whether native speakers respond faster than non-native speakers.
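That comparison isn't shown here; a rough version might look like the following, where `native_speaker` is a placeholder for the actual survey column.

```{r native speaker RT sketch, eval=FALSE}
# Compare LDT reaction times for native vs. non-native English speakers
# (`native_speaker` is a placeholder column name)
combined %>%
  filter(task == "LDT") %>%
  group_by(native_speaker) %>%
  summarize(meanRT = mean(RT, na.rm = TRUE),
            sdRT = sd(RT, na.rm = TRUE))
```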
It seems clear that native speakers outperform non-native speakers in the AMP. That might be worth controlling for in a future analysis. Let's move on to the effects of within-domain salience.
Seems fairly interesting. Some more thinking to do though. Let's turn to the AMP for now.
AMP Survey EDA
Let's look at the influence of overall moral salience on responses to positive and negative moral words.
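The analysis itself isn't included in this export; roughly, with `MFQ_total` standing in for whatever the overall salience score is called, it might look like this.

```{r salience valence sketch, eval=FALSE}
# Correlate overall MFQ salience with each participant's mean valence response
# to moral-word primes (`MFQ_total` is a placeholder column name)
amp_by_sub <- AMP_trials_long %>%
  filter(prime_cat == "moralword" & prime_target == "prime") %>%
  group_by(participant, prime_val) %>%
  summarize(meanval = mean(val_response, na.rm = TRUE)) %>%
  left_join(surveydata, by = "participant")
Hmisc::rcorr(amp_by_sub$MFQ_total, amp_by_sub$meanval)
```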
Interesting. We get a slightly positive correlation between MFQ salience and valence, but it's not significant. Not exactly what I expected. Let's look at individual domains.
Pretty uninterpretable, IMHO. There's a main effect of valence, but not really anything with MFQ salience as far as I can tell. Let's look at a couple of the other self-report measures, namely religiosity and political affiliation.
Let's follow up on that political orientation plot and just do some bar plots.
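A minimal version of those bar plots, with `political_orientation` as a placeholder for the actual survey item.

```{r political orientation sketch, eval=FALSE}
# Distribution of responses on the political orientation item
# (`political_orientation` is a placeholder column name)
ggplot(surveydata, aes(x = factor(political_orientation))) +
  geom_bar() +
  labs(x = "Political orientation", y = "Count")
```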