library(tidyverse)
library(mapview)
library(here)
library(htmltools)
library(purrr)
library(jsonlite)
library(configr)
library(RcppTOML)
source(here("analysis/_functions.R"))
Native American Markers
These categories:
# Phrases that flag a category as related to Native Americans. Each phrase is
# wrapped in \\b word boundaries below, so only whole-word matches count.
native_american_topic_phrases <- c("indian", "native americans", "settlers")

# Keep the category rows whose name contains any of the phrases.
native_categories <- marker_categories_df %>%
  filter(str_detect(
    category,
    paste0("\\b", native_american_topic_phrases, "\\b", collapse = "|")
  ))

# Show the distinct category names that matched.
unique(native_categories$category)
[1] "native americans" "settlements & settlers" "war, french and indian"
[4] "wars, us indian"
These series:
# Phrases that flag a series as related to Native Americans.
native_american_topic_phrases <- c("indian", "native americans", "settlers")

# Build the whole-word alternation once; it is applied to two columns below.
native_topic_regex <- paste0(
  "\\b", native_american_topic_phrases, "\\b",
  collapse = "|"
)

# Keep series rows where either the series name or its optional title
# contains one of the phrases.
native_series <- marker_series_df %>%
  filter(
    str_detect(name, native_topic_regex) |
      str_detect(optional_title, native_topic_regex)
  )

# Show the distinct series (name / optional title pairs) that matched.
native_series %>%
  distinct(name, optional_title)
name optional_title
1 great indian warrior / trading path roads and trails
2 indian wars battlefield trails wars, u.s. indian
3 black hawk war wars, u.s. indian
4 potawatomi trail of death wars, u.s. indian
5 the nez perce trail wars, u.s. indian
6 mary jemison - white woman of the genessee war, french & indian
Create two Native American marker datasets: a data frame of all Native American markers ever put up, and a data frame of the ones that are currently up.
# Pool the marker ids from the matching series and categories. The ids are
# deliberately NOT de-duplicated: a marker appears once per matching
# series/category row, and later steps count those occurrences per marker.
all_native_american_marker_id <- tibble(
  marker_id = c(native_series$marker_id, native_categories$marker_id)
)

# Every matching marker that was ever put up.
native_american_markers_all_df <- all_valid_markers_df %>%
  filter(marker_id %in% all_native_american_marker_id$marker_id)

# Only the matching markers that are currently up.
native_american_markers_up_df <- all_valid_markers_up_df %>%
  filter(marker_id %in% all_native_american_marker_id$marker_id)
Sentence
+up_first_sunday_podcast: We found more than 15,000 markers across the country that mentioned Native Americans,
+digital: Across the country, more than 15,000 markers mention Native Americans.
All of the series and categories are specifically about Native Americans except for the category "settlements & settlers". Therefore, to get a list of all markers that mention Native Americans, I will:
- Automatically include any marker tagged in the series or categories mentioned above that is not tagged only as "settlements & settlers"
- For the "settlements & settlers" markers, search for phrases related to Native Americans and only include the ones that match in the total
# Markers in the matched categories with category_id 12
# (presumably "settlements & settlers" -- verify against the category table).
settlements_settlers_marker_id <- native_categories %>%
  filter(category_id == 12)

# Number of matching series/category rows per marker. n == 1 means the marker
# was picked up by exactly one of the Native American series/categories.
native_marker_tag_counts <- all_native_american_marker_id %>%
  summarize(n = n(), .by = marker_id)

# Search just for settlement markers that aren't part of another Native
# American category
settlements_to_search <- native_marker_tag_counts %>%
  filter(marker_id %in% settlements_settlers_marker_id$marker_id & n == 1)

# Markers that belong to the Native American categories we defined
# (excluding the settlement-only markers)
all_native_excluding_solo_settlemnts <- native_marker_tag_counts %>%
  filter(!(marker_id %in% settlements_settlers_marker_id$marker_id & n == 1))

# Search the settlement-only markers for Native American terms
# (including racist terms)
solo_settlements_about_natives <- native_american_markers_up_df %>%
  filter(marker_id %in% settlements_to_search$marker_id) %>%
  filter(str_detect(
    text,
    paste0(
      "\\b",
      c(
        "indian", "indians", "red skin", "red skins", "redskin", "redskins",
        "native american", "native americans"
      ),
      "\\b",
      collapse = "|"
    )
  ))

# The total is the union of the two sets of marker ids
total_indian_markers <- tibble(
  marker_id = c(
    all_native_excluding_solo_settlemnts$marker_id,
    solo_settlements_about_natives$marker_id
  )
)

# Narrow the currently-up markers to that final id list (overwrites the
# earlier, broader data frame of the same name).
native_american_markers_up_df <- native_american_markers_up_df %>%
  filter(marker_id %in% total_indian_markers$marker_id)

nrow(native_american_markers_up_df)
[1] 15200
Sentence:
+up_first_sunday_podcast: Hundreds of markers still call Native Americans savages, hostile, or use racial slurs.
+digital: From the Atlantic through the Plains, more than 270 markers describe Native Americans as “savage,” “hostile” or “semi-civilized,” or they use racial slurs.
+marking_the_frontier_radio: NPR found hundreds of markers that still call Native Americans savage, hostile, or use racial slurs.
# Candidate offensive terms. Entries are regex fragments: most match as plain
# substrings, while the "red man/men" variants carry explicit \\b boundaries.
pattern <- c(
  "mound builders",
  "stone age",
  "hostile indian",
  "hostile tribe",
  "hostiles",
  "savages",
  "indian savages",
  "savage boy",
  "savage indian",
  "civilized tribes",
  "mound-building",
  "vengeful",
  "semi-civilized",
  "\\bred men\\b",
  "\\bred man\\b",
  "\\bredmen\\b",
  "\\bredman\\b",
  "red paint people",
  "half-breed",
  "civilized indian",
  "indian brave",
  "redskin",
  "hostile red indian"
)

# Single alternation regex, built once and reused for filtering and extraction.
offensive_regex <- paste(pattern, collapse = "|")

# Gathered for handcheck: markers whose text or title hits any term, with the
# matched terms listed (comma-separated) in two columns placed before `url`.
# vapply() guarantees a character column even when no rows match, where
# sapply() would return an empty list.
offensive_native_markers_df <- native_american_markers_up_df %>%
  filter(
    str_detect(text, offensive_regex) | str_detect(title, offensive_regex)
  ) %>%
  mutate(
    matched_words_text = vapply(
      str_extract_all(text, offensive_regex),
      paste, character(1),
      collapse = ", "
    ),
    matched_words_title = vapply(
      str_extract_all(title, offensive_regex),
      paste, character(1),
      collapse = ", "
    ),
    .before = url
  )
# Read in the hand-checked data
offensive_american_indian_handchecked_df <- read.csv(
  here("data/handmade/offensive_native_markers - offensive_native_markers.csv")
)

# Count the rows whose hand-check verdict is "yes".
offensive_american_indian_handchecked_df %>%
  filter(yes_no_maybe == "yes") %>%
  summarize(n = n(), .by = yes_no_maybe)
yes_no_maybe n
1 yes 274
# Tally the hand-check verdicts and hand the counts to cat_table() with a title.
offensive_american_indian_handchecked_df %>%
  count(yes_no_maybe) %>%
  cat_table("Offensive Native American Markers")
Offensive Native American Markers
Sentence:
+up_first_sunday_podcast: We found at least two hundred that tell an eerily similar story: Innocent white settlers were minding their own business when one day for no reason Native Americans appeared and killed them in cold blood.
+up_first_sunday_podcast: We are, we’re heading to the Midwest, to the frontier where we found markers that really glorify white settlers, but vilify Native Americans.
+digital: At least 200 markers tell an eerily similar American tale: Native Americans attacked innocent white settlers for no reason.
+marking_the_frontier_radio: More than 200 tell an eerily similar story: Native Americans killed innocent white settlers in cold blood.
Used regex patterns to search for iterations of common phrases used when talking about settlers being killed by Native Americans, and then handchecked them.
# Define regex pattern for identifying instances of killings by Indians:
# a violence verb, optionally one extra word, an optional "by the", any number
# of further words, then "Indian"/"Indians". Matching is case-insensitive.
indian_pattern <- "(?i)\\b(?:killed|attacked|murdered|massacre(?:d)?|slain|carried away)\\b(?:\\s+\\w+)?\\s+(?:by\\s+the\\s+)?(?:\\w+\\s+)*Indians?"

# Candidate markers: text matches the pattern, or the title contains
# "massacre". `mentions_settlers` flags texts that also mention settlers or
# family members; `version` tags rows as coming from this first pass.
killed_by_indians_df <- native_american_markers_up_df %>%
  filter(
    grepl(indian_pattern, text, ignore.case = TRUE, perl = TRUE) |
      str_detect(title, "massacre")
  ) %>%
  mutate(
    mentions_settlers = str_detect(text, "settler|pioneer|family|families|child|children|father|husband|wife|wives|brother"),
    .before = url
  ) %>%
  mutate(version = "1") %>%
  select(marker_id, url, mentions_settlers, version)
# Previous classification: combine the earlier hand-check file with the fresh
# regex candidates. The full join keeps markers present in either source.
# Both sides carry a `version` column, so the join suffixes them
# (version.x from the CSV, version.y from the regex pass).
massacre_indian_settler_df <- read_csv(
  here("data/handmade/killed_by_indians_check1.csv")
) %>%
  mutate(version = "2") %>%
  full_join(killed_by_indians_df, join_by(marker_id))
# write_csv(massacre_indian_settler_df, here("data/processed/native_massacre_final.csv"))
# Read in the hand-checked data, keeping only rows confirmed as "yes"
killed_by_indians_checked_df <- read_csv(
  here("data/handmade/killed_by_indians_check2.csv")
) %>%
  filter(killed_by_indians == "yes")

# Tally the confirmed rows and hand the count to cat_table() with a title.
killed_by_indians_checked_df %>%
  count(killed_by_indians) %>%
  cat_table("Markers that mention settlers being killed by Native Americans")
Markers that mention settlers being killed by Native Americans