Native American Markers

library(tidyverse)
library(mapview)
library(here)
library(htmltools)
library(purrr)
library(jsonlite)
library(configr)
library(RcppTOML)


source(here("analysis/_functions.R"))

These categories:

# Phrases used to flag a marker category as relevant to Native Americans.
native_american_topic_phrases <- c("indian", "native americans", "settlers")

# Keep every category row whose name contains one of the phrases as a whole
# word: each phrase is wrapped in \b word boundaries and the alternatives are
# joined with "|" into a single regular expression.
native_categories <- marker_categories_df %>%
  filter(
    str_detect(
      category,
      paste0("\\b", native_american_topic_phrases, "\\b", collapse = "|")
    )
  )

# Show the distinct category names that matched.
unique(native_categories$category)

[1] "native americans"       "settlements & settlers" "war, french and indian"
[4] "wars, us indian"

These series:

# Phrases used to flag a marker series as relevant to Native Americans.
native_american_topic_phrases <- c("indian", "native americans", "settlers")

# Keep every series row where either the series name or its optional title
# contains one of the phrases as a whole word.
native_series <- marker_series_df %>%
  filter(
    str_detect(
      name,
      paste0("\\b", native_american_topic_phrases, "\\b", collapse = "|")
    ) |
      str_detect(
        optional_title,
        paste0("\\b", native_american_topic_phrases, "\\b", collapse = "|")
      )
  )

# Show the distinct (name, optional_title) combinations that matched.
native_series %>%
  distinct(name, optional_title)

                                        name       optional_title
1        great indian warrior / trading path     roads and trails
2             indian wars battlefield trails    wars, u.s. indian
3                             black hawk war    wars, u.s. indian
4                  potawatomi trail of death    wars, u.s. indian
5                        the nez perce trail    wars, u.s. indian
6 mary jemison - white woman of the genessee war, french & indian

Create two Native American marker datasets: one data frame of all Native American markers ever put up, and one of only the markers that are currently up.

# Stack the marker ids found through series and through categories into one
# column. The ids are simply concatenated, not de-duplicated, so a marker
# appears once per matching series/category row.
all_native_american_marker_id <- tibble(
  marker_id = c(native_series$marker_id, native_categories$marker_id)
)

# All valid markers whose id is in the combined id list.
native_american_markers_all_df <- all_valid_markers_df %>%
  filter(marker_id %in% all_native_american_marker_id$marker_id)

# Same selection, applied to the all_valid_markers_up_df data frame.
native_american_markers_up_df <- all_valid_markers_up_df %>%
  filter(marker_id %in% all_native_american_marker_id$marker_id)

Sentence

+up_first_sunday_podcast: We found more than 15,000 markers across the country that mentioned Native Americans,

+digital: Across the country, more than 15,000 markers mention Native Americans.

All of the series and categories are specifically about Native Americans except for the category "settlements & settlers". Therefore, to get a list of all markers that mention Native Americans, I will:

- Automatically include any marker tagged in one of the series or categories mentioned above, unless its only tag is "settlements & settlers".
- For the markers tagged only as "settlements & settlers", search the marker text for phrases related to Native Americans and include only the matching markers in the total.

# Rows of the matched categories that carry category_id 12.
# NOTE(review): 12 is presumably the id of "settlements & settlers" -- confirm
# against the category table.
settlements_settlers_marker_id <- native_categories %>%
  filter(category_id == 12)

# Settlement markers that are not part of any other Native American series or
# category: they appear exactly once in the combined id list.
settlements_to_search <- all_native_american_marker_id %>%
  summarize(n = n(), .by = marker_id) %>%
  filter(n == 1 & marker_id %in% settlements_settlers_marker_id$marker_id)

# Every other marker in the combined id list, i.e. everything except the
# settlement-only markers selected above.
all_native_excluding_solo_settlemnts <- all_native_american_marker_id %>%
  summarize(n = n(), .by = marker_id) %>%
  filter(n != 1 | !(marker_id %in% settlements_settlers_marker_id$marker_id))

# Search the text of the settlement-only markers for terms referring to
# Native Americans (including racist terms), matched as whole words.
solo_settlements_about_natives <- native_american_markers_up_df %>%
  filter(marker_id %in% settlements_to_search$marker_id) %>%
  filter(
    str_detect(
      text,
      paste0(
        "\\b",
        c(
          "indian", "indians", "red skin", "red skins", "redskin", "redskins",
          "native american", "native americans"
        ),
        "\\b",
        collapse = "|"
      )
    )
  )

# The final id list is the union of the two groups above.
total_indian_markers <- tibble(
  marker_id = c(
    all_native_excluding_solo_settlemnts$marker_id,
    solo_settlements_about_natives$marker_id
  )
)

# Restrict the "up" data frame to the final id list. This overwrites
# native_american_markers_up_df in place.
native_american_markers_up_df <- native_american_markers_up_df %>%
  filter(marker_id %in% total_indian_markers$marker_id)

# Number of markers that remain.
nrow(native_american_markers_up_df)

[1] 15200

Sentence:

+up_first_sunday_podcast: Hundreds of markers still call Native Americans savages, hostile, or use racial slurs.

+digital: From the Atlantic through the Plains, more than 270 markers describe Native Americans as “savage,” “hostile” or “semi-civilized,” or they use racial slurs.

+marking_the_frontier_radio: NPR found hundreds of markers that still call Native Americans savage, hostile, or use racial slurs.

# Phrases that flag potentially offensive descriptions of Native Americans.
# Each entry is a regular-expression fragment. Only the "red man"/"red men"
# variants carry explicit \b word boundaries; every other entry matches
# anywhere inside a longer string.
pattern <- c(
  "mound builders",
  "stone age",
  "hostile indian",
  "hostile tribe",
  "hostiles",
  "savages",
  "indian savages",
  "savage boy",
  "savage indian",
  "civilized tribes",
  "mound-building",
  "vengeful",
  "semi-civilized",
  "\\bred men\\b",
  "\\bred man\\b",
  "\\bredmen\\b",
  "\\bredman\\b",
  "red paint people",
  "half-breed",
  "civilized indian",
  "indian brave",
  "redskin",
  "hostile red indian"
)

# Build the alternation once instead of re-pasting it for every call. Using a
# distinctly named variable also avoids the bare name `pattern` being resolved
# to a data-frame column inside filter()/mutate().
offensive_regex <- paste(pattern, collapse = "|")

# Gathered for handcheck: markers whose text or title matches any phrase, with
# the matched phrases listed (comma-separated) in two helper columns placed
# just before `url`.
# vapply(..., character(1)) always returns a character vector, even for a
# zero-row input, where sapply() would return an empty list.
offensive_native_markers_df <- native_american_markers_up_df %>%
  filter(
    str_detect(text, offensive_regex) | str_detect(title, offensive_regex)
  ) %>%
  mutate(
    matched_words_text = vapply(
      str_extract_all(text, offensive_regex),
      paste,
      FUN.VALUE = character(1),
      collapse = ", "
    ),
    .before = url
  ) %>%
  mutate(
    matched_words_title = vapply(
      str_extract_all(title, offensive_regex),
      paste,
      FUN.VALUE = character(1),
      collapse = ", "
    ),
    .before = url
  )




# Load the hand-checked classification of the candidate markers.
offensive_american_indian_handchecked_df <- read.csv(
  here("data/handmade/offensive_native_markers - offensive_native_markers.csv")
)

# Number of markers whose hand-check verdict is "yes".
offensive_american_indian_handchecked_df %>%
  filter(yes_no_maybe == "yes") %>%
  summarize(n = n(), .by = yes_no_maybe)

# Counts for every verdict value, passed to cat_table() with a title.
offensive_american_indian_handchecked_df %>%
  count(yes_no_maybe) %>%
  cat_table("Offensive Native American Markers")

Offensive Native American Markers

Sentence:

+up_first_sunday_podcast: We found at least two hundred that tell an eerily similar story: Innocent white settlers were minding their own business when one day for no reason Native Americans appeared and killed them in cold blood.

+up_first_sunday_podcast: We are, we’re heading to the Midwest, to the frontier where we found markers that really glorify white settlers, but vilify Native Americans.

+digital: At least 200 markers tell an eerily similar American tale: Native Americans attacked innocent white settlers for no reason.

+marking_the_frontier_radio: More than 200 tell an eerily similar story: Native Americans killed innocent white settlers in cold blood.

I used regex patterns to search for variations of common phrases used when describing settlers being killed by Native Americans, and then hand-checked the matches.

# Case-insensitive regex for phrases describing killings by Indians: a verb
# (killed, attacked, murdered, massacre/massacred, slain, carried away),
# optionally one more word, an optional "by the", any number of
# whitespace-separated words, and finally "Indian" or "Indians".
indian_pattern <- "(?i)\\b(?:killed|attacked|murdered|massacre(?:d)?|slain|carried away)\\b(?:\\s+\\w+)?\\s+(?:by\\s+the\\s+)?(?:\\w+\\s+)*Indians?"

# Candidate markers: the text matches the pattern above, or the title contains
# "massacre". Each candidate is flagged for whether its text also mentions
# settlers or family members, and tagged as version "1".
killed_by_indians_df <- native_american_markers_up_df %>%
  filter(
    grepl(indian_pattern, text, ignore.case = TRUE, perl = TRUE) |
      str_detect(title, "massacre")
  ) %>%
  mutate(
    mentions_settlers = str_detect(
      text,
      "settler|pioneer|family|families|child|children|father|husband|wife|wives|brother"
    ),
    version = "1"
  ) %>%
  # select() fixes the final column set and order.
  select(marker_id, url, mentions_settlers, version)

# Previous classification: load the first hand-check file, tag it as version
# "2", and full-join it with the regex candidates on marker_id so that rows
# from either side are kept. Both inputs have a `version` column that is not
# a join key, so the result carries it twice with the join's default suffixes.
massacre_indian_settler_df <- here("data/handmade/killed_by_indians_check1.csv") %>%
  read_csv() %>%
  mutate(version = "2") %>%
  full_join(killed_by_indians_df, by = join_by(marker_id))


# write_csv(massacre_indian_settler_df, here("data/processed/native_massacre_final.csv"))

# Load the second hand-check file and keep only the rows marked "yes".
killed_by_indians_checked_df <- read_csv(
  here("data/handmade/killed_by_indians_check2.csv")
) %>%
  filter(killed_by_indians == "yes")

# Count the confirmed rows and pass the result to cat_table() with a title.
count(killed_by_indians_checked_df, killed_by_indians) %>%
  cat_table("Markers that mention settlers being killed by Native Americans")

Markers that mention settlers being killed by Native Americans