From 8ad1e780f2d3b4d74f8ac992511a01977bb0fb60 Mon Sep 17 00:00:00 2001
From: Tarotis
Date: Wed, 20 Dec 2023 11:01:10 +0100
Subject: [PATCH] v0.1rc3

---
Review notes (ignored by `git am`, not part of the commit message):
* The line structure of this patch was restored from a flattened copy.
  Whitespace inside lines (banner padding, continuation indent) is kept
  as found there; if context fails to match, try
  `git apply --ignore-whitespace`.
* `inner_join(spk)` relies on the implicit natural join. `spk` only has
  the column `ID`, so `by = "ID"` would state the key explicitly and
  silence the "Joining with" message.
* With `group_by(Glottocode)` commented out, `count_speaker` is no longer
  grouped by Glottocode in this statement, while `count_ipu` still is.
  Confirm that later code expects the changed shape.
* `data %>% group_by(Speaker) %>% summarise(n=n())` is not assigned, so
  its result is discarded unless the script is run interactively or
  sourced with echo.

 scripts/07_utils.R | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/scripts/07_utils.R b/scripts/07_utils.R
index 8f90943..177e3e7 100644
--- a/scripts/07_utils.R
+++ b/scripts/07_utils.R
@@ -18,10 +18,19 @@ data <- read_tsv('data.tsv') %>%
 lang_vec <- unique(data$Language)
 languages <- read_csv('../doreco/cldf/languages.csv')
 
+# Speaker Table has 396 entries, but only 393 contribute data
+speakers <- read_csv('../doreco/cldf/speakers.csv')
+data %>% group_by(Speaker) %>% summarise(n=n())
+
 ###################################
 ### Preprocessing numbers ###
 ###################################
-count_speaker <- data %>% group_by(Glottocode) %>%
+spk <- tibble(ID = unique(data$Speaker))
+speaker_sex <- speakers %>%
+  inner_join(spk) %>% group_by(sex) %>%
+  summarise("Sex"=n())
+
+count_speaker <- data %>% # group_by(Glottocode) %>%
   summarise("Speakers"=n_distinct(Speaker))
 
 count_ipu <- data %>% group_by(Glottocode) %>%