Skip to content

Commit

Permalink
fixed iss body region issue. Extremities region was being mapped to g…
Browse files Browse the repository at this point in the history
…eneral body region for gem_max and gem_min methods as well as for the icd 9 to ais lookup table. This was a bug in the program that should be fixed.
  • Loading branch information
ablack3 committed Jun 9, 2019
1 parent aed2c3b commit 03f3007
Show file tree
Hide file tree
Showing 9 changed files with 8,293 additions and 8,219 deletions.
Binary file modified R/sysdata.rda
Binary file not shown.
7,676 changes: 3,838 additions & 3,838 deletions lookup_tables/i10_map_max.csv

Large diffs are not rendered by default.

7,676 changes: 3,838 additions & 3,838 deletions lookup_tables/i10_map_min.csv

Large diffs are not rendered by default.

1,068 changes: 534 additions & 534 deletions lookup_tables/ntab_s1.csv

Large diffs are not rendered by default.

8 changes: 6 additions & 2 deletions prelim/create etab_s1 ntab_s1/create etab_s1 ntab_s1.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,14 @@ ntab_s1[which(ntab_s1$dx == 85011), "severity"] <- 2
ntab_s1[which(ntab_s1$dx == 8628), "severity"] <- 5

# convert issbr to character : make sure map_issbr function is loaded.
ntab_s1 <- ntab_s1 %>% rowwise() %>% mutate(issbr2 = map_issbr(issbr))
ntab_s1 <-
ntab_s1 %>%
rowwise() %>%
mutate(issbr2 = map_issbr(issbr)) %>%
ungroup()

# check conversion
ntab_s1 %>% select(starts_with("issbr")) %>% unique()
count(ntab_s1, issbr, issbr2)

# drop original and convert to dataframe (not tibble)
ntab_s1 <- ntab_s1 %>% select(-issbr) %>% rename(issbr = issbr2) %>% as.data.frame()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ gemconflicts <- gem2 %>%
# how many i10 codes do we need to map manually?
length(unique(gemconflicts$i10))
# 47 codes

# now looks like 123 codes... Not sure why that changed.


# manually assign body region when there is a comflict using this rule:
Expand All @@ -113,7 +113,8 @@ map_br <- function(desc){
else if(grepl(" ear| eye",desc)){ br <- 2}
else if(grepl("thorax", desc)){ br <- 3}
else if(grepl("pelvis|lower limb|thigh|knee|lower leg|ankle|foot|toe|upper limb|upper arm|elbow|forearm|wrist|hand|palm|finger|thumb", desc)){ br <- 5}
br
# map_issbr is defined in the mapping functions R script. It maps the numeric code to a text description.
map_issbr(br)
}

# test mapping function
Expand All @@ -127,14 +128,15 @@ gem2 <- left_join(gem2, gemconflicts[ , c("i10", "new_br")], by = "i10")

sum(!is.na(gem2$new_br))
# 373 rows in the GEM have non-missing new body region values
# June 2019 update - 1057 rows now.

# just check that there are only 47 codes with non missing new body region
gem2 %>%
filter(!is.na(new_br)) %>%
.[["i10"]] %>%
unique() %>%
length()

# June 2019 - 123 codes

# only use new body region if there is a conflict
gem2 <- gem2 %>% mutate(new_br2 = ifelse(is.na(new_br), issbr, new_br))
Expand Down Expand Up @@ -226,6 +228,10 @@ gem3_min$severity <- ifelse(gem3_min$severity == 9, NA, gem3_min$severity)
unique(gem3_max$severity)
unique(gem3_min$severity)

# check iss body region distribution
count(gem3_max, issbr)
count(gem3_min, issbr)

# output
write_csv(gem3_min, "./lookup_tables/i10_map_min.csv")
write_csv(gem3_max, "./lookup_tables/i10_map_max.csv")
Expand Down
14 changes: 11 additions & 3 deletions prelim/create sysdata.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# read in lookup tables from the lookup tables folder and save add them to the package sysdata object

library(dplyr)
rm(list = ls())

list.files("./lookup_tables")

sev_cc <- c("character", "integer", "character")
Expand All @@ -27,6 +27,14 @@ i10_ecode <- read.csv("./lookup_tables/i10_ecode.csv", stringsAsFactors = F, col
# Original ecode mapping changed to text instead of numeric codes
etab_s1 <- read.csv("./lookup_tables/etab_s1.csv", stringsAsFactors = F, colClasses = "character")

# check frequencies of issbr
library(purrr)
library(dplyr)
l <- lst(i10_map_emp, i10_map_roc, i10_map_max, i10_map_min, ntab_s1)
n <- names(l)
map2(l, n, ~count(.x, issbr, name = .y)) %>%
reduce(full_join, by = "issbr")


# check col classes. These must ultimately be the same so they can be combined with rbind().

Expand Down Expand Up @@ -56,7 +64,7 @@ head(rbind(etab_s1, i10_ecode))
# no errors

# create internal data
devtools::use_data(
usethis::use_data(
i10_map_min,
i10_map_max,
i10_map_emp,
Expand All @@ -72,5 +80,5 @@ devtools::use_data(
head(i10_map_emp)

# add prelim directory to r build ignore
devtools::use_build_ignore("prelim")
usethis::use_build_ignore("prelim")

3 changes: 2 additions & 1 deletion prelim/mapping functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ map_issbr <- function(i){
else if(i==2) "Face"
else if(i==3) "Chest"
else if(i==4) "Abdomen"
else if(i %in% c(5,6)) "General"
else if(i==5) "Extremities"
else if(i==6) "General"
else "Unknown"
}

55 changes: 55 additions & 0 deletions prelim/speedup.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@

# this is the section that needs optimizing!
devtools::load_all()
etab <- rbind(etab_s1, i10_ecode)



df <- data.frame(key = 1:9,
dx1 = c("R65.21", "S02.40EA", "R78.81", "K85.90", "Z68.44", "Z68.1", "Z79.4", "R33.9", "R09.02"),
dx2 = c("R13.10", "N17.9", "R53.1", "R31.9", "Z92.241", "R29.810", "R33.9", "Q76.2", "Z90.710"),
dx3 = c("Z90.13", "R56.9", "M71.22", "R13.10", "N17.9", "R04.0", "M19.90", "R00.0", "Z87.891"),
dx4 = c("I10", "Z85.820", "J96.90", "G89.11", "K92.2", "Z85.43", "N39.0", "J96.90", "H35.00"),
dx5 = c("G10", "J90", "Z66", "F32.9", "K76.0", "Z85.3", "N18.9", "J69.0", "E88.09"),
dx6 = c("J45.909", "R91.1", "E87.5", "F10.221", "Z51.5", "I82.4Z2", "N17.9", "E87.6", "J81.1"),
dx7 = c("R50.9", "N39.0", "J44.9", "K57.30", "E87.2", "I10", "I10", "E83.51", "F05"),
dx8 = c("I11.0", "I46.9", "R06.82", "G89.11", "K21.9", "E88.09", "E83.42", "E87.5", "E86.9"),
dx9 = c("I10", "E83.42", "J86.9", "G81.94", "R00.1", "E83.39", "F32.9", "E83.52", "E87.3"), stringsAsFactors = F)

# get ecode column names
ecode_colnames <- paste0("ecode_", 1:4)

#create ecode columns
df[ , ecode_colnames] <- NA

# for each row extract the first 4 ecodes and add them to the e-code columns
# icd10 e-codes do not start with E.

# get a list of all ecodes (includes icd10 code if requested)
ecode_regex <- paste0("^", etab$dx, collapse = "|")

profvis::profvis({
df[ , ecode_colnames] <- t(apply(df, 1, function(row){
# remove decimal
row <- sub("\\.", "", row)
# get all e codes using pattern matching
row_ecodes <- stringr::str_extract(as.character(unlist(row)), ecode_regex)
# row_ecodes <- grep(ecode_regex, as.character(unlist(row)), value = T) #1370
# remove na values
row_ecodes <- na.omit(row_ecodes)
# save first 4 Ecodes
row_ecodes[1:4]
}))

})
grep(ecode_regex, sub("\\.", "", as.character(df[6,-1])), value = T)

grep(ecode_regex, c("X600", "X53", "hi","Y82"), value = T)

as.matrix(df[,-1]) %>%
t() %>%
.[,]
stringr::str_extract( ecode_regex)
{.}


0 comments on commit 03f3007

Please sign in to comment.