fixed iss body region issue. Extremities region was being mapped to g…

…eneral body region for gem_max and gem_min methods as well as for the icd 9 to ais lookup table. This was a bug in the program that should be fixed.
ablack3 · Jun 9, 2019 · 03f3007 · 03f3007
1 parent aed2c3b
commit 03f3007
Show file tree

Hide file tree

Showing 9 changed files with 8,293 additions and 8,219 deletions.
diff --git a/R/sysdata.rda b/R/sysdata.rda
diff --git a/lookup_tables/i10_map_max.csv b/lookup_tables/i10_map_max.csv
diff --git a/lookup_tables/i10_map_min.csv b/lookup_tables/i10_map_min.csv
diff --git a/lookup_tables/ntab_s1.csv b/lookup_tables/ntab_s1.csv
diff --git a/prelim/create etab_s1 ntab_s1/create etab_s1 ntab_s1.R b/prelim/create etab_s1 ntab_s1/create etab_s1 ntab_s1.R
@@ -18,10 +18,14 @@ ntab_s1[which(ntab_s1$dx == 85011), "severity"] <- 2
 ntab_s1[which(ntab_s1$dx == 8628), "severity"] <- 5
 
 # convert issbr to character : make sure map_issbr function is loaded.
-ntab_s1 <- ntab_s1 %>% rowwise() %>% mutate(issbr2 = map_issbr(issbr))
+ntab_s1 <-
+      ntab_s1 %>%
+      rowwise() %>%
+      mutate(issbr2 = map_issbr(issbr)) %>%
+      ungroup()
 
 # check conversion
-ntab_s1 %>% select(starts_with("issbr")) %>% unique()
+count(ntab_s1, issbr, issbr2)
 
 # drop original and convert to dataframe (not tibble)
 ntab_s1 <- ntab_s1 %>% select(-issbr) %>% rename(issbr = issbr2) %>%  as.data.frame()

diff --git a/prelim/create i10_map_min i10_map_max/create i10_map_min i10_map_max.R b/prelim/create i10_map_min i10_map_max/create i10_map_min i10_map_max.R
@@ -95,7 +95,7 @@ gemconflicts <- gem2 %>%
 # how many i10 codes do we need to map manually?
 length(unique(gemconflicts$i10))
 # 47 codes
-
+# now looks like 123 codes... Not sure why that changed.
 
 
 # manually assign body region when there is a comflict using this rule:
@@ -113,7 +113,8 @@ map_br <- function(desc){
       else if(grepl(" ear| eye",desc)){ br <- 2}
       else if(grepl("thorax", desc)){ br <- 3}
       else if(grepl("pelvis|lower limb|thigh|knee|lower leg|ankle|foot|toe|upper limb|upper arm|elbow|forearm|wrist|hand|palm|finger|thumb", desc)){ br <- 5}
-      br
+      # map_issbr is defined in the mapping functions R script. It maps the numeric code to a text description.
+      map_issbr(br)
 }
 
 # test mapping function
@@ -127,14 +128,15 @@ gem2 <- left_join(gem2, gemconflicts[ , c("i10", "new_br")], by = "i10")
 
 sum(!is.na(gem2$new_br))
 # 373 rows in the GEM have non-missing new body region values
+# June 2019 update - 1057 rows now.
 
 # just check that there are only 47 codes with non missing new body region
 gem2 %>%
       filter(!is.na(new_br)) %>%
       .[["i10"]] %>%
       unique() %>%
       length()
-
+# June 2019 - 123 codes
 
 # only use new body region if there is a conflict
 gem2 <- gem2 %>% mutate(new_br2 = ifelse(is.na(new_br), issbr, new_br))
@@ -226,6 +228,10 @@ gem3_min$severity <- ifelse(gem3_min$severity == 9, NA, gem3_min$severity)
 unique(gem3_max$severity)
 unique(gem3_min$severity)
 
+# check iss body region distribution
+count(gem3_max, issbr)
+count(gem3_min, issbr)
+
 # output
 write_csv(gem3_min, "./lookup_tables/i10_map_min.csv")
 write_csv(gem3_max, "./lookup_tables/i10_map_max.csv")

diff --git a/prelim/create sysdata.R b/prelim/create sysdata.R
@@ -2,7 +2,7 @@
 # read in lookup tables from the lookup tables folder and save add them to the package sysdata object
 
 library(dplyr)
-rm(list = ls())
+
 list.files("./lookup_tables")
 
 sev_cc <- c("character", "integer", "character")
@@ -27,6 +27,14 @@ i10_ecode <- read.csv("./lookup_tables/i10_ecode.csv", stringsAsFactors = F, col
 # Original ecode mapping changed to text instead of numeric codes
 etab_s1 <- read.csv("./lookup_tables/etab_s1.csv", stringsAsFactors = F, colClasses = "character")
 
+# check frequencies of issbr
+library(purrr)
+library(dplyr)
+l <- lst(i10_map_emp, i10_map_roc, i10_map_max, i10_map_min, ntab_s1)
+n <- names(l)
+map2(l, n, ~count(.x, issbr, name = .y)) %>%
+      reduce(full_join, by = "issbr")
+
 
 # check col classes. These must ultimately be the same so they can be combined with rbind().
 
@@ -56,7 +64,7 @@ head(rbind(etab_s1, i10_ecode))
 # no errors
 
 # create internal data
-devtools::use_data(
+usethis::use_data(
       i10_map_min,
       i10_map_max,
       i10_map_emp,
@@ -72,5 +80,5 @@ devtools::use_data(
 head(i10_map_emp)
 
 # add prelim directory to r build ignore
-devtools::use_build_ignore("prelim")
+usethis::use_build_ignore("prelim")
 
diff --git a/prelim/mapping functions.R b/prelim/mapping functions.R
@@ -57,7 +57,8 @@ map_issbr <- function(i){
       else if(i==2) "Face"
       else if(i==3) "Chest"
       else if(i==4) "Abdomen"
-      else if(i %in% c(5,6)) "General"
+      else if(i==5) "Extremities"
+      else if(i==6) "General"
       else "Unknown"
 }
 
diff --git a/prelim/speedup.R b/prelim/speedup.R
@@ -0,0 +1,55 @@
+
+# this is the section that needs optimizing!
+devtools::load_all()
+etab <- rbind(etab_s1, i10_ecode)
+
+
+
+df <- data.frame(key = 1:9,
+           dx1 = c("R65.21", "S02.40EA", "R78.81", "K85.90", "Z68.44", "Z68.1", "Z79.4", "R33.9", "R09.02"),
+           dx2 = c("R13.10", "N17.9", "R53.1", "R31.9", "Z92.241", "R29.810", "R33.9", "Q76.2", "Z90.710"),
+           dx3 = c("Z90.13", "R56.9", "M71.22", "R13.10", "N17.9", "R04.0", "M19.90", "R00.0", "Z87.891"),
+           dx4 = c("I10", "Z85.820", "J96.90", "G89.11", "K92.2", "Z85.43", "N39.0", "J96.90", "H35.00"),
+           dx5 = c("G10", "J90", "Z66", "F32.9",  "K76.0", "Z85.3", "N18.9", "J69.0", "E88.09"),
+           dx6 = c("J45.909", "R91.1", "E87.5", "F10.221", "Z51.5", "I82.4Z2", "N17.9", "E87.6", "J81.1"),
+           dx7 = c("R50.9", "N39.0", "J44.9", "K57.30", "E87.2", "I10", "I10", "E83.51", "F05"),
+           dx8 = c("I11.0", "I46.9", "R06.82", "G89.11", "K21.9", "E88.09", "E83.42", "E87.5", "E86.9"),
+           dx9 = c("I10", "E83.42", "J86.9", "G81.94", "R00.1", "E83.39", "F32.9", "E83.52", "E87.3"), stringsAsFactors = F)
+
+# get ecode column names
+ecode_colnames <- paste0("ecode_", 1:4)
+
+#create ecode columns
+df[ , ecode_colnames] <- NA
+
+# for each row extract the first 4 ecodes and add them to the e-code columns
+# icd10 e-codes do not start with E.
+
+# get a list of all ecodes (includes icd10 code if requested)
+ecode_regex <- paste0("^", etab$dx, collapse = "|")
+
+profvis::profvis({
+df[ , ecode_colnames] <- t(apply(df, 1, function(row){
+      # remove decimal
+      row <- sub("\\.", "", row)
+      # get all e codes using pattern matching
+      row_ecodes <- stringr::str_extract(as.character(unlist(row)), ecode_regex)
+      # row_ecodes <- grep(ecode_regex, as.character(unlist(row)), value = T) #1370
+      # remove na values
+      row_ecodes <- na.omit(row_ecodes)
+      # save first 4 Ecodes
+      row_ecodes[1:4]
+}))
+
+})
+grep(ecode_regex, sub("\\.", "", as.character(df[6,-1])), value = T)
+
+grep(ecode_regex, c("X600", "X53", "hi","Y82"), value = T)
+
+as.matrix(df[,-1]) %>%
+      t() %>%
+      .[,]
+      stringr::str_extract( ecode_regex)
+{.}
+
+