genotyping.Rmd

---
title: "WM_selection"
author: "Nieto_Lopez"
date: "8/29/2018"
output:
  pdf_document: default
  html_document: default
---
# SELECTION OF WHITE MOLD ISOLATES (_Sclerotinia sclerotiorum_) FOR FUNGICIDE _IN VITRO_ TESTING BY DISCRIMINATORY DOSES  


Before knitting, run line 263 twice.  

## Background  
There is a very large number of isolates, so sampling is needed in order to find out whether fungicide sensitivity has decreased across space and time.  

## Objective
 1. Select isolates hierarchically by space
 2. From objective 1, make a Venn diagram to filter out those already used, and select isolates hierarchically by time 
 
```{r setup, include=FALSE}
#Image: ![](Documents/to/white_mold.png)
library(tidyverse)
library(dplyr)
library(ggplot2)
library(knitr)
library(maps)
library(mapdata)
knitr::opts_chunk$set(echo = TRUE)
```
### Reading data  
```{r}
# Load the 2015-2017 white mold inventory. Text columns are read as factors so
# that the level renaming below can work through `levels()`.
WM <- read.csv("data/WM-2015-2016-2017_fixingproductnames.csv", stringsAsFactors = TRUE)

# Year is a category, not a quantity
WM$Year <- as.factor(WM$Year)

# Store isolate IDs as text. Converting straight to character keeps the
# printed ID whether read.csv() produced a factor or a number; going through
# as.numeric() first would replace factor labels by their integer level codes.
WM$Collection_ID <- as.character(WM$Collection_ID)

# More descriptive names for the sample origin
levels(WM$Origin)[levels(WM$Origin) == "trial"] <- "Fungicide field trials"
levels(WM$Origin)[levels(WM$Origin) == "Survey"] <- "Farmer fields"

# Blank and "No" both mean that no DNA was extracted, so they share one label
levels(WM$DNA.Extraction)[levels(WM$DNA.Extraction) %in% c("", "No")] <- "No DNA extraction"
levels(WM$DNA.Extraction)[levels(WM$DNA.Extraction) == "Yes"] <- "Yes DNA extraction"

# Polygon data for the base maps
usa <- map_data("usa") # USA outline
states <- map_data("state") # USA by state
counties <- map_data("county") # USA by county
bra <- map_data("worldHires", "Brazil")
mexico <- map_data("worldHires", "Mexico")

# County polygons of the four Midwest states covered by the inventory
mid_west_county <- subset(
  counties,
  region %in% c("nebraska", "wisconsin", "michigan", "iowa")
)

```

```{r}
# Isolates already pre-tested against fungicides: US Midwest isolates,
# baselines and a few Mexican isolates. Some IDs are listed more than once;
# the vector is kept exactly as recorded (170 entries, original order).
tested <- c(
  "1", "118", "123", "12B", "129", "20", "21", "449",
  "461", "467", "475", "558", "564", "568", "581", "645",
  "800", "667", "74SS1", "8", "87", "318", "413", "419",
  "62-02", "62-03", "62-04", "78-01", "78-02", "78-05",
  "H-01", "H-03", "H-04", "I-20", "S-01", "W212",
  "1025", "1026", "1027", "1029", "1032", "1033",
  # Mexican isolates
  "1870", "1872", "1884", "1885",
  "54C", "65B", "51C", "71B", "64D", "53B", "60A",
  "698", "699", "710", "711", "724", "725", "731", "732",
  "738", "739", "746", "751", "755", "756", "757", "764",
  "765", "771", "772", "786", "787", "811", "812", "813",
  "814", "817", "818", "851", "852", "853", "855", "858",
  "859", "860", "861", "862", "867", "870", "871", "877",
  "878", "884", "885", "891", "892", "896", "897", "901",
  "902", "905", "906", "908", "909", "911", "912", "914",
  "274", "307", "504", "505",
  "2384", "2385", "2386", "2388", "2407", "2408", "2383",
  "1058", "1081", "1087", "1109", "1127", "1128", "1134", "1135",
  "1139", "1175", "1026", "1027", "1029",
  "1328", "1329", "1330", "1331", "1332", "1340", "1345", "1365",
  "1366", "1392", "1327", "1502", "1582", "1620", "1622", "1541",
  "1671", "1672", "1691", "1692", "1712", "1713", "1721", "1722",
  "1731", "1732", "1791",
  "1", "118", "123", "20", "74SS1", "8", "419", "78-02", "H-01", "H-03"
)

# All isolates from Mexico: ID 248 plus the consecutive IDs 1857 to 1940,
# stored as character so they compare directly with Collection_ID
mexican_isolates <- c("248", as.character(1857:1940))

```

### Data wrangling
```{r}
  # Keep only the inventory columns used by the selection workflow
  wm1 <- select(
    WM,
    Collection_ID, State, Field, Year, Origin, long, lat, hyphal_tip..ht.,
    DNA.Extraction, County,
    Serial_agar_dilution_for_creating_the_model_..N.20., Form.ID
  )

# Non-Mexican isolates, flagged by fungicide pre-test status and by whether
# their field (counted after the Mexican isolates are removed) holds more
# than 2 isolates. The field-size flag is stored as a factor.
wm1_ <- wm1 %>%
  filter(!Collection_ID %in% mexican_isolates) %>%
  mutate(Fungicide_status = ifelse(Collection_ID %in% tested, "tested", "no_tested")) %>%
  group_by(Field) %>%
  mutate(num_isolates_per_field = ifelse(n() > 2, "2>isolates/Field", "<3isolates/Field")) %>%
  ungroup() %>%
  mutate(num_isolates_per_field = as.factor(num_isolates_per_field))

```
### Cleaning and rearranging the inventory so far (run twice)
```{r, warning= FALSE}
# Drop inventory row 970, recorded as a blank entry.
# NOTE(review): this is a positional drop, so re-running the chunk on the same
# object removes a further row each time — confirm before executing it twice.
wm1_ <- wm1_[-970, ]

# Longitude may have been read as a factor/text; go through character so the
# printed value (not a level code) is what becomes numeric
wm1_$long <- as.numeric(as.character(wm1_$long))

# Put every longitude in the Western Hemisphere (negative sign). -abs() is used
# instead of `* -1` so the result is the same however many times this chunk is
# executed and whatever sign the inventory stored.
wm1_$long <- -abs(wm1_$long)

```
### Subsetting some data
```{r}
# Base map (county outlines of the four Midwest states) reused by later plots
mid_west_base <- ggplot(mid_west_county, aes(x = long, y = lat, group = group)) +
  geom_polygon(color = "black", fill = "grey") +
  coord_fixed(1.3)

# Master selection table: one row per isolate, with the number of isolates in
# its State/County/Field and the number of distinct fields in its State/County.
# The result stays grouped by State and County.
selection <- wm1_ %>%
  select(
    Collection_ID, State, County, Field, Year, Origin, long, lat,
    hyphal_tip..ht., DNA.Extraction,
    Serial_agar_dilution_for_creating_the_model_..N.20., Form.ID,
    Fungicide_status, num_isolates_per_field
  ) %>%
  # States ordered as they appear from left to right on the US map
  arrange(factor(State, levels = c("NE", "IA", "WI", "MI"))) %>%
  # Isolates per State/County/Field
  group_by(State, County, Field) %>%
  mutate(num_ISO_per_field = n()) %>%
  ungroup() %>%
  # Distinct fields per State/County
  group_by(State, County) %>%
  mutate(num_field_County = n_distinct(Field)) %>%
  # Column order chosen for easier reading
  select(
    Collection_ID, State, County, Field, num_ISO_per_field, num_field_County,
    Year, Origin, long, lat, hyphal_tip..ht., DNA.Extraction,
    Serial_agar_dilution_for_creating_the_model_..N.20., Form.ID,
    Fungicide_status, num_isolates_per_field
  )

# Fields with more than 10 isolates: draw 3 isolates at random from each
# State/County/Field group. (Filtering on DNA.Extraction == "No DNA extraction"
# here was considered and left out.)
selection1 <- selection %>%
  ungroup() %>%
  group_by(State, County, Field) %>%
  filter(num_ISO_per_field > 10) %>%
  sample_n(size = 3, replace = FALSE)

# Remaining fields: keep the rows whose per-field isolate count does not occur
# among the fields already sampled in selection1 (all of which exceed 10).
# These are sampled field by field further down.
selection2 <- selection %>%
  ungroup() %>%
  filter(!(num_ISO_per_field %in% selection1$num_ISO_per_field))

```
### Randomly picking 3 isolates per field (with the `sample()` function or by hand) for the groupings with fewer than 10 isolates per field. An error here means the field has fewer than 3 isolates, which is acceptable

```{r}
# Rows of `data` (by default `selection2`) that belong to one
# State / County / Field combination.
field_isolates <- function(state_code, county_name, field_name, data = selection2) {
  filter(data, State == state_code, County == county_name, Field == field_name)
}

# Randomly pick `size` isolate IDs (without replacement) from one field's rows.
# A field holding fewer than `size` isolates is skipped: an empty vector is
# returned and a message is printed, instead of sample() stopping the document.
sample_isolates <- function(field_rows, size = 3) {
  ids <- field_rows$Collection_ID
  if (length(ids) < size) {
    message(
      "Skipping field with only ", length(ids), " isolate(s); ",
      size, " are required."
    )
    return(ids[0])
  }
  # Draw positions rather than values: same draw as sample(ids, size), without
  # sample()'s special treatment of a single numeric value
  ids[sample.int(length(ids), size = size, replace = FALSE)]
}

# ---- Nebraska ----
fields1 <- field_isolates("NE", "Valley", "va")
a <- sample_isolates(fields1)

fields2 <- field_isolates("NE", "Antelope", "ant")
b <- sample_isolates(fields2)

fields3 <- field_isolates("NE", "Antelope", "21")
c <- sample_isolates(fields3)

fields4 <- field_isolates("NE", "Antelope", "22")
d <- sample_isolates(fields4)

fields5 <- field_isolates("NE", "Antelope", "23")
e <- sample_isolates(fields5)

fields6 <- field_isolates("NE", "Antelope", "24")
f <- sample_isolates(fields6)

fields7 <- field_isolates("NE", "Antelope", "25")
g <- sample_isolates(fields7)

fields8 <- field_isolates("NE", "Antelope", "69")
h <- sample_isolates(fields8)

fields9 <- field_isolates("NE", "Antelope", "75")
i <- sample_isolates(fields9)

fields10 <- field_isolates("NE", "Antelope", "102")
# j is not drawn: fewer than 3 isolates in this field

fields11 <- field_isolates("NE", "Antelope", "103")
# k is not drawn: fewer than 3 isolates in this field

fields12 <- field_isolates("NE", "Cedar", "6")
l <- sample_isolates(fields12)

fields13 <- field_isolates("NE", "Cedar", "67")
m <- sample_isolates(fields13)

fields14 <- field_isolates("NE", "Cedar", "78")
n <- sample_isolates(fields14)

fields15 <- field_isolates("NE", "Cedar", "98")
o <- sample_isolates(fields15)

fields16 <- field_isolates("NE", "Cedar", "100")
p <- sample_isolates(fields16)

fields17 <- field_isolates("NE", "Cedar", "101")
q <- sample_isolates(fields17)

fields18 <- field_isolates("NE", "Greeley", "8")
r <- sample_isolates(fields18)

fields19 <- field_isolates("NE", "Greeley", "9")
s <- sample_isolates(fields19)

fields20 <- field_isolates("NE", "Greeley", "68")
# t is not drawn: fewer than 3 isolates in this field

fields21 <- field_isolates("NE", "Madison", "10")
u <- sample_isolates(fields21)

fields22 <- field_isolates("NE", "Madison", "11")
v <- sample_isolates(fields22)

fields23 <- field_isolates("NE", "Boone", "18")
w <- sample_isolates(fields23)

fields24 <- field_isolates("NE", "Boone", "19")
x <- sample_isolates(fields24)

fields25 <- field_isolates("NE", "Boone", "20")
y <- sample_isolates(fields25)

fields26 <- field_isolates("NE", "Boone", "45")
z <- sample_isolates(fields26)

fields27 <- field_isolates("NE", "Boone", "46")
aa <- sample_isolates(fields27)

fields28 <- field_isolates("NE", "Boone", "48")
bb <- sample_isolates(fields28)

fields29 <- field_isolates("NE", "Dodge", "34")
cc <- sample_isolates(fields29)

fields30 <- field_isolates("NE", "Dodge", "35")
dd <- sample_isolates(fields30)

fields31 <- field_isolates("NE", "Holt", "47")
ee <- sample_isolates(fields31)

fields32 <- field_isolates("NE", "Sherman", "60")
ff <- sample_isolates(fields32)

fields33 <- field_isolates("NE", "Kearney", "62")
# gg is not drawn: fewer than 3 isolates in this field

fields34 <- field_isolates("NE", "Kearney", "104")
# hh is not drawn: fewer than 3 isolates in this field

fields35 <- field_isolates("NE", "Kearney", "10")
ii <- sample_isolates(fields35)

fields36 <- field_isolates("NE", "Cuming", "66")
# jj is not drawn: fewer than 3 isolates in this field

fields37 <- field_isolates("NE", "Pierce", "97")
# Annotated in the inventory notes as having fewer than 3 isolates; the helper
# returns an empty selection in that case instead of failing
kk <- sample_isolates(fields37)

fields38 <- field_isolates("NE", "Knox", "99")
ll <- sample_isolates(fields38)

# ---- Iowa ----
fields39 <- field_isolates("IA", "Howard", "howard")
# mm is not drawn: fewer than 3 isolates in this field

fields40 <- field_isolates("IA", "Chickasaw", "chi")
# nn is not drawn: fewer than 3 isolates in this field

fields41 <- field_isolates("IA", "Floyd", "floyd")
# oo is not drawn: fewer than 3 isolates in this field

fields42 <- field_isolates("IA", "Story", "sto")
pp <- sample_isolates(fields42)

fields43 <- field_isolates("IA", "Shelby", "sh")
qq <- sample_isolates(fields43)

fields44 <- field_isolates("IA", "Benton", "be")
rr <- sample_isolates(fields44)

fields45 <- field_isolates("IA", "Tama", "ta")
ss <- sample_isolates(fields45)

# ---- Wisconsin ----
fields46 <- field_isolates("WI", "Sauk City", "sa")
tt <- sample_isolates(fields46)

fields47 <- field_isolates("WI", "Cuba City", "cu")
uu <- sample_isolates(fields47)

fields48 <- field_isolates("WI", "Waushara", "wau")
vv <- sample_isolates(fields48)

fields49 <- field_isolates("WI", "Grant", "la")
ww <- sample_isolates(fields49)

fields50 <- field_isolates("WI", "Iowa", "co")
# xx is not drawn: fewer than 3 isolates in this field

fields51 <- field_isolates("WI", "Rock", "cern")
# yy is not drawn: fewer than 3 isolates in this field

fields52 <- field_isolates("WI", "Rock", "cer")
zz <- sample_isolates(fields52)

# ---- Michigan ----
fields53 <- field_isolates("MI", "Allegan", "Al")
aaa <- sample_isolates(fields53)

```
### Once all filters are applied, add a new column called "Selected" to the master dataframe 
```{r}
# Flag every isolate drawn so far: an isolate is "selected" when its ID is in
# selection1 (large fields) or in any of the per-field draws a ... aaa;
# everything else is "not_selected".
# (`c(...)` still calls base::c even though a variable named `c` exists,
# because R skips non-function objects when looking up a function call.)
selection <- selection %>%
  mutate(
    Selected = if_else(
      Collection_ID %in% c(
        selection1$Collection_ID,
        a, b, c, d, e, f, g, h, i,
        l, m, n, o, p, q, r, s,
        u, v, w, x, y, z,
        aa, bb, cc, dd, ee, ff,
        ii, kk, ll,
        pp, qq, rr, ss, tt, uu, vv, ww,
        zz, aaa
      ),
      "selected",
      "not_selected"
    )
  )

```
# Selection for the country of Mexico  
```{r}
# Creating the base map for subsequent plots
# Base map of Mexico reused by later plots
mexico_base <- ggplot(mexico, aes(x = long, y = lat, group = group)) +
  geom_polygon(color = "black", fill = "grey") +
  coord_fixed(1.3)

# Mexican isolates, flagged by fungicide pre-test status and by field size.
# The per-field count is taken over the whole inventory, before the Mexican
# isolates are filtered out. The field-size flag is stored as a factor.
# NOTE(review): the "else" label here is "<2isolates/Field" while the Midwest
# table uses "<3isolates/Field" for the same n() > 2 test — confirm which
# label is intended.
wm3_ <- wm1 %>%
  mutate(Fungicide_status = ifelse(Collection_ID %in% tested, "tested", "no_tested")) %>%
  group_by(Field) %>%
  mutate(num_isolates_per_field = ifelse(n() > 2, "2>isolates/Field", "<2isolates/Field")) %>%
  ungroup() %>%
  filter(Collection_ID %in% mexican_isolates) %>%
  mutate(num_isolates_per_field = as.factor(num_isolates_per_field))


```
### Cleaning and rearranging the inventory so far (run twice)
```{r}
# Longitude may have been read as a factor/text; go through character so the
# printed value (not a level code) is what becomes numeric
wm3_$long <- as.numeric(as.character(wm3_$long))

# Put every longitude in the Western Hemisphere (negative sign). -abs() is used
# instead of `* -1` so the result is the same however many times this chunk is
# executed and whatever sign the inventory stored.
wm3_$long <- -abs(wm3_$long)


```
### Data wrangling
```{r, echo=FALSE}
# Mexican selection table: one row per isolate, with the number of isolates in
# its State/County/Field and the number of distinct fields in its County.
# The result stays grouped by County.
selection_Mexico <- wm3_ %>%
  select(
    Collection_ID, State, County, Field, Year, Origin, long, lat,
    hyphal_tip..ht., DNA.Extraction,
    Serial_agar_dilution_for_creating_the_model_..N.20., Form.ID,
    Fungicide_status, num_isolates_per_field
  ) %>%
  # Isolates per State/County/Field
  group_by(State, County, Field) %>%
  mutate(num_ISO_per_field = n()) %>%
  ungroup() %>%
  # Distinct fields per County
  group_by(County) %>%
  mutate(num_field_County = n_distinct(Field)) %>%
  # Column order chosen for easier reading
  select(
    Collection_ID, State, County, Field, num_ISO_per_field, num_field_County,
    Year, Origin, long, lat, hyphal_tip..ht., DNA.Extraction,
    Serial_agar_dilution_for_creating_the_model_..N.20., Form.ID,
    Fungicide_status, num_isolates_per_field
  )

# Fields with more than 10 isolates: draw 3 isolates at random from each
# State/County/Field group. (Filtering on DNA.Extraction == "No DNA extraction"
# here was considered and left out.)
selection_Mexico1 <- selection_Mexico %>%
  ungroup() %>%
  group_by(State, County, Field) %>%
  filter(num_ISO_per_field > 10) %>%
  sample_n(size = 3, replace = FALSE)

# Remaining fields: keep the rows whose per-field isolate count does not occur
# among the fields already sampled in selection_Mexico1 (all of which exceed 10)
selection_Mexico2 <- selection_Mexico %>%
  ungroup() %>%
  filter(!(num_ISO_per_field %in% selection_Mexico1$num_ISO_per_field))
```
### Randomly picking 3 isolates per field (with the `sample()` function or by hand) for the groupings with fewer than 10 isolates per field. An error here means the field has fewer than 3 isolates, which is acceptable
```{r, echo=FALSE}
# Draw 3 isolate IDs, without replacement, from the pooled Collection_ID
# column of selection_Mexico2 (the draw is over the whole table, not per field)
ccc <- sample(x = selection_Mexico2[["Collection_ID"]], size = 3, replace = FALSE)


```
### Once all filters are applied, add a new column called "Selected" to the master dataframe     

# Selection for the country of Mexico  

```{r, echo=FALSE}
# Flag the Mexican isolates that were drawn: "selected" when the ID is in
# selection_Mexico1 or in the extra draw `ccc`, otherwise "not_selected"
selection_Mexico <- selection_Mexico %>%
  mutate(
    Selected = if_else(
      Collection_ID %in% selection_Mexico1$Collection_ID | Collection_ID %in% ccc,
      "selected",
      "not_selected"
    )
  )
```
## Dataframe of the selection for objective 1
```{r}
# Selected isolates from the Midwest table, with grouping removed
selection3 <- selection %>%
  ungroup() %>%
  filter(Selected == "selected")

# Selected isolates from the Mexican table, with grouping removed
selection_Mexico3 <- selection_Mexico %>%
  ungroup() %>%
  filter(Selected == "selected")

# Stack both sets and keep only the isolates without a DNA extraction yet
selection_FINAL_objective1_genotyping <- bind_rows(selection3, selection_Mexico3) %>%
  filter(DNA.Extraction == "No DNA extraction")
```
# Selection for genotyping


# OBJECTIVE 2