-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path09 control.R
62 lines (51 loc) · 3.18 KB
/
09 control.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# control.R
# first/last update: 25.11.2019/26.11.2019
# reads in separated alignments and aligns them with the mRNAs and the original subject sequences
# first sequence in the alignment is the Malus mRNA, second the first subject sequence
# library(DECIPHER)
filtered_query_sequences <- "Pyrus_hits_loci_filtered_6_loci_with_chro00_02122019_queries_fully_compatible_both_genomes.fasta" # name of file with filtered sequences for probe design; note that the sequence name and the sequence must be tab delimited and named "seq_no" and "sequence"; name without ">"
file_subject_sequences <- "mRNAs subject sequences extracted from Malus and Pyrus.txt"
##########################################################################################################
########################################### read in data #################################################
##########################################################################################################
query_sequences <- read.csv(filtered_query_sequences, sep="\t", stringsAsFactors = F)
subject_sequences <- read.csv(file_subject_sequences, sep="\t", stringsAsFactors = F)
subject_sequences <- subject_sequences[,c("query_ID","sequence")]
queries <- sort(unique(subject_sequences[,"query_ID"]))
FileNames <- choose.files(".fa") # select sequences
len <- nchar(FileNames)
query <- substring(FileNames,(nchar(getwd())+10),(nchar(getwd())+21))
for (i in 1:length(queries)) {
query_name <- queries[i]
pos1 <- rownames(subset(subject_sequences,query_ID==query_name))
pos2 <- which(query==query_name)
if(length(pos2)>0) {
file1 <- as.data.frame(c(as.character(paste(">",query_name,sep="")),as.character(query_sequences[query_sequences$seq_no==query_name,"sequence"])))
colnames(file1) <- "V1"
file2 <- as.data.frame(c(as.character(paste(">",query_name,sep="")),as.character(subject_sequences[pos1[1],"sequence"])))
colnames(file2) <- "V1"
file_aligned <- rbind(file1,file2)
for(j in pos2) {
file_aligned_j <- as.data.frame(read.csv(FileNames[j], sep="\t", stringsAsFactors = F, header=F))
file_aligned <- rbind(file_aligned,file_aligned_j)
}
FileName <- paste("control/aligned/",query_name,sep="")
FileName <- paste(FileName,".fa",sep="")
write.table(file_aligned,FileName,row.names=F,col.names=FALSE,quote=F)
dna <- readDNAStringSet(FileName)
aligned_dna <- AlignSeqs(dna)
aligned_adjusted_dna <- AdjustAlignment(aligned_dna)
write.table(aligned_adjusted_dna,FileName,col.names=FALSE,quote=F)
file <- read.csv(FileName, sep=" ", header = F, stringsAsFactors = F)
file[,1] <- paste(">",file[,1],sep="")
file <- c(as.character(file[,1]),as.character(file[,2]))
m <- (length(file)/2)
sort_key <- NULL
for(j in 1:m) {
sort_key_j <- c(j,j+m)
sort_key <- c(sort_key,sort_key_j)
}
file <- file[sort_key]
write.table(file,FileName,row.names=FALSE,col.names=FALSE,quote=F)
}
}