Skip to content

Commit

Permalink
bugfix trinity assembler
Browse files Browse the repository at this point in the history
  • Loading branch information
Tobias Hofmann committed Jul 18, 2018
1 parent 375c9b0 commit 57348ac
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 18 deletions.
9 changes: 4 additions & 5 deletions recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,11 @@ package:
version: {{ version }}

source:
fn: secapr_{{ version }}.tar.gz
url: https://github.com/AntonelliLab/seqcap_processor/archive/v{{ version }}.tar.gz
sha256: a14cea6d58154759da84c9626b17b6010626b907980e9e5bbde5ee9c2be5a3ca

build:
skip: True # [not py27]
skip: True # [not py27]
number: 2
script: python -m pip install --no-deps --ignore-installed .
entry_points:
Expand All @@ -37,9 +36,9 @@ requirements:
- lastz
- mafft >=7.2
- muscle
- trimmomatic
- trimmomatic ==0.33
- abyss
- trinity
- trinity <=2.3.2
- fastqc
- pandas
- numpy
Expand All @@ -57,4 +56,4 @@ about:
home: 'https://github.com/AntonelliLab/seqcap_processor'
license: MIT
license_file: LICENSE
summary: 'Process sequence-capture FASTQ files into alignments for phylogenetic analyses. Integrates allele phasing, producing haplotype alignments.'
summary: 'Process sequence-capture FASTQ files into alignments for phylogenetic analyses. Integrates allele phasing, producing haplotype alignments.'
35 changes: 26 additions & 9 deletions secapr/assemble_reads.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ def main(args):

def assembly_trinity(forw,backw,output_folder,id_sample,cores,min_length,max_memory):
print ("De-novo assembly with Trinity of sample %s:" %id_sample)
print(output_folder)
#print(output_folder)
command = [
"Trinity",
"--seqType",
Expand All @@ -204,14 +204,31 @@ def assembly_trinity(forw,backw,output_folder,id_sample,cores,min_length,max_mem
"--output",
output_folder
]
try:
print ("Building contigs........")
with open(os.path.join(output_folder, "%s_trinity_screen_out.txt" %id_sample), 'w') as log_err_file:
p = subprocess.Popen(command, stdout=log_err_file, stderr=log_err_file)
p.communicate()
print ("%s assembled. Trinity-stats are printed into %s" %(id_sample, os.path.join(output_folder, "%s_trinity_screen_out.txt" %id_sample)))
except:
print ("Trinity failed, maybe due to limited stack-size. Try increase stacksize with command 'zsh | ulimit -s unlimited | sh' and run again.")
print ("Building contigs........")
with open(os.path.join(output_folder, "%s_trinity_screen_out.txt" %id_sample), 'w') as log_err_file:
p = subprocess.Popen(command, stdout=log_err_file, stderr=log_err_file)
p.communicate()
filename = os.path.join(output_folder, "%s_trinity_screen_out.txt" %id_sample)
file_object = open(filename, 'r')
for line in file_object:
if line.startswith('Error'):
print(line)
print ('SECAPR NOTE:\nTrinity is currently only functional in the Linux distribution of SECAPR due to Java incompatibilities.\n')
#'However, the environment on MacOS machines can be easily altered by hand in order to properly run Trinity.\n',
#'This might however compromise the functionality of other parts of the SECAPR pipeline, therefore we recommend to undo the changes made in the environment after using Trinity by following the instructions below.\n\n',
#'In order to run the Trinity assembly on MacOS do the following:\n',
#'1. within the SECAPR conda environment type: "conda install openjdk=7"\n',
#'2. run the secapr assemble_reads function with Trinity (using the "--assembler trinity" flag)\n',
#'3. after assembly rebuild the SECAPR default environment by typing "conda install trimmomatic=0.33"\n'
sys.exit()
elif line.startswith('Trinity run failed.'):
print (''.join(file(filename)))
print ('SECAPR NOTE:\nTrinity is currently only functional in the Linux distribution of SECAPR.\n')
sys.exit()

print ("%s assembled. Trinity-stats are printed into %s" %(id_sample, os.path.join(output_folder, "%s_trinity_screen_out.txt" %id_sample)))
#except:
# print ("Trinity failed, maybe due to limited stack-size. Try increase stacksize with command 'zsh | ulimit -s unlimited | sh' and run again.")

def assembly_abyss(forw,backw,singlef,singleb,output_folder,id_sample,kmer,cores,args):
print ("De-novo assembly with abyss of sample %s:" %id_sample)
Expand Down
2 changes: 1 addition & 1 deletion secapr/phase_alleles.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def main(args):
if os.path.isdir(path):
subfolder_path = os.path.join(input_folder,subfolder)
if subfolder_path.endswith('_remapped') or subfolder_path.endswith('_locus_selection'):
sample = subfolder.split('_')[0]
sample = '_'.join(subfolder.split('_')[:-1])
sample_output_folder = os.path.join(out_dir,'%s_phased' %sample)
if not os.path.exists(sample_output_folder):
os.makedirs(sample_output_folder)
Expand Down
11 changes: 8 additions & 3 deletions secapr/reference_assembly.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,9 +283,13 @@ def clean_with_picard(sample_output_folder,sample_id,sorted_bam,log):
"VALIDATION_STRINGENCY=LENIENT"
]
print ("Removing duplicate reads with Picard..........")
with open(os.path.join(log, "picard_screen_out.txt"), 'w') as log_err_file:
pi = subprocess.Popen(run_picard, stderr=log_err_file)
pi.communicate()
try:
with open(os.path.join(log, "picard_screen_out.txt"), 'w') as log_err_file:
pi = subprocess.Popen(run_picard, stderr=log_err_file)
pi.communicate()
except OSError:
print('Not enough reads mapped to reference in order to run Picard. Try using the "--keep_duplicates" flag in order to avoid the use of Picard.')
quit()
print ("Duplicates successfully removed.")
# Cleaning up a bit
has_duplicates = "%s/including_duplicate_reads" %sample_output_folder
Expand Down Expand Up @@ -753,6 +757,7 @@ def main(args):
bam_consensus_with_duplicates = bam_consensus(reference,dupl_bam,dupl_name_stem,dupl_output_folder,min_cov)
join_fastas(out_dir,sample_out_list)
# create file with read-coverage overview
print("#" * 50)
sample_bam_dict, input_type = get_bam_path_dict(out_dir)
# currently only available for unphased data
if input_type == 'unphased':
Expand Down

0 comments on commit 57348ac

Please sign in to comment.