Skip to content

Commit

Permalink
custom gff: skip protein_coding transcripts with no exons -e102 (#870)
Browse files (browse the repository at this point in the history)
* skip protein_coding transcripts with no exons

* add test

* simplify test
  • Loading branch information
ima23 authored Nov 10, 2020
1 parent d2749f7 commit ef2b586
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 0 deletions.
6 changes: 6 additions & 0 deletions modules/Bio/EnsEMBL/VEP/AnnotationSource/File/BaseGXF.pm
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,12 @@ sub _create_transcript {
}
}

# check for exons for protein_coding biotype
if ($biotype eq 'protein_coding' && scalar @exons == 0){
$self->warning_msg("WARNING: No exons found for protein_coding transcript $id");
return;
}

# sort exons
if($tr_record->{strand} > 0) {
@exons = sort {$a->{start} <=> $b->{start}} @exons;
Expand Down
53 changes: 53 additions & 0 deletions t/AnnotationSource_File_GFF.t
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,59 @@ SKIP: {
is(scalar (grep {defined($_)} map {$as->lazy_load_transcript($_)} @{$as->_create_transcripts($records)}), 3, 'overlapping exons skips transcript');
ok($tmp =~ /Failed to add exon to transcript/, 'overlapping exons warning message');

# missing exons for protein_coding transcript
my %feature_record = (
'_children' => [
{ '_parent_id' => [ 'parent_gene_id.1' ],
'attributes' => { 'Parent' => 'parent_gene_id.1' },
'chr' => '21',
'end' => 36705932,
'phase' => 0,
'source' => 'test',
'start' => 36705821,
'strand' => '-1',
'type' => 'CDS',
},
{ '_parent_id' => [ 'parent_gene_id.1' ],
'attributes' => { 'Parent' => 'parent_gene_id.1'},
'chr' => '21',
'end' => 36705714,
'phase' => 2,
'source' => 'test',
'start' => 36705179,
'strand' => '-1',
'type' => 'CDS',
} ],
'_gene_record' => {
'_parent_id' => [],
'attributes' => {
'ID' => 'parent_gene_id',
},
'chr' => '21',
'end' => 36705932,
'phase' => undef,
'source' => 'test',
'start' => 36705179,
'strand' => '-1',
'type' => 'gene',
},
'_id' => 'parent_gene_id.1',
'_parent_id' => [ 'parent_gene_id' ],
'attributes' => {
'ID' => 'parent_gene_id.1',
'Parent' => 'parent_gene_id',
},
'chr' => '21',
'end' => 36705932,
'phase' => undef,
'source' => 'test',
'start' => 36705179,
'strand' => '-1',
'type' => 'mRNA',
);
my $trans = $as->lazy_load_transcript(\%feature_record, $feature_record{_gene_record});
ok($tmp =~ /No exons found for protein_coding transcript/, 'no exons warning message');

# restore STDERR
open(STDERR, ">&SAVE") or die "Can't restore STDERR\n";

Expand Down

0 comments on commit ef2b586

Please sign in to comment.