Bio::EnsEMBL::Analysis::Tools::Filter
BlastMiniGenewise
Toolbar
Summary
Bio::EnsEMBL::Analysis::Tools::Filter::BlastMiniGenewise
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
my $obj =
Bio::EnsEMBL::Analysis::Tools::Filter::BlastMiniGenewise->
new(
-max_exon_length => '20000',
-multi_exon_min_coverage => '25',
-single_exon_min_coverage => '80',
-max_intron_length => '200000',
-min_split_coverage => 95,
-max_low_complexity => 101,
);
my ($accepted, $rejected) = $obj->filter_genes($genes);
Description
This module is designed to assess gene structures on several factors including
coverage of evidence, size of introns and presence of complete ORF
The limits of these variables are defined in the constructure
The filter method then return 2 sets of genes, the first is the accepted set
the second is the rejected set
Methods
accepted_genes | No description | Code |
create_Attribute | No description | Code |
filter_genes | Description | Code |
get_Attribute | No description | Code |
get_Transcript_supporting_evidence | No description | Code |
max_exon_length | No description | Code |
max_intron_length | No description | Code |
max_low_complexity | No description | Code |
min_coverage | No description | Code |
min_split_coverage | No description | Code |
new | Description | Code |
rejected_genes | No description | Code |
seqfetcher | No description | Code |
single_exon_min_coverage | No description | Code |
slice | No description | Code |
split_genes | No description | Code |
split_multi_transcript_genes | No description | Code |
unwanted_evidence | No description | Code |
validate_Transcript | Description | Code |
Methods description
Arg [1] : Bio::EnsEMBL::Analysis::Tools::Filter::BlastMiniGenewise Arg [2] : Arrayref of Bio::EnsEMBL::Gene objects Function : filter genes on defined characteristics Returntype: 2 arrayrefs, first accepted genes, then rejected Exceptions: Example : |
Arg [1] : Bio::EnsEMBL::Analysis::Tools::Filter::BlastMiniGenewise Function : to create a BlastMiniGenewise filter object Returntype: Bio::EnsEMBL::Analysis::Tools::Filter::BlastMiniGenewise Exceptions: there are several args which musst be defined otherwise it will throw Example : |
Arg [1] : Bio::EnsEMBL::Analysis::Tools::Filter::BlastMiniGenewise Arg [2] : Bio::EnsEMBL::Transcript Function : validate the transcript on the basis of several factors Returntype: boolean, 1 if valid 0 if not Exceptions: Example : |
Methods code
accepted_genes | description | prev | next | Top |
sub accepted_genes
{ my ($self, $arg) = @_;
$self->{accepted_genes} = [] if(!$self->{accepted_genes});
if($arg){
my $temp;
if(ref($arg) ne "ARRAY"){
throw($arg." must be a Bio::EnsEMBL::Gene object")
unless($arg->isa("Bio::EnsEMBL::Gene"));
$temp = [$arg];
}else{
$temp = $arg;
}
push(@{$self->{accepted_genes}}, @$temp);
}
return $self->{accepted_genes}; } |
sub create_Attribute
{ my ($self, $reason, $description) = @_;
$description = $reason if(!$description);
my $attrib = Bio::EnsEMBL::Attribute->new(
-code => $reason,
-name => $reason,
-description => $description,
-value => $description,
);
return $attrib; } |
sub filter_genes
{ my ($self, $genes) = @_;
my @accepted;
GENE:foreach my $gene(@$genes, @{$self->split_genes}){
my @transcripts = @{$gene->get_all_Transcripts};
if(@transcripts > 1){
my $cloned_gene = clone_gene($gene);
warning("Filter::BlastMiniGenewise only works on single transcript ".
"structures. Will convert ".$gene." into to one gene one ".
"transcript structures");
my $split_genes = convert_to_genes($cloned_gene->get_all_Transcripts);
push(@$genes, @$split_genes) if($self->split_multi_transcript_genes);
$self->split_genes($gene);
next GENE;
}
my $is_valid = $self->validate_Transcript($transcripts[0]);
if($is_valid){
logger_info("Have accepted ".id($gene)." ".coord_string($gene));
push(@accepted, $gene);
}else{
logger_info("Have rejected ".id($gene)." ".coord_string($gene));
$self->rejected_genes($gene);
}
}
ACCEPTED:foreach my $accepted(@accepted){
my @transcripts = @{$accepted->get_all_Transcripts};
my $evidence = $self->get_Transcript_supporting_evidence($transcripts[0]);
if(evidence_coverage_greater_than_minimum($transcripts[0], $evidence, ($self->min_split_coverage -1))){
$self->accepted_genes($accepted);
next ACCEPTED;
}
if(@{$transcripts[0]->get_all_Exons} == 1){
$self->accepted_genes($accepted);
next ACCEPTED;
}
my $split_transcripts = split_Transcript($transcripts[0], $self->max_intron_length);
my $acceptable_transcripts = tidy_split_transcripts($transcripts[0], $split_transcripts);
my $genes = convert_to_genes($acceptable_transcripts, $accepted->analysis, $accepted->biotype);
foreach my $gene(@$genes){
my $attrib = $self->get_Attribute("split_tscript");
foreach my $transcript(@{$gene->get_all_Transcripts}){
$transcript->add_Attributes($attrib);
}
$self->accepted_genes($gene);
}
}
return $self->accepted_genes, $self->rejected_genes; } |
sub get_Attribute
{ my ($self, $code, $description) = @_;
if(!$self->{'attribute_hash'}){
$self->{'attribute_hash'} = {};
}
my $attrib = $self->{'attribute_hash'}->{$code};
if(!$attrib){
$attrib = $self->create_Attribute($code, $description);
$self->{'attribute_hash'}->{$code} = $attrib;
}
return $attrib;
}
1; } |
get_Transcript_supporting_evidence | description | prev | next | Top |
sub get_Transcript_supporting_evidence
{ my ($self, $transcript, $seqfetcher) = @_;
$seqfetcher = $self->seqfetcher if(!$seqfetcher);
throw("Can't get ".$transcript." transcrpts supporting features without ".
"a seqfetcher object") if(!$seqfetcher);
my $ids = list_evidence($transcript);
my $sequence;
foreach my $id(@$ids){
$sequence = $seqfetcher->get_Seq_by_acc($id);
last if($sequence);
}
return $sequence; } |
sub max_exon_length
{ my ($self, $arg) = @_;
if(defined $arg){
$self->{max_exon_length} = $arg;
}
return $self->{max_exon_length}; } |
sub max_intron_length
{ my ($self, $arg) = @_;
if(defined $arg){
$self->{max_intron_length} = $arg;
}
return $self->{max_intron_length}; } |
sub max_low_complexity
{ my ($self, $arg) = @_;
if(defined $arg){
$self->{max_low_complexity} = $arg;
}
return $self->{max_low_complexity}; } |
sub min_coverage
{ my ($self, $arg) = @_;
if(defined $arg){
$self->{min_coverage} = $arg;
}
return $self->{min_coverage}; } |
sub min_split_coverage
{ my ($self, $arg) = @_;
if(defined $arg){
$self->{min_split_coverage} = $arg;
}
return $self->{min_split_coverage}; } |
sub new
{ my ($class,@args) = @_;
my $self = bless {},$class;
my ($max_exon_length, $split_genes, $slice, $unwanted_evidence,
$max_intron_length, $min_coverage, $max_low_complexity, $seqfetcher,
$single_exon_min_coverage, $min_split_coverage)
= rearrange([qw(MAX_EXON_LENGTH SPLIT_GENES SLICE UNWANTED_EVIDENCE
MAX_INTRON_LENGTH MULTI_EXON_MIN_COVERAGE MAX_LOW_COMPLEXITY
SEQFETCHER SINGLE_EXON_MIN_COVERAGE MIN_SPLIT_COVERAGE)], @args);
$self->max_exon_length($max_exon_length);
$self->split_multi_transcript_genes($split_genes);
$self->slice($slice);
$self->unwanted_evidence($unwanted_evidence);
$self->min_coverage($min_coverage);
$self->single_exon_min_coverage($single_exon_min_coverage);
$self->max_low_complexity($max_low_complexity);
$self->min_split_coverage($min_split_coverage);
$self->seqfetcher($seqfetcher);
$self->max_intron_length($max_intron_length);
throw("Filter::BlastMiniGenewise needs a seqfetcher object to run")
if(!$self->seqfetcher);
throw("Filter::BlastMiniGenewise needs a max exon length ")
if(! defined($self->max_exon_length));
throw("Filter::BlastMiniGenewise needs a max intron length ")
if(! defined($self->max_intron_length));
throw("Filter::BlastMiniGenewise needs a multi exon min coverage")
if(! defined($self->min_coverage));
throw("Filter::BlastMiniGenewise needs a single exon min coverage")
if(!defined($self->single_exon_min_coverage));
throw("Filter::BlastMiniGenewise needs a max low complexity value ")
if(!defined($self->max_low_complexity));
throw("Filter::BlastMiniGenewise needs a min split coverage")
if(! defined($self->min_split_coverage));
return $self;
}
} |
sub rejected_genes
{ my ($self, $arg) = @_;
$self->{rejected_genes} = [] if(!$self->{rejected_genes});
if($arg){
my $temp;
if(ref($arg) ne "ARRAY"){
throw($arg." must be a Bio::EnsEMBL::Gene object")
unless($arg->isa("Bio::EnsEMBL::Gene"));
$temp = [$arg];
}else{
$temp = $arg;
}
push(@{$self->{rejected_genes}}, @$temp);
}
return $self->{rejected_genes}; } |
sub seqfetcher
{ my ($self, $arg) = @_;
if($arg){
throw("Filter::BlastMiniGenewise ".
$arg." must have a method get_Seq_by_acc")
unless($arg->can("get_Seq_by_acc"));
$self->{seqfetcher} = $arg;
}
return $self->{seqfetcher}; } |
sub single_exon_min_coverage
{ my ($self, $arg) = @_;
if(defined $arg){
$self->{single_exon_min_coverage} = $arg;
}
return $self->{single_exon_min_coverage}; } |
sub slice
{ my ($self, $arg) = @_;
if(defined $arg){
throw($arg." should be a Bio::EnsEMBL::Slice")
unless($arg->isa("Bio::EnsEMBL::Slice"));
$self->{slice} = $arg;
}
return $self->{slice}; } |
sub split_genes
{ my ($self, $arg) = @_;
$self->{split_genes} = [] if(!$self->{split_genes});
if($arg){
my $temp;
if(ref($arg) ne "ARRAY"){
throw($arg." must be a Bio::EnsEMBL::Gene object")
unless($arg->isa("Bio::EnsEMBL::Gene"));
$temp = [$arg];
}else{
throw($arg->[0]." must be a Bio::EnsEMBL::Gene object")
unless($arg->[0]->isa("Bio::EnsEMBL::Gene"));
$temp = $arg;
}
push(@{$self->{split_genes}}, @$temp);
}
return $self->{split_genes}; } |
sub split_multi_transcript_genes
{ my ($self, $arg) = @_;
if(defined $arg){
$self->{split_multi_transcript_gene} = $arg;
}
return $self->{split_multi_transcript_gene}; } |
sub unwanted_evidence
{ my ($self, $arg) = @_;
if(defined $arg){
throw($arg." should be a hashref")
unless(ref($arg) eq "HASH");
$self->{unwanted_evidence} = $arg;
}
return $self->{unwanted_evidence}; } |
sub validate_Transcript
{ my ($self, $transcript) = @_;
my $slice = $self->slice;
$slice = $transcript->slice if(!$slice);
my $is_valid = 0;
unless(are_strands_consistent($transcript)){
$is_valid++;
my $attrib = $self->get_Attribute("incons_strands");
$transcript->add_Attributes($attrib);
}
unless(are_phases_consistent($transcript)){
$is_valid++;
my $attrib = $self->get_Attribute("incons_phases");
$transcript->add_Attributes($attrib);
}
unless(is_not_folded($transcript)){
$is_valid++;
my $attrib = $self->get_Attribute("is_folded");
$transcript->add_Attributes($attrib);
}
unless(has_no_unwanted_evidence($transcript,
$self->unwanted_evidence)){
$is_valid++;
my $attrib = $self->get_Attribute("unwanted_evidence");
$transcript->add_Attributes($attrib);
}
EXON:foreach my $exon(@{$transcript->get_all_Exons}){
if(exon_length_less_than_maximum($exon, $self->max_exon_length)){
next EXON;
}else{
$is_valid++;
last EXON;
my $attrib = $self->get_Attribute("exon_too_long");
$transcript->add_Attributes($attrib);
}
}
if(contains_internal_stops($transcript)){
warning(Transcript_info($transcript)." contains internal stop codons");
$is_valid++;
my $attrib = $self->get_Attribute("contains_stops");
$transcript->add_Attributes($attrib);
}
unless(validate_Translation_coords($transcript)){
$is_valid++;
my $attrib = $self->get_Attribute("borked_coords");
$transcript->add_Attributes($attrib);
}
unless(low_complexity_less_than_maximum($transcript,
$self->max_low_complexity)){
$is_valid++;
my $attrib = $self->get_Attribute("low_complex");
$transcript->add_Attributes($attrib)
}
my $evidence = $self->get_Transcript_supporting_evidence($transcript);
if(@{$transcript->get_all_Exons} >= 2){
my $coverage = evidence_coverage($transcript, $evidence);
unless(evidence_coverage_greater_than_minimum($transcript, $evidence,
($self->min_coverage - 1))){
$is_valid++;
my $attrib = $self->get_Attribute("evi_coverage");
$transcript->add_Attributes($attrib);
}
}else{
unless(evidence_coverage_greater_than_minimum($transcript, $evidence,
($self->single_exon_min_coverage
-1))){
$is_valid++;
my $attrib = $self->get_Attribute("evi_coverage");
$transcript->add_Attributes($attrib);
}
}
warning(Transcript_info($transcript)." failed ".$is_valid." tests out of 9 ".
"returning 0") if($is_valid >= 1);
return 0 if($is_valid >= 1);
return 1; } |
General documentation
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _
Arg [1] : Bio::EnsEMBL::Analysis::Tools::Filter::BlastMiniGenewise
Arg [2] : int/hash ref/array ref/object
Function : to store the given value
Returntype: the give value
Exceptions: some with throw if given the wrong thing
Example :