Bio::EnsEMBL::Analysis::Runnable BlastMiniGenewise
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
Bio::EnsEMBL::Analysis::Runnable::BlastMiniGenewise
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Analysis::Runnable
Bio::EnsEMBL::Analysis::Runnable::Blast
Bio::EnsEMBL::Analysis::Runnable::BlastMiniBuilder
Bio::EnsEMBL::Analysis::Runnable::ExonerateTranscript
Bio::EnsEMBL::Analysis::Runnable::MultiMiniGenewise
Bio::EnsEMBL::Analysis::Tools::BlastDB
Bio::EnsEMBL::Analysis::Tools::FeatureFilter
Bio::EnsEMBL::Analysis::Tools::FilterBPlite
Bio::EnsEMBL::Analysis::Tools::Logger qw ( logger_info )
Bio::EnsEMBL::Utils::Argument qw ( rearrange )
Bio::EnsEMBL::Utils::Exception qw ( throw warning stack_trace_dump )
Inherit
Bio::EnsEMBL::Analysis::Runnable::BlastMiniBuilder
Synopsis
    my $obj = Bio::EnsEMBL::Analysis::Runnable::BlastMiniGenewise->new
('-genomic' => $genseq,
'-features' => $features,
'-protein' => $protein,
'-seqfetcher' => $seqfetcher,
'-check_repeated' => 1);
$obj->run my @newfeatures = $obj->output;
(where $protein and $genseq are Bio::Seq objects,
$features are X objects and $seqfetcher is a
SeqFetcher object.)
Description
Methods
blast_filterDescriptionCode
blast_parser
No description
Code
blastdb
No description
Code
blastdb_file
No description
Code
exonerateDescriptionCode
exonerate_optionsDescriptionCode
exonerate_pathDescriptionCode
full_seqDescriptionCode
genewise_optionsDescriptionCode
get_SequenceDescriptionCode
get_Sequences
No description
Code
ids
No description
Code
make_objectDescriptionCode
minigenewise_optionsDescriptionCode
multiminigenewise_optionsDescriptionCode
new
No description
Code
post_blast_filter
No description
Code
runDescriptionCode
run_alignment
No description
Code
run_blast
No description
Code
run_exonerateDescriptionCode
score_cutoffDescriptionCode
seqfetcherDescriptionCode
validate_Sequences
No description
Code
Methods description
blast_filtercode    nextTop
    Title   :   blast_filter
Usage : $self->blast_filter($blast_filter)
Function: Get/set method for genewise blast_filter
Returns :
Args :
exoneratecodeprevnextTop
    Title   :   exonerate
Usage : $self->exonerate
Function: Get/Set method for using exonerate rather than Blast
Returns : 0 (False) or 1 (True)
Args : 0 (False) or 1 (True)
exonerate_optionscodeprevnextTop
    Title   :   exonerate_options
Usage : $options = $self->exonerate_options
Function: Get/Set method for the options for running Exonerate
Returns : String
Args : String
exonerate_pathcodeprevnextTop
    Title   :   exonerate_path
Usage : $path = $self->exonerate_path
Function: Get/Set method for the path to the Exonerate executeable
Returns : String
Args : String
full_seqcodeprevnextTop
    Title   :   full_seq
Usage : $self->full_seq
Function: Get/Set method for using the full genomic
: sequence rather than the mini seq
Returns : 0 (False) or 1 (True)
Args : 0 (False) or 1 (True)
genewise_optionscodeprevnextTop
    Title   :   genewise_options
Usage : $self->genewise_options($genewise_options)
Function: Get/set method for genewise genewise_options
Returns :
Args :
get_SequencecodeprevnextTop
  Title   : get_Sequence
Usage : my $seq = get_Sequence($id)
Function: Fetches sequences with id $id
Returns : Bio::PrimarySeq
Args : none
make_objectcodeprevnextTop
  Args [1]   : $miniseq - a Bio::Seq object representing the
target sequence for the genewise run.
Args [2] : $features - reference to a list of
FeaturePairs generated by a blast run.
Example : $self->make_object($miniseq, $features);
Description: Takes a genomic sequence and builds a
MultiMiniGenewise runnable object using the
list of FeaturePairs.
Returntype : A list of
Bio::EnsEMBL::Analysis::Runnable::MultiMiniGenewise
Exceptions : none
Caller : $self->build_runnables
minigenewise_optionscodeprevnextTop
    Title   :   minigenewise_options
Usage : $self->minigenewise_options($minigenewise_options)
Function: Get/set method for minigenewise minigenewise_options
Returns :
Args :
multiminigenewise_optionscodeprevnextTop
    Title   :   multiminigenewise_options
Usage : $self->multiminigenewise_options($multiminigenewise_options)
Function: Get/set method for multiminigenewise multiminigenewise_options
Returns :
Args :
runcodeprevnextTop
  Title   : run.
Usage : $self->run()
Function: Performs a Blast search or an Exonerate search
: with all the protiens, passes the resulting proteins
: into a Bio::EnsEMBL::Analysis::Runnable::MultiMiniGenewise
: runnable
Returns : none
Args : none
run_exoneratecodeprevnextTop
  Title   : run_exonerate
Usage : @features = $self->run_exonerate()
Function: Uses Bio::Ensembl::Analysis::ExonerateTranscript to align
: the proteins to the slice. The supoporting features of
: the transcripts are returned
Returns : none
Args : list of Bio::EnsEMBL::BaseAlignFeature objects
score_cutoffcodeprevnextTop
    Title   :   score_cutoff
Usage : $self->score_cutoff($score_cutoff)
Function: Get/set method for genewise score_cutoff
Returns :
Args :
seqfetchercodeprevnextTop
    Title   :   seqfetcher
Usage : $self->seqfetcher($seqfetcher)
Function: Get/set method for SeqFetcher
Returns : Bio::EnsEMBL::Analysis::SeqFetcher object
Args : Bio::EnsEMBL::Pipeline::SeqFetcher object
Methods code
blast_filterdescriptionprevnextTop
sub blast_filter {
  my ($self, $arg) = @_;
  if(defined $arg){
    throw($arg." must have method filter_results") 
      unless($arg->can("filter_results"));
    $self->{blast_filter} = $arg;
  }
  if(!$self->{blast_filter}){
    $self->{blast_filter} = Bio::EnsEMBL::Analysis::Tools::FeatureFilter
      ->new(
            -max_pvalue => 100,
            -prune => 1,
            -coverage => 100,
           );
  }
  return $self->{blast_filter};
}
blast_parserdescriptionprevnextTop
sub blast_parser {
  my ($self, $value) = @_;
  if($value){
    throw($value." must have a method called parse_file") 
      unless($value->can("parse_file"));
    $self->{blast_parser} = $value;
  }
  if(!$self->{blast_parser}){
    my $regex;
    if($self->query->name =~ /^\S+\:\S*\:(\S+)\:\S*:\S*\:\S*/){
      $regex = '^\S+\:\S*\:(\S+)\:\S*:\S*\:\S*';
    }elsif ($self->query->name =~ /^(.*)\|(.*)\|(.*)/) {
      $regex = '^.*\|(.*)\|.*';
    } elsif ($self->query->name =~ /^..\:(.*)/) {
      $regex = '^..\:(.*)';
    }else {
      $regex = '^(\w+)';
    }
    $self->{blast_parser}  = Bio::EnsEMBL::Analysis::Tools::FilterBPlite
      ->new(
            -regex => $regex,
            -query_type => "pep",
            -database_type => "dna",
           );
  }
  return $self->{blast_parser};
}
blastdbdescriptionprevnextTop
sub blastdb {
  my ($self, $arg) = @_;
  if($arg){
    throw($arg." must be a blastdb object") 
      unless($arg->isa("Bio::EnsEMBL::Analysis::Tools::BlastDB"));
    $self->{blastdb} = $arg;
  }
  if(!$self->{blastdb}){
    my $blastdb = Bio::EnsEMBL::Analysis::Tools::BlastDB
      ->new(
            -sequences => [$self->query],
            -output_dir => $self->workdir,
            -mol_type => "DNA",
           );
    $self->{blastdb} = $blastdb;
  }
  return $self->{blastdb};
}
blastdb_filedescriptionprevnextTop
sub blastdb_file {
  my ($self, $arg) = @_;
  if($arg){
    throw($arg." must exist as a file") unless(-e $arg);
    $self->{blastdb_file} = $arg;
  }
  if(!$self->{blastdb_file}){
    my $blastdb_file = $self->blastdb->create_blastdb;
    foreach my $file(@{$self->blastdb->list_dbfiles}){
      $self->files_to_delete($file);
    }
    $self->{blastdb_file} = $blastdb_file;
  }
  return $self->{blastdb_file};
}

#functional methods
}
exoneratedescriptionprevnextTop
sub exonerate {
  my( $self, $value ) = @_;    

  if ($value) {
    $self->{'_exonerate'} = $value;
  }

  return $self->{'_exonerate'};
}
exonerate_optionsdescriptionprevnextTop
sub exonerate_options {
  my( $self, $value ) = @_;    

  if ($value) {
    $self->{'_exonerate_options'} = $value;
  }

  return $self->{'_exonerate_options'};
}
exonerate_pathdescriptionprevnextTop
sub exonerate_path {
  my( $self, $value ) = @_;    
  if($value){
    my $path = $self->locate_executable($value);
    $self->{'exonerate_path'} = $path;
  }
  throw($self->{'exonerate_path'}." is not executable") 
    if($self->{'exonerate_path'} && !(-x $self->{'exonerate_path'}));
  return $self->{'exonerate_path'};
}
full_seqdescriptionprevnextTop
sub full_seq {
  my( $self, $value ) = @_;    

  if ($value) {
    $self->{'_full_seq'} = $value;
  }

  return $self->{'_full_seq'};
}
genewise_optionsdescriptionprevnextTop
sub genewise_options {
  my ($self, $arg) = @_;
  if(defined $arg){
    throw("BlastMiniGenewise::genewise_options must be given an hasref".
          "not a $arg") unless(ref($arg) eq "HASH");
    $self->{'genewise_options'} = $arg;
  }
  return $self->{'genewise_options'};
}
get_SequencedescriptionprevnextTop
sub get_Sequence {
  my ($self,$id) = @_;
  my $seqfetcher = $self->seqfetcher;
  my $seq;
  my $name;
  if($seqfetcher->can('db')){
    my @dbs = $seqfetcher->db;
    $name = $dbs[0];
  }
  my ($p, $f, $l) = caller;
  if (!defined($id)) {
    warning("No id input to get_Sequence");
  }  
  
  eval {
    $seq = $seqfetcher->get_Seq_by_acc($id);
  };
  
  if($@) {
    warning("Problem fetching sequence for id [$id] with $seqfetcher $name  $@\n");
    return undef;
  }
  
  if(!defined($seq)){
    warning("Could not find sequence for [$id] with $seqfetcher $name called by $f:$l");
  }
  
  return $seq;
}


1;
}
get_SequencesdescriptionprevnextTop
sub get_Sequences {
    my ($self) = @_;

    my @seq;

    foreach my $id (@{$self->ids}) {
      logger_info("Fetching ".$id." sequence\n");
        my $seq = $self->get_Sequence($id);

        if ($seq && $seq->length > 0) {
            push(@seq,$seq);
        } else {
            print "Invalid sequence for $id - skipping\n";
        }
    }

    return\@ seq;
}
idsdescriptionprevnextTop
sub ids {
  my ($self, $arg) = @_;
  if(defined $arg){
    throw("BlastMiniGenewise::ids must be given an arrayref not a $arg")
      unless(ref($arg) eq "ARRAY");
    $self->{'ids'} = $arg;
  }
  return $self->{'ids'};
}


#Acessor methods
}
make_objectdescriptionprevnextTop
sub make_object {
  my ($self, $miniseq, $features, $cluster_start, $cluster_end) = @_;
  
#  # Before we pass our blast generated features to 
# # MultiMiniGenewise we must first convert them from
# # PepDnaAlignFeatures to FeaturePairs.
#
# my @newf;
# foreach my $f (@$features){
# my $newf = new Bio::EnsEMBL::FeaturePair(-feature1 => $f->feature2,
# -feature2 => $f->feature1);
# push(@newf,$newf);
# }
# Create a MultiMiniGenewise object with the features we've
# just converted.
my %pars = %{$self->multiminigenewise_options} if($self->multiminigenewise_options); if (defined($cluster_end)) { $pars{-cluster_start} = $cluster_start; $pars{-cluster_end} = $cluster_end; } my $mg = Bio::EnsEMBL::Analysis::Runnable::MultiMiniGenewise->new ( -query => $miniseq, -features => $features, -seqfetcher => $self->seqfetcher, -minigenewise_options => $self->minigenewise_options, -fullseq => $self->full_seq, -genewise_options => $self->genewise_options, -analysis => $self->analysis, %pars, ); %pars = (); return $mg;
}
minigenewise_optionsdescriptionprevnextTop
sub minigenewise_options {
  my ($self, $arg) = @_;
  if(defined $arg){
    throw("BlastMiniGenewise::minigenewise_options must be given an hasref".
          "not a $arg") unless(ref($arg) eq "HASH");
    $self->{'minigenewise_options'} = $arg;
  }
  return $self->{'minigenewise_options'};
}
multiminigenewise_optionsdescriptionprevnextTop
sub multiminigenewise_options {
  my ($self, $arg) = @_;
  if(defined $arg){
    throw("BlastMiniGenewise::multiminigenewise_options must be given an hasref".
          "not a $arg") unless(ref($arg) eq "HASH");
    $self->{'multiminigenewise_options'} = $arg;
  }
  return $self->{'multiminigenewise_options'};
}
newdescriptionprevnextTop
sub new {
  my ($class,@args) = @_;
  my $self = $class->SUPER::new(@args);
  my( $ids, $seqfetcher, $minigenewise_options, $check_repeated, 
      $full_seq,$exonerate,$exonerate_path, $exonerate_options, $genewise_options, 
      $blast_parser, $blast_filter, $post_filter, $score_cutoff, $blastdb, $mmg_options) = 
        rearrange([qw(IDS SEQFETCHER MINIGENEWISE_OPTIONS
                      CHECK_REPEATED FULLSEQ EXONERATE EXONERATE_PATH 
                      EXONERATE_OPTIONS GENEWISE_OPTIONS BLAST_PARSER
                      BLAST_FILTER POST_BLAST_FILTER SCORE_CUTOFF BLASTDB 
                      MULTIMINIGENEWISE_OPTIONS)], @args);

  ####SETTING DEFAULTS#####
$self->check_repeated(1); #########################
$self->seqfetcher($seqfetcher); $self->ids($ids); $self->minigenewise_options($minigenewise_options); $self->full_seq($full_seq); $self->exonerate($exonerate); $self->exonerate_path($exonerate_path); $self->exonerate_options($exonerate_options); $self->check_repeated($check_repeated) if(defined($check_repeated)); $self->genewise_options($genewise_options); $self->multiminigenewise_options($mmg_options); $self->blast_parser($blast_parser); $self->blast_filter($blast_filter); $self->post_blast_filter($post_filter); $self->score_cutoff($score_cutoff); $self->blastdb($blastdb); #note this is a default but due to the nature of it defaulting to on this
#way is needed
throw("No query sequence input") unless ($self->query); throw("No seqfetcher provided") unless ($self->seqfetcher); throw("No ids arrary ref provided") unless ($self->ids); throw("No analysis defined") unless($self->analysis); return $self;
}
post_blast_filterdescriptionprevnextTop
sub post_blast_filter {
  my ($self, $arg) = @_;
  if(defined($arg)){
    $self->{post_blast_filter} = $arg;
  }
  return $self->{post_blast_filter};
}
rundescriptionprevnextTop
sub run {
  my ($self) = @_;

  my $features = $self->run_alignment;

  unless (@$features) {
    print $self->query->name." has no associated features. ".
           "Finishing run.\n";
    return;
  }

  my $mg_runnables;

  my @sorted_features = sort{$a->start <=> $b->start  
                     || $a->end <=> $b->end} @$features; 
  my %hash;
  foreach my $f(@sorted_features){
    if(!$hash{$f->hseqname}){
      $hash{$f->hseqname} = 1;
    }else{
      $hash{$f->hseqname}++;
    }
  }
  
  if ($self->check_repeated){ 
    $mg_runnables = $self->build_runnables(@sorted_features);
  } else {
    my $runnable = $self->make_object($self->query,\@ sorted_features);
    push (@$mg_runnables, $runnable); 
  }
  my $output_count;
  foreach my $mg (@$mg_runnables){
    $mg->run;
    my @f = @{$mg->output};
    $self->output(\@f);
    $output_count += @f;
  }

  $self->delete_files;
  return 1;
}
run_alignmentdescriptionprevnextTop
sub run_alignment {
  my ($self) = @_;
  my $seqs = $self->get_Sequences;
  if(@$seqs != @{$self->ids}){
    warning("Managed to get only ".scalar(@$seqs)." sequences from ".scalar(@{$self->ids}).
            " for alignment run, check indices\n");
  }

  my $valid_seqs = $self->validate_Sequences($seqs);
  if(@$valid_seqs != @$seqs){
    warning("Only ".scalar(@$valid_seqs)." sequences were validated from ".@$seqs.
            " sequences");
  }
  logger_info("There are ".@$valid_seqs." sequences to align using blast/exonerate");
  my @features; 
  my @sorted_seqs = sort {$a->id cmp $b->id} @$valid_seqs;
  foreach my $seq(@sorted_seqs){
    if($self->exonerate){
      push(@features, @{$self->run_exonerate($seq)});
    }else{
      #push(@features, @{$self->run_blast($seq)});
foreach my $f(@{$self->run_blast($seq)}){ $f->invert; $f->slice($self->query); push(@features, $f); } } } return\@ features;
}
run_blastdescriptionprevnextTop
sub run_blast {
  my ($self, $seq) = @_;
  my $blastdb_file = $self->blastdb_file;
  my $run = Bio::EnsEMBL::Analysis::Runnable::Blast
    ->new(
          -query => $seq,
          -program => 'wutblastn',
          -database => $blastdb_file,
          -parser => $self->blast_parser,
          -filter => $self->blast_filter,
          -analysis => $self->analysis,
         );
  $run->run;
  my @blast_features;
  foreach my $f (@{$run->output}){
    my $id = $seq->id;
    if($f->seqname =~ /$id/){
      $f->seqname($id);
    }else{
      throw($f->seqname." doesn't contain ".$id);
    }
    $f->slice($self->query);
    push(@blast_features,$f);
  }
  my @filter_features;
  if($self->post_blast_filter){
    #print STDERR "Filtering blast results\n";
#this code will through out any sets of features where
#none of the blast scores are higher than score set in config
#on the grounds its unlikely in that case that it will produce
#a good gene if a gene at all
my @fs; my %feature_hash; while(my $f = shift(@blast_features)){ if(!$feature_hash{$f->seqname}){ $feature_hash{$f->seqname} = []; push(@{$feature_hash{$f->seqname}}, $f); }else{ push(@{$feature_hash{$f->seqname}}, $f); } } HIT: foreach my $hid(keys(%feature_hash)){ my @hit_features = @{$feature_hash{$hid}}; foreach my $f (@hit_features){ if($f->score > $self->score_cutoff){ push(@filter_features, @hit_features); next HIT; } } } }else{ @filter_features = @blast_features; } return\@ filter_features;
}
run_exoneratedescriptionprevnextTop
sub run_exonerate {
  my ($self)= @_;
  my @features;
  my @seqs = @{$self->get_Sequences};
  if (@seqs != @{$self->ids}) {
    warning("Managed to get only " . scalar(@seqs) . "  of ".
            scalar(@{$self->ids}) ."for Exonerate run; check indices\n");
  }
  my @valid_seqs   = $self->validate_sequence(@seqs);
  my @sorted_seqs = sort {$a->id cmp $b->id} @valid_seqs;
  foreach my $seq (@sorted_seqs) {
    my $exonerate = new  Bio::EnsEMBL::Analysis::Runnable::ExonerateTranscript
      (
       -program     => $self->exonerate_path,
       -analysis    => $self->analysis,
       -query_seqs  => ([$seq]),
       -query_type  => 'protein',
       -target_seqs => ([$self->query]),
       -target_type => 'dna',
       -options     => "-W 7 ". $self->exonerate_options,
      );
    eval {
      $exonerate->run;
    };
    if ($@){
      throw("Exonerate died on me$@\n");
    }
     my $transcripts = $exonerate->output;
     unless (scalar(@$transcripts > 0)){
      print "Didn't get a good exonerate alignment, trying again with a shorter word length\n";
      $exonerate = new  Bio::EnsEMBL::Analysis::Runnable::ExonerateTranscript
	(
       -program     => $self->exonerate_path,
       -analysis    => $self->analysis,
       -query_seqs  => ([$seq]),
       -query_type  => 'protein',
       -target_seqs => ([$self->query]),
       -target_type => 'dna',
       -options     => "-W 5 ". $self->exonerate_options,
	);
      eval {
	$exonerate->run;
      };
      if ($@){
	throw("Exonerate died on me$@\n");
      }
      $transcripts = $exonerate->output;
    }
     unless (scalar(@$transcripts > 0)){
       print STDERR "Exonerate cannot align  ".$seq->display_id."\n ";
      next;
    }
  
    foreach my $trans (@{$exonerate->output}){
      foreach my $exon (@{$trans->get_all_Exons}){
	push @features, @{$exon->get_all_supporting_features};
      }
    }
  }
  return\@ features;
}

#=head2 make_mmgw
}
score_cutoffdescriptionprevnextTop
sub score_cutoff {
  my ($self, $arg) = @_;
  if(defined($arg)){
    $self->{score_cutoff} = $arg;
  }
  return $self->{score_cutoff};
}
seqfetcherdescriptionprevnextTop
sub seqfetcher {
  my( $self, $value ) = @_;    
  if ($value) {
    throw("BlastMiniGenewise, seqfetcher must be a ".
          "Bio::EnsEMBL::Pipeline::SeqFetcher") 
      unless ($value->isa("Bio::DB::RandomAccessI"));
    $self->{'_seqfetcher'} = $value;
  }
  return $self->{'_seqfetcher'};
}
validate_SequencesdescriptionprevnextTop
sub validate_Sequences {
  my ($self, $seqs) = @_;
  my @validated;
  
  foreach my $seq (@$seqs) {
    
    my $sequence = $seq->seq;
    if ($sequence !~ /[^acgtn]/i) {
      push (@validated, $seq);
    } else {
      $_ = $sequence;
      my $len = length ($_);
      my $invalidCharCount = tr/bB/xX/;
      
      if ($invalidCharCount / $len > 0.05) {
warning("Ignoring ".
$seq->display_id()
." contains more than 5% ($invalidCharCount) "
."odd nucleotide codes ($sequence)\n Type returns "
.$seq->moltype().")\n");
} else { $seq->seq($_); push (@validated, $seq); } } } return\@ validated;
}
General documentation
CONTACTTop
Describe contact details here
APPENDIXTop
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _