Bio::EnsEMBL::Analysis::Runnable BlastTranscriptPep
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
  Bio::EnsEMBL::Analysis::Runnable::BlastTranscriptPep
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Analysis::Runnable::Blast
Bio::EnsEMBL::Utils::Argument qw ( rearrange )
Bio::EnsEMBL::Utils::Exception qw ( throw warning )
Inherit
Bio::EnsEMBL::Analysis::Runnable::Blast
Synopsis
  my $blast = Bio::EnsEMBL::Analysis::Runnable::BlastTranscriptPep->
new(
-transcript => $transcript,
-program => 'wublastn',
-database => 'embl_vertrna',
-options => 'hitdist=40 -cpus=1',
-parser => $bplitewrapper,
-filter => $featurefilter,
);
$blast->run;
my @output =@{$blast->output};
Description
This module acts as an intermediate between blast and transcripts. It
is primarily used by the runnabledb BlastGenscanPep to blast
the protein sequence of a genscan or any other ab initio prediction
againsts a protein database. It instantiates a blast runnable passing it
the Bio::Seq of the transcript translation as the query sequence. This
module expects all the same arguments as a standard blast with the
exception or a query sequence as these must be passed into the blast
runnable it instantiates
Methods
align_hits_to_queryDescriptionCode
newDescriptionCode
outputDescriptionCode
runDescriptionCode
transcriptDescriptionCode
Methods description
align_hits_to_querycode    nextTop
  Arg [1]   : Bio::EnsEMBL::Analysis::Runnable::BlastTranscriptPep
Arg [2] : arrayref of Bio::EnsEMBL::BaseAlignFeatures
Function : convert the features from blast from peptide coodinates
to genomic coordinates
Returntype: none
Exceptions:
Example :
newcodeprevnextTop
  Arg [1]         : Bio::EnsEMBL::Analysis::Runnable::BlastTranscriptPep
Arg [Transcript]: Bio::EnsEMBL::Transcript
Function : create a BlastTranscriptPep runnable
Returntype : Bio::EnsEMBL::Analysis::Runnable::BlastTranscriptPep
Exceptions : none
Example :
outputcodeprevnextTop
  Function  : override the output method to allow its
resetting to empty
runcodeprevnextTop
  Arg [1]   : Bio::EnsEMBL::Analysis::Runnable::BlastTranscriptPep
Arg [2] : string, working directory
Function : instantiations Blast runnable and runs it then converts
blast hits back into genomic coords
Returntype: none
Exceptions: none
Example :
transcriptcodeprevnextTop
  Arg [1]   : Bio::EnsEMBL::Analysis::Runnable::BlastTranscriptPep
Arg [2] : Bio::EnsEMBL::Transcript
Function : container for transcript
Returntype: Bio::EnsEMBL::Transcript
Exceptions: throws if not passed a Bio::EnsEMBL::Transcript
Example :
Methods code
align_hits_to_querydescriptionprevnextTop
sub align_hits_to_query {
  my ($self, $features)  = @_;

  my $ff = $self->feature_factory();
  my @output;
  for my $feature ( @$features ) {
    my %exon_hash = ();
    # for each ungapped piece in it
foreach my $ugFeature ( $feature->ungapped_features() ) { my $cdna_total = 1; #convert peptide coords to genomic coords
my @split = $self->transcript->pep2genomic($ugFeature->start(), $ugFeature->end()); foreach my $gcoord ( @split ) { if($gcoord->isa('Bio::EnsEMBL::Mapper::Gap')) { $cdna_total += $gcoord->end - $gcoord->start + 1; next; } my $gstart = $gcoord->start; my $gend = $gcoord->end; my $gstrand = $gcoord->strand; my $cdna_start = $cdna_total; my $cdna_end = $cdna_start + $gend - $gstart; $cdna_total += $gend - $gstart + 1; #determine which exon this genomic coordinate overlaps
my $exon; foreach my $e (@{$self->transcript->get_all_Exons}) { if($gstart >= $e->start && $gend <= $e->end) { $exon = $e; last; } } # first, eat away non complete codons from start
while(( $cdna_start - 1 ) % 3 != 0 ) { $cdna_start++; if( $gstrand == 1 ) { $gstart++; } else { $gend--; } } # and from end
while( $cdna_end % 3 != 0 ) { $cdna_end--; if( $gstrand == 1 ) { $gend--; } else { $gstart++; } } if( $cdna_end <= $cdna_start ) { next; } my $hstart = (($cdna_start+2)/3) + $ugFeature->hstart() - 1;
my $hend = ($cdna_end / 3) + $ugFeature->hstart() - 1;
my $fp = $ff->create_feature_pair($gstart, $gend, $gstrand, $feature->score, $hstart, $hend, 1, $feature->hseqname, $feature->percent_id, $feature->p_value, undef, $self->query, $feature->analysis); push( @{$exon_hash{$exon}}, $fp ); } } # Take the pieces for each exon and make gapped feature
foreach my $ex ( keys %exon_hash ) { my $dna_align_feature = Bio::EnsEMBL::DnaPepAlignFeature->new (-features => $exon_hash{$ex}); push(@output, $dna_align_feature); } } $self->output(\@output);
}
newdescriptionprevnextTop
sub new {
  my ($class,@args) = @_;
  my $self = $class->SUPER::new(@args);
  my ($pt) = rearrange(['TRANSCRIPT'], @args);
  $self->transcript($pt);
  return $self;
}
outputdescriptionprevnextTop
sub output {
  my ($self, $arr_ref, $reset) = @_;

  if(!$self->{'output'}){
    $self->{'output'} = [];
  }
  if($arr_ref){
    throw("Must pass Runnable:output an arrayref not a ".$arr_ref)
      unless(ref($arr_ref) eq 'ARRAY');
    if ($reset) {
      $self->{'output'} = $arr_ref;
    } else {
      push(@{$self->{'output'}}, @$arr_ref);
    }
  }
  return $self->{'output'};
}
rundescriptionprevnextTop
sub run {
  my ($self, $dir) = @_;

  my $pep = $self->transcript->translate;
  $pep->id($self->transcript->dbID);

  if($pep->length <= 3){
    #transcripts this length cause problems for blast
return; } my $query = $self->query; $self->query($pep); $self->SUPER::run($dir); $self->query($query); my $out = $self->output; $self->output([], 1); $self->align_hits_to_query($out);
}
transcriptdescriptionprevnextTop
sub transcript {
  my ($self, $pt) = @_;
  if($pt){
    throw("BlastGenscanPep:transcript must be a ".
          "Bio::EnsEMBL::Transcript ") 
      unless($pt->isa("Bio::EnsEMBL::Transcript"));
    $self->{'prediction_transcript'} = $pt;
  }
  return $self->{'prediction_transcript'};
}
General documentation
CONTACTTop
Post questions to the Ensembl development list: ensembl-dev@ebi.ac.uk