Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation PIRSF
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
  Bio::EnsEMBL::Pipeline::Runnable::ProteinAnnotation::PIRSF
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation
Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::Hmmpfam
Bio::EnsEMBL::Utils::Argument qw ( rearrange )
Bio::EnsEMBL::Utils::Exception qw ( throw warning )
Inherit
Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation
Synopsis
  my $seg = Bio::EnsEMBL::Pipeline::Runnable::ProteinAnnotation::PIRSF->
new (
-query => $query,
-analysis => $analysis,
-database => $db,
-datfile => $datfile,
);
Description
Methods
filter_main
No description
Code
filter_sub_family
No description
Code
multiprotein
No description
Code
new
No description
Code
parse_results
No description
Code
process_thresholds_file
No description
Code
run_analysis
No description
Code
thresholds
No description
Code
Methods description
None available.
Methods code
filter_maindescriptionprevnextTop
sub filter_main {
  my ($self, $hits, $dat) = @_;

  my @pass_hits;

  foreach my $hit (@$hits) {
    my $this_dat = $dat->{$hit->hseqname};
    
    if (not defined $this_dat->{mean_score}) {
      # this must be a sub-family hit; filter it out, hoping 
# that the superfamily will be hit, and hence this sub-family
# hit in the refined search
next; } my $len_prop = $hit->length / $self->query->length;
my $len_diff = abs($self->query->length - $this_dat->{mean_length}); my $score_diff = $hit->score - $this_dat->{mean_score}; if ($len_prop >= 0.8 and ($score_diff >= -1*$this_dat->{std_score} or ($hit->score >= $this_dat->{min_score} and $score_diff >= -2.5 * $this_dat->{std_score})) and ($len_diff < 3.5 * $this_dat->{std_length})) { push @pass_hits, $hit; } } if (@pass_hits) { @pass_hits = sort { $a->p_value <=> $b->p_value} @pass_hits; return $pass_hits[0]; } else { return undef; }
}
filter_sub_familydescriptionprevnextTop
sub filter_sub_family {
  my ($self, $hits, $dat);
  
  my @pass_hits;
    foreach my $hit (@$hits) {
    my $this_dat = $dat->{$hit->hseqname};

    if ($hit->score >= $this_dat->{min_score}) {
      push @pass_hits, $hit;
    }
  }

  if (@pass_hits) {
    @pass_hits = sort { $a->pvalue <=> $b->pvalue} @pass_hits;
    return $pass_hits[0];
  } else {
    return undef;
  }

}


##############################
}
multiproteindescriptionprevnextTop
sub multiprotein {
  my ($self) = @_;
  return 1;
}
newdescriptionprevnextTop
sub new {
  my ($class, @args) = @_;

  my $self = $class->SUPER::new(@args);
  
  my ($threshold_file) = rearrange(['THRESHOLDS'], @args);

  if (not defined $threshold_file) {
    throw("You must supply a threshold file for PIRSF");
  } elsif (not -e $threshold_file) {
    throw("Threshold file '$threshold_file' could not be found");
  }

  $self->thresholds($threshold_file);

  throw("You must supply this runnable with a sequence object")
      if not ref($self->query) or not $self->query->isa("Bio::PrimarySeqI");

  return $self;
}


###############################
}
parse_resultsdescriptionprevnextTop
sub parse_results {
  my ($self) = @_;
  
  # nothing to do
return; } ###############################
}
process_thresholds_filedescriptionprevnextTop
sub process_thresholds_file {
  my ($self, $file) = @_;

  my (%data);

  open(DAT, $file); 
  while(<DAT>) {
    /^\>(\S+)(\s*.+)?/ and do {
      my $acc = $1; my $rest = $2;
      
      my $line = <DAT>; $line = <DAT>;
      
      my @fields = split /\s+/, $line;

      $data{$acc} = {
        acc => $acc,
        mean_length => $fields[0],
        std_length  => $fields[1],
        min_score   => $fields[2],
        mean_score  => @fields > 3 ? $fields[3] : undef,
        std_score   => @fields > 3 ? $fields[4] : undef,
      };

      if ($rest and $rest =~ /child:\s+(.+)/) {
        foreach my $cacc (split /\s+/, $1) {
          $data{$acc}->{children}->{$cacc} = 1;
        }
      }
    }
  }

  return\% data;
}
run_analysisdescriptionprevnextTop
sub run_analysis {
  my ($self) = @_;

  my $dat = $self->process_thresholds_file($self->thresholds);

  my ($main_hmm, $sf_hmm) = split(/;/, $self->database);

  my $main_run = Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::Hmmpfam->
      new(-query => $self->query,
          -analysis => $self->analysis,
          -database => $main_hmm,
          -options  => $self->options);
  $main_run->run;

  my $best_hit = $self->filter_main($main_run->output, $dat);

  if (defined $best_hit and exists($dat->{$best_hit->hseqname}->{children})) {
    my $sf_run = Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::Hmmpfam->
        new(-query => $self->query,
            -analysis => $self->analysis,
            -database => $sf_hmm,
            -options  => $self->options);
    $sf_run->run;

    my @relevant_hits;
    foreach my $hit (@{$sf_run->output}) {
      if ($dat->{$best_hit->hseqname}->{children}->{$hit->hseqname}) {
        push @relevant_hits, $hit;
      }
    }

    if (@relevant_hits) {
      my $best_sf_hit = $self->filter_sub_family(\@relevant_hits, $dat);
      if (defined $best_sf_hit) {
        $best_hit = $best_sf_hit;
      }
    }
  }

  my $out = [];
  if (defined $best_hit) {
    push @$out, $best_hit;
  }

  $self->output($out);
}
thresholdsdescriptionprevnextTop
sub thresholds {
  my ($self, $val) = @_;

  if (defined $val) {
    $self->{_thresholds} = $val;
  }

  return $self->{_thresholds};
}
General documentation
CONTACTTop