Raw content of Bio::EnsEMBL::Analysis::Runnable::PSILC_BlastP
# You may distribute this module under the same terms as perl itself
#
# POD documentation - main docs before the code
=pod
=head1 NAME
Bio::EnsEMBL::Analysis::Runnable::PSILC;
=head1 SYNOPSIS
my $blast = Bio::EnsEMBL::Analysis::Runnable::PSILC_BlastP->new
(
'-trans' => $transcript,
'-analysis' => $analysis,
);
$blast->run;
my $output = $blast->output;
=head1 DESCRIPTION
BLASTP runnable for Bio::EnsEMBL::Analysis::RunnableDB::PSILC
Runs BLASTP for the supplied transcript against a multi species blast database.
Filters the results and returns a hash ref of homologous transcript identifiers
=head1 CONTACT
Post questions to the Ensembl development list: ensembl-dev@ebi.ac.uk
=cut
package Bio::EnsEMBL::Analysis::Runnable::PSILC_BlastP;
use strict;
use warnings;
use Bio::EnsEMBL::Analysis::Runnable;
use Bio::EnsEMBL::Analysis::Config::Pseudogene;
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use Bio::EnsEMBL::Analysis::Runnable::Blast;
use Bio::EnsEMBL::Analysis::Tools::BPliteWrapper;
use Bio::EnsEMBL::Analysis::Tools::FilterBPlite;
use vars qw(@ISA);
@ISA = qw(Bio::EnsEMBL::Analysis::Runnable);
=head2 new
Arg [1] : array of Bio::EnsEMBL::Transcript
Function : cretes PSILC BLASTP runnable
Returntype: Bio::EnsEMBL::Analysis::Runnable::PSILC_BlastP
Exceptions: none
=cut
sub new {
my ($class,@args) = @_;
my $self = $class->SUPER::new(@args);
$self->{'_trans'} = {}; #gene to test;
my( $trans) = $self->_rearrange([qw(
TRANS
)], @args);
if ($trans) {
$self->trans($trans);
}
return $self;
}
=head2 run
Arg [1] : none
Function : runs Bio::EnsEMBL::Analysis::Runnable::PSILC_BlastP
Returntype: none
Exceptions: none
=cut
sub run {
my ($self)=@_;
my $trans = $self->trans;
$self->run_blast($trans);
}
=head2 run
Arg [1] : none
Function : runs BLAST on transcript against multi species blast database
Returntype: Hash containing transcript ids of homologs that pass criteria:
target is non identical to query has at least 80% coverage and 50% sequence identity
Exceptions: throws if it cannot recognise the transcript identifier
=cut
sub run_blast{
my ($self,$trans)=@_;
my %transcript_hash;
print STDERR "Blast search of ".$trans->dbID.".\n";
# might want to use minimal blast module
my $bplitewrapper = Bio::EnsEMBL::Analysis::Tools::BPliteWrapper-> new
(
# -regex => '^\w+\s+(\w+)'
-query_type => 'dna',
-database_type => 'dna',
);
# make a query object containing the cds
my $query = Bio::Seq->new(
'-display_id' => $trans->dbID,
'-seq' => $trans->translate->seq,
);
my $blast = Bio::EnsEMBL::Analysis::Runnable::Blast->new
('-query' => $query,
'-program' => 'blastp',
'-database' => "$PSILC_BLAST_DB/multi_species.fasta",
'-threshold' => 1e-6,
'-parser' => $bplitewrapper,
'-options' => 'V=10 -cpus=1',
'-analysis' => $self->analysis,
);
$blast->run();
my $results = $blast->output;
# get dnadnaalignfeatures for each HSP
# include in set if > 50% ID
# dont include if > 99%id and 80% coverage
# Coverage based on cds not cdna length
foreach my $daf (@{$results}) {
my $coverage = $daf->length/length($query->seq);
if ($daf->percent_id <= 99 &&
$coverage > 0.8 &&
$daf->percent_id > 50 ){
# organise the transcripts in to a hash to avoid redundancy and
# group them by species
if ($daf->hseqname =~ /(\w+)_(\w+)/) {
push @{$transcript_hash{$2}},$1;
} else {
$self->throw("Cannot recognise trans_id ".$daf->hseqname." $@\n");
}
}
}
$self->output(\%transcript_hash);
}
#######################################
# Containers
=head2 trans
Arg [1] : array ref
Description: get/set trans set to run over
Returntype : array ref to Bio::EnsEMBL::Transcript objects
Exceptions : throws if not a Bio::EnsEMBL::Transcript
Caller : general
=cut
sub trans {
my ($self, $trans) = @_;
if ($trans) {
unless ($trans->isa("Bio::EnsEMBL::Transcript")){
$self->throw("Input isn't a Bio::EnsEMBL::Transcript, it is a $trans\n$@");
}
$self->{'_trans'} = $trans;
}
return $self->{'_trans'};
}
=head2 output
Arg [1] : hash ref
Description: over-ridden output in runnable.pm to allow hash refs rather than array refs
Returntype : array ref to Bio::EnsEMBL::Transcript objects
Exceptions : none
Caller : general
=cut
sub output{
my ($self, $trans) = @_;
if ($trans) {
$self->{'_output'} = $trans;
}
return $self->{'_output'};
}
1;