Raw content of Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation
package Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation;
use vars qw(@ISA);
use strict;
use Bio::EnsEMBL::Utils::Argument qw(rearrange);
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use File::Copy qw(mv cp);
use Bio::EnsEMBL::Analysis::Runnable;
use Bio::EnsEMBL::ProteinFeature;
@ISA = qw(Bio::EnsEMBL::Analysis::Runnable);
sub new {
my ($class,@args) = @_;
my $self = $class->SUPER::new(@args);
my ($database) = rearrange(['DATABASE'], @args);
if(!$database){
$database = $self->analysis->db_file;
}
$self->database($database);
if(!$self->query){
throw("need to have a query defined in order to run protein ".
"annotation");
}
if(!$self->analysis){
throw("need to have a analysis defined in order to run protein ".
"annotation");
}
if(not $self->program){
if ($self->analysis->program_file) {
$self->program($self->analysis->program_file);
} elsif ($self->analysis->program) {
$self->program($self->analysis->program);
}
}
return $self; # success - we hope!
}
sub run{
my ($self, $dir) = @_;
$self->workdir ('/tmp') unless ($self->workdir($dir));
$self->checkdir;
my @input_files;
if(-s $self->query and not $self->multiprotein){
# The input is a sequence file. but we have to break up
# the file into single-sequence entries for this analysis
my %files = %{$self->get_individual_protein_files};
foreach my $file(keys(%files)){
my $id = $files{$file};
$self->queryfile($file);
$self->run_analysis();
$self->parse_results($id);
}
} else {
if (-s $self->query) {
$self->queryfile($self->query);
$self->run_analysis;
$self->parse_results;
}
elsif (ref($self->query) and
$self->query->isa("Bio::PrimarySeqI")) {
#The input is a sequence object
# write sequence to file
my $filename = $self->write_seq_file($self->query);
$self->files_to_delete($filename);
$self->queryfile($filename);
$self->run_analysis;
$self->parse_results($self->query->id);
} else {
throw("Can't run if ".$self->query." isn't either a Bio::PrimarySeq " .
" or a file which has a size greater than 0");
}
}
$self->delete_files;
}
sub get_individual_protein_files {
my ($self) = @_;
if(!$self->{_protein_files}){
$self->{_protein_files} = {};
}
my $in = Bio::SeqIO->new(-file => $self->query, '-format' =>'Fasta');
while(my $tmpseq = $in->next_seq()){
my $stub = $self->analysis->logic_name.".".$tmpseq->display_id.".$$";
my $filename = $self->create_filename($self->analysis->logic_name,
$stub . "seq");
$filename = $self->write_seq_file($tmpseq, $filename);
$self->{_protein_files}{$filename} = $tmpseq->display_id;
$self->files_to_delete($filename);
}
return $self->{_protein_files};
}
#######################################
sub create_protein_feature{
my ($self, $start, $end, $score, $seqname, $hstart,
$hend, $hseqname,$analysis, $p_value, $percent_id) = @_;
my $fp = Bio::EnsEMBL::ProteinFeature->new(
-start => $start,
-end => $end,
-hstart => $hstart,
-hend => $hend,
-percent_id => $percent_id,
-score => $score,
-p_value => $p_value,
-hseqname => $hseqname,
-seqname => $seqname,
-analysis => $analysis,
);
return $fp;
}
###################################
sub query {
my ($self, $seq) = @_;
if ($seq) {
if (ref($seq) and $seq->isa("Bio::PrimarySeqI")) {
$self->{_sequence} = $seq;
} elsif (-e $seq) {
$self->{_sequence} = $seq;
} else {
throw("You must provide either a Bio::Seq or a file which ".
" exists not $seq");
}
}
return $self->{_sequence};
}
##################################
sub queryfile{
my ($self, $filename) = @_;
if($filename){
$self->{_queryfile} = $filename;
}
if(not exists $self->{_queryfile}){
$self->{_queryfile} = $self->create_filename($self->analysis->logic_name . '.seq',
'fa');
$self->files_to_delete($self->{_query_file});
}
return $self->{_queryfile};
}
##############################
sub resultsfile{
my ($self, $filename) = @_;
if($filename){
$self->{_resultsfile} = $filename;
}
if(not exists $self->{_resultsfile}){
$self->{_resultsfile} = $self->create_filename($self->analysis->logic_name . '.results',
'out');
$self->files_to_delete($self->{_resultsfile});
}
return $self->{_resultsfile};
}
################################
sub database {
my ($self, $database) = @_;
if (defined $database) {
$self->{_database} = $database;
}
return $self->{_database};
}
##################################
sub multiprotein{
my ($self) = @_;
throw($self->program. "'s module must implement this method to define ".
" if it can handle multi sequence fasta files or needs to take ".
" one protein at once");
}
1;