Raw content of Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::IPRScan
package Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::IPRScan;
use vars qw(@ISA);
use strict;
# Object preamble - inherits from Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation
use Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation;
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
@ISA = qw(Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation);
# Constructor.
#
# Passes all arguments straight through to the parent class constructor and
# then, if the resulting object reports no options (a false value from
# $self->options), installs a default iprscan command-line option string.
#
# Returns the constructed object.
sub new {
  my ($class, @args) = @_;

  my $self = $class->SUPER::new(@args);

  # Default iprscan options, used only when none were supplied.
  my $default_options =
    '-cli -appl blastprodom -appl fprintscan -appl hmmpfam -appl superfamily -appl hmmpir -appl scanregexp -format raw';

  # Alternative option sets that were previously used here, kept for reference:
  #   '-cli -appl blastprodom -appl fprintscan -appl hmmpfam -appl hmmpir -appl hmmpanther -appl hmmtigr -appl hmmsmart -appl superfamily -format raw'
  #   '-cli -appl blastprodom -appl fprintscan -appl hmmpfam -appl hmmpir -appl hmmpanther -appl hmmtigr -appl hmmsmart -appl superfamily -appl gene3d -format raw'
  unless ($self->options) {
    $self->options($default_options);
  }

  return $self;
}
#testing the different programs
# -appl Application(s) to run (optional), default is all.
# Possible values (dependent on set-up):
# * blastprodom
# * fprintscan
# 8 hmmpfam
# 8 hmmpir
# * hmmpanther
# 8 hmmtigr
# 7 hmmsmart
# 8 superfamily
# * gene3d
# 8 scanregexp
# 8 profilescan
# 8 seg
# 8 coils
# 8 [tmhmm]
# 8 [signalp]
#################
# Flag method: always returns 1, whatever it is called with.
# NOTE(review): presumably tells the calling framework that this runnable
# accepts a multi-sequence query file - confirm against the parent class.
sub multiprotein {
  my $self = shift;
  return 1;
}
##################
# Builds and executes the iprscan command line.
#
# The command is "<program> <options> -i <queryfile>", taken from the
# accessors of the same names, with both stdout and stderr redirected into
# $self->resultsfile. Progress messages are printed to STDOUT.
#
# Throws if system() reports a non-zero status for the command.
sub run_analysis{
  my ($self) = @_;
  #example command
  #/nfs/acari/lec/code/interpro/interpro_ftp/iprscan/bin/iprscan -cli -format raw
  #-i /nfs/acari/lec/code/interpro/interpro_ftp/iprscan/test.seq
  print "RUNNING ANALYSIS\n";
  # system() with a single string goes through /bin/sh. "> file 2>&1" is the
  # POSIX way to capture stdout and stderr together; the csh/bash-style
  # ">& file" is rejected by strict POSIX shells such as dash.
  my $command = $self->program." ".$self->options." -i ".$self->queryfile.
    " > ".$self->resultsfile." 2>&1";
  print "Running ".$command."\n";
  my $status = system($command);
  # Include the raw system() status so a failure can be diagnosed.
  throw("Error running ".$command." (system returned ".$status.")")
    unless($status == 0);
}
# Parses the iprscan raw-format results file into protein features.
#
# Reads $self->resultsfile line by line (echoing each line to STDOUT):
#   * a line containing "SUBMITTED <dir>" names the iprscan job directory;
#     its path under $self->workdir is registered with files_to_delete;
#   * lines without any digit are skipped;
#   * every other line is split on tabs and, if it has a start and an end
#     coordinate, turned into a protein feature via create_protein_feature.
#
# The collected features are handed to $self->output as an array reference.
#
# Throws if the results file cannot be opened, or if a created feature
# reports no seqname.
sub parse_results{
  my ($self) = @_;
  print "PARSING RESULTS\n";
  my $results_file = $self->resultsfile;
  open(my $results_fh, '<', $results_file)
    or throw("FAILED to open ".$results_file." IPRSCAN:parse_results");
  my @out;
 LINE:
  while(my $line = <$results_fh>){
    print $line;
    chomp $line;
    if($line =~ /SUBMITTED\s+(\S+)/){
      my $dir_name = $1;
      #job directory names look like iprscan-20071115-10500980; the first
      #number is also the name of the enclosing directory
      my ($base_dir) = $dir_name =~ /iprscan\-(\d+)\-\d+/;
      if(defined $base_dir){
        my $full_path = $self->workdir."/".$base_dir."/".$dir_name;
        $self->files_to_delete($full_path);
      }else{
        # Without the expected name pattern the path cannot be built.
        warning("Unrecognised iprscan job directory name ".$dir_name.
                " IPRSCAN:parse_results");
      }
      next LINE;
    }
    # Result lines always carry numbers; anything else (including empty
    # lines) is not a hit. Note "!$_ =~ /\d+/" would negate $_ first.
    next LINE unless($line =~ /\d/);
    #19897 4AB50DD3FDE96DB6 468 ProfileScan PS50157 ZINC_FINGER_C2H2_2 243 270 16.685 T 14-Nov-2007
    #columns: 0 translation id, 1 crc64, 2 length, 3 method, 4 hit name,
    #         5 description, 6 start, 7 end, 8 evalue, 9 status, 10 date
    my ($translation_id, $hit_name, $start, $end, $evalue) =
      (split /\t/, $line)[0, 4, 6, 7, 8];
    next LINE if(!$start || !$end);
    # A non-numeric evalue field is treated as "no evalue".
    if($evalue && !($evalue =~ /\d+/)){
      $evalue = undef;
    }
    # Make sure start <= end.
    ($start, $end) = ($end, $start) if($start > $end);
    my $pf = $self->create_protein_feature($start, $end, 0, $translation_id,
                                           0, 0, $hit_name, $self->analysis,
                                           $evalue, 0);
    throw("Protein feature has no translation id") if(!$pf->seqname);
    push(@out, $pf);
  }
  $self->output(\@out);
  close($results_fh);
}
#NF00181542 is the id of the input sequence.
#27A9BBAC0587AB84 is the crc64 (checksum) of the protein sequence (supposed to be unique).
#272 is the length of the sequence (in AA).
#HMMPIR is the analysis method launched.
#PIRSF001424 is the database members entry for this match.
#Prephenate dehydratase is the database member description for the entry.
#1 is the start of the domain match.
#270 is the end of the domain match.
#6.5e-141 is the evalue of the match (reported by the member database analysis method).
#T is the status of the match (T: true, M: marginal).
#06-Aug-2005 is the date of the run.