Bio::EnsEMBL::Analysis::Runnable
BaseExonerate
Toolbar
Summary
Bio::EnsEMBL::Analysis::Runnable::ExonerateTranscript
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
Do NOT instantiate this class directly: must be instantiated
from a subclass (see ExonerateTranscript, for instance).
Description
This is an abstract superclass to handle the common functionality for
Exonerate runnables: namely it provides
- a consistent external interface to drive exonerate regardless of
what features youre finally producing, and
- a common process to stop people duplicating function (eg how to
arrange command-line arguments, what ryo-string to use etc).
It does NOT provide the parser to convert the exonerate output
into Transcripts or AffyFeatures etc. That is the job of the
subclasses, which MUST implement the parse_results method.
Methods
Methods description
Arg [1] : Bio::EnsEMBL::Analysis::Runnable::BaseExonerate Arg [2] : pointer to file-handle Function : This method MUST be coded by the base class to return an array-ref of features. arguments - is passed a pointer to a filehandle which is the output of exonerate. Exonerate's basic options - it's always run with - are: --showsugar false --showvulgar false --showalignment false --ryo \"RESULT: %S %pi %ql %tl %g %V\\n\" so this tells you what the output file will look like: you have to code the parser accordingly. Returntype: Listref of <things> Example : my ( $self, $fh ) = @_; while (<$fh>){ next unless /^RESULT:/; chomp; my ( $tag, $q_id, $q_start, $q_end, $q_strand, $t_id, $t_start, $t_end, $t_strand, $score, $perc_id, $q_length, $t_length, $gene_orientation, @vulgar_blocks ) = split; ...now do something with the match information and / or vulgar blocks } |
Usage : $obj->run($workdir, $args) Function: Runs exonerate script and puts the results into the file $self->results It calls $self->parse_results, and results are stored in $self->output |
Methods code
sub _verbose
{ my ($self, $val) = @_;
if ($val){
$self->{_verbose} = $val;
}
return $self->{_verbose};
}
1; } |
sub annotation_features
{ my ($self, $feats) = @_;
if ($feats){
foreach my $k (keys %$feats) {
my $f = $feats->{$k};
unless ($f->isa("Bio::EnsEMBL::Feature")) {
throw("annotation features must be Bio::EnsEMBL::Features");
}
}
$self->{_annot_feats} = $feats;
}
return $self->{_annot_feats};
}
} |
sub annotation_file
{ my ($self, $file) = @_;
if (defined $file) {
$self->{_annot_file} = $file;
}
return $self->{_annot_file};
}
} |
sub new
{ my ($class,@args) = @_;
my $self = $class->SUPER::new(@args);
my
(
$query_type, $query_seqs, $query_file, $q_chunk_num, $q_chunk_total,
$target_seqs, $target_file, $t_chunk_num, $t_chunk_total,
$annotation_features, $annotation_file, $verbose, $basic_options
) =
rearrange(
[
qw(
QUERY_TYPE
QUERY_SEQS
QUERY_FILE
QUERY_CHUNK_NUMBER
QUERY_CHUNK_TOTAL
TARGET_SEQS
TARGET_FILE
TARGET_CHUNK_NUMBER
TARGET_CHUNK_TOTAL
ANNOTATION_FEATURES
ANNOTATION_FILE
VERBOSE
BASIC_OPTIONS
)
],
@args
);
$self->_verbose($verbose) if $verbose;
if (defined($query_seqs)) {
if(ref($query_seqs) ne "ARRAY"){
throw("You must supply an array reference with -query_seqs");
}
$self->query_seqs($query_seqs);
} elsif (defined $query_file) {
throw("The given query file ".$query_file." does not exist")
if ! -e $query_file;
$self->query_file($query_file);
}
if ($query_type){
$self->query_type($query_type);
} else{
$self->query_type('dna');
}
if (defined $target_seqs) {
if (ref($target_seqs) ne "ARRAY") {
throw("You must supply an array reference with -target_seqs");
}
$self->target_seqs($target_seqs);
} elsif (defined $target_file) {
throw("The given database does not exist") if ! -e $target_file;
$self->target_file($target_file);
}
if (defined $annotation_features) {
if (ref($annotation_features) ne "HASH") {
throw("You must supply a hash reference with -annotation_features");
}
$self->annotation_features($annotation_features);
} elsif (defined $annotation_file) {
throw("The given annotation file does not exist") if ! -e $annotation_file;
$self->annotation_file($annotation_file);
}
if (not $self->program) {
$self->program('/usr/local/ensembl/bin/exonerate-0.8.3');
}
$basic_options ||= "--showsugar false --showvulgar false --showalignment false --ryo\" RESULT: %S %pi %ql %tl %g %V\\n\" ";
if (defined $q_chunk_num and defined $q_chunk_total) {
$basic_options .= "--querychunkid $q_chunk_num --querychunktotal $q_chunk_total ";
}
if (defined $t_chunk_num and defined $t_chunk_total) {
$basic_options .= "--targetchunkid $t_chunk_num --targetchunktotal $t_chunk_total ";
}
if ($self->options){
$basic_options .= $self->options;
}
$self->options($basic_options);
return $self;
}
} |
sub parse_results
{ throw ("This method must be provided by a subclass and not invoked directly!\n ");
}
} |
sub query_file
{ my ($self, $file) = @_;
if ($file) {
$self->{_query_file} = $file;
}
return $self->{_query_file};
}
} |
sub query_seqs
{ my ($self, $seqs) = @_;
if ($seqs){
unless ($seqs->[0]->isa("Bio::PrimarySeqI") || $seqs->[0]->isa("Bio::SeqI")){
throw("query seq must be a Bio::SeqI or Bio::PrimarySeqI");
}
$self->{_query_seqs} = $seqs;
}
return $self->{_query_seqs};
}
} |
sub query_type
{ my ($self, $mytype) = @_;
if (defined($mytype) ){
my $type = lc($mytype);
unless( $type eq 'dna' || $type eq 'protein' ){
throw("not the right query type: $type");
}
$self->{_query_type} = $type;
}
return $self->{_query_type};
}
} |
sub run
{ my ($self) = @_;
if ($self->annotation_features) {
my $annot_file = $self->workdir . "/exonerate_a.$$";
open F, ">$annot_file" or
throw "Could not open temp $annot_file for writing";
foreach my $id (keys %{$self->annotation_features}) {
my $f = $self->annotation_features->{$id};
printf(F "%s %s %d %d\n",
$f->seqname,
$f->strand < 0 ? "-" : "+",
$f->start,
$f->length);
}
close(F);
$self->files_to_delete($annot_file);
$self->annotation_file($annot_file);
} elsif ($self->annotation_file) {
my %feats;
open F, $self->annotation_file or
throw("Could not open supplied annotation file for reading");
while(<F>) {
/^(\S+)\s+(\S+)\s+(\d+)\s+(\d+)/ and do {
$feats{$1} = Bio::EnsEMBL::Feature->new(-seqname => $1,
-strand => $2 eq "-" ? -1 : 1,
-start => $3,
-end => $3 + $4 - 1);
};
}
close(F);
$self->annotation_features(\%feats);
}
if ($self->query_seqs) {
my $query_file = $self->workdir . "/exonerate_q.$$";
my $seqout =
Bio::SeqIO->new(
'-format' => 'fasta',
'-file' => ">$query_file"
);
foreach my $seq ( @{$self->query_seqs} ) {
$seqout->write_seq($seq);
}
$self->files_to_delete($query_file);
$self->query_file($query_file);
}
if ($self->target_seqs) {
my $target_file = $self->workdir . "/exonerate_t.$$";
my $seqout =
Bio::SeqIO->new(
'-format' => 'fasta',
'-file' => ">$target_file"
);
foreach my $seq ( @{$self->target_seqs} ) {
$seqout->write_seq($seq);
}
$self->files_to_delete($target_file);
$self->target_file($target_file);
}
my $command =
$self->program . " " .$self->options .
" --querytype " . $self->query_type .
" --targettype " . $self->target_type .
" --query " . $self->query_file .
" --target " . $self->target_file;
$command .= " --annotation " . $self->annotation_file if $self->annotation_features;
print STDERR "Exonerate command : $command\n";
my $exo_fh;
open( $exo_fh, "$command |" ) or throw("Error opening exonerate command: $? $!");
$self->output($self->parse_results( $exo_fh ));
close( $exo_fh ) or throw ("Error closing exonerate command: $? $!");
$self->delete_files;
return 1; } |
sub target_file
{ my ($self, $file) = @_;
if ($file) {
$self->{_target_file} = $file;
}
return $self->{_target_file};
}
} |
sub target_seqs
{ my ($self, $seqs) = @_;
if ($seqs){
unless ($seqs->[0]->isa("Bio::PrimarySeqI") || $seqs->[0]->isa("Bio::SeqI")){
throw("query seq must be a Bio::SeqI or Bio::PrimarySeqI");
}
$self->{_target_seqs} = $seqs;
}
return $self->{_target_seqs};
}
} |
sub target_type
{ my ($self) = @_;
return 'dna';
}
} |
General documentation
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _