Bio::EnsEMBL::Analysis::Runnable
ExonerateArray
Toolbar
Summary
Bio::EnsEMBL::Pipeline::Runnable::ExonerateArray
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
$database = a full path location for the directory containing the target (genomic usually) sequence,
@sequences = a list of Bio::Seq objects,
$exonerate = a location for the binary,
$options = a string with options ,
my $runnable = Bio::EnsEMBL::Pipeline::Runnable::ExonerateArray->new(
-db =>$db,
-query_seqs => \@sequences,
-program => $exonerate,
-options => $options,
);
$runnable->run; #create and fill Bio::Seq object
my $results = $runnable->output;
where $results is an arrayref of MiscFeatures.
Description
ExonerateArray takes a Bio::Seq (or Bio::PrimarySeq) object and runs Exonerate
against a set of sequences. The resulting output file is parsed
to produce a set of features.
Methods
Methods description
Arg [1] : Bio::EnsEMBL::Analysis::Runnable::ExonerateArray Arg [2] : string, filename Function : open and parse the results file into misc_features features Returntype: none Exceptions: throws on failure to open or close output file Example : |
Arg [1] : Bio::EnsEMBL::Analysis::Runnable Arg [2] : string, directory Function : a generic run method. This checks the directory specifed to run it, write the query sequence to file, marks the query sequence file and results file for deletion, runs the analysis parses the results and deletes any files Returntype: 1 Exceptions: throws if no query sequence is specified Example : |
Methods code
_make_affy_features | description | prev | next | Top |
sub _make_affy_features
{
my ($self,@h) = @_;
my @misc_feats;
my $feature_factory = $self->feature_factory;
foreach my $h (@h) {
my ($coord_system_name,$seq_region_name,$slice) ;
if ($h->{'q_id'} =~/Zebrafish/i) {
$coord_system_name = undef;
if ($h->{'t_id'} =~ /^(\S+)\-1\-.*$/) {
$seq_region_name = $1;
$seq_region_name =~ s/\-1$//;
}
elsif ($h->{'t_id'} =~ /^(\S+)\..*$/) {
$seq_region_name = $1;
}
}
else {
$coord_system_name = "chromosome";
($seq_region_name) = $h->{'t_id'} =~ /^(\S+)\..*$/;
}
$slice = $self->db->get_SliceAdaptor->fetch_by_region($coord_system_name,$seq_region_name);
if (!$slice) {
warning("In ExonerateArray::make_affy_features: Could not obtain slice for seq_region: $coord_system_name : $seq_region_name---try with coord_system_name = undef\n");
$coord_system_name = undef;
$slice = $self->db->get_SliceAdaptor->fetch_by_region($coord_system_name,$seq_region_name);
if (!$slice) {
warning("In ExonerateArray::make_affy_features: Could not obtain slice for seq_region: $coord_system_name : $seq_region_name");
next;
}
}
my $probe_name = $h->{'q_id'};
$probe_name =~ s/^probe\://;
my ($array_name,$composite_name) = split /\:/, $probe_name;
$composite_name =~ s/\;$//;
my $xref_name = $array_name;
$xref_name =~ s/-/_/g;
my $misc_feature = $feature_factory->create_misc_feature
($h->{'t_start'},$h->{'t_end'},$h->{'t_strand'},$slice);
$feature_factory->add_misc_feature_attribute
( $misc_feature, "probeName","Probe name","the name of the probe",$probe_name);
$feature_factory->add_misc_feature_attribute
($misc_feature, "compositeName","Composite name", "the name of the composite", $composite_name);
$feature_factory->add_misc_feature_attribute
($misc_feature,"probeLength", "Probe length", "the length of the probe", $h->{'probe_length'});
$feature_factory->add_misc_feature_attribute
($misc_feature, "matchLength", "Match length", "number of bases in matched alignment", $h->{'matching_length'});
$feature_factory->add_misc_feature_attribute
($misc_feature,"matchStatus", "Match status", "full_match or with_mismatch", $h->{'match_status'});
$feature_factory->add_misc_set
($misc_feature, "AFFY\_$xref_name", $array_name, "array set name", $self->max_length);
$feature_factory->add_misc_set
($misc_feature, "All_Affy", "All-Array-Sets", "all array sets", $self->max_length);
push (@misc_feats, $misc_feature);
}
$self->output(\@misc_feats);
}
}
sub db
{
my ($self, $db) = @_;
if ($db) {
$self->{'db'} = $db ;
}
return $self->{'db'};
}
}
sub length
{
my ($self, $length) = @_;
if ($length) {
$self->{'length'} = $length ;
}
return $self->{'length'};
}
}
sub max_length
{
my ($self, $max_length) = @_;
if (defined($max_length) ){
$self->{'max_length'} = $max_length;
}
return $self->{'max_length'};
}
1;
}
sub new
{
my ($class,@args) = @_;
my $self = $class->SUPER::new(@args);
my ($db,$query_seqs) =
rearrange([qw(
DB
QUERY_SEQS
)
], @args);
$self->db($db) if $db;
unless( $query_seqs ){
throw("Exonerate needs a query_seqs: $query_seqs");
}
our (%length);
my $queryfile = $self->queryfile();
foreach my $query_seq (@{$query_seqs}) {
$length{$query_seq->display_id} = $query_seq->length;
$self->write_seq_file ($query_seq);
}
my @lengths = sort {$b<=>$a} values %length;
my $max_length = $lengths[0];
$self->max_length($max_length);
$self->length(\%length);
return $self;
}
}
sub parse_results
{
my ($self, $results) = @_;
if(!$results){
$results = $self->resultsfile;
}
my %length = %{$self->length()};
open( EXO, $results ) || throw("FAILED to open ".$results." ExonerateArray::parse_results");
my (@pro_features);
while (<EXO>){
if (/^vulgar\:/) {
my $h={};
chomp;
my ( $tag, $q_id, $q_start, $q_end, $q_strand, $t_id, $t_start, $t_end, $t_strand, $score, $match, $matching_length) = split;
$q_start++;
$t_start++;
my $strand;
if ($q_strand eq $t_strand) {
$strand = 1;
}
else {
$strand = -1;
}
$h->{'q_id'} = $q_id;
$h->{'q_start'} = $q_start;
$h->{'q_end'} = $q_end;
$h->{'q_strand'} = $strand;
$h->{'t_id'} = $t_id;
$h->{'t_start'} = $t_start;
$h->{'t_end'} = $t_end;
$h->{'t_strand'} = $strand;
$h->{'score'} = $score;
$h->{'probe_length'} = $length{$h->{'q_id'}};
$h->{'matching_length'} = $matching_length;
if ($h->{'matching_length'} == $h->{'probe_length'}-1) {
$h->{'match_status'} = "Mismatch";
push @pro_features, $h;
}
elsif ($h->{'matching_length'} == $h->{'probe_length'}) {
if ($h->{'score'} == 125) {
$h->{'match_status'} = "Fullmatch";
}
else {
$h->{'match_status'} = "Mismatch";
}
push @pro_features, $h;
}
}
}
close(EXO) or $self->throw("couldn't close pipe ");
$self->_make_affy_features(@pro_features);
}
sub run
{
my ($self, $dir) = @_;
if(!$dir){
$dir = $self->workdir;
}
$self->checkdir($dir);
my $filename = $self->write_seq_file();
$self->files_to_delete($filename);
$self->files_to_delete($self->resultsfile);
$self->run_analysis();
$self->parse_results;
$self->delete_files;
return 1;
}
write_seq_file | description | prev | next | Top |
sub write_seq_file
{
my ($self, $seq, $filename) = @_;
if(!$seq){
$seq = $self->query;
}
if(!$filename){
$filename = $self->queryfile;
}
my $seqout = Bio::SeqIO->new(
-file => ">>".$filename,
-format => 'Fasta',
);
eval{
$seqout->write_seq($seq) if $seq;
};
if($@){
throw("seq is $seq\nFAILED to write $seq to $filename Runnable:write_seq_file : $@");
}
return $filename;
}
General documentation
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _