Raw content of Bio::EnsEMBL::Analysis::Runnable::ExonerateAffy =pod =head1 NAME Bio::EnsEMBL::Analysis::Runnable::ExonerateAffy =head1 SYNOPSIS my $runnable = Bio::EnsEMBL::Analysis::Runnable::ExonerateAffy->new( -query_seqs => \@q_seqs, -query_type => 'dna', -target_seqs => \@t_seqs, -options => $options, ); $runnable->run; #create and fill Bio::Seq object my @results = $runnable->output; =head1 DESCRIPTION This module handles a specific use of the Exonerate (G. Slater) program, to align Affymetrix probes to a target genome. (The resulting alignments will probably be stored in an ensembl db as Bio::EnsEMBL::AffyFeature objects.) NOTE: the AffyFeature objects refer to AffyProbe id's, and they in turn refer to AffyArray id's. Both the AffyProbes and the AffyArray's should be pre-loaded into the ensembl db: there are separate RunnableDB /RunnableDB's to do this from the Affymetrix data sets. This runnable just creates fake affy probes in order to create reasonable-looking affy features. =head1 CONTACT ensembl-dev@ebi.ac.uk =cut package Bio::EnsEMBL::Analysis::Runnable::ExonerateAffy; use vars qw(@ISA); use strict; use Bio::EnsEMBL::Analysis::Runnable; use Bio::EnsEMBL::Analysis::Runnable::BaseExonerate; use Bio::EnsEMBL::AffyArray; use Bio::EnsEMBL::AffyProbe; use Bio::EnsEMBL::AffyFeature; use Bio::EnsEMBL::Utils::Exception qw(throw warning); use Bio::EnsEMBL::Utils::Argument qw( rearrange ); @ISA = qw(Bio::EnsEMBL::Analysis::Runnable::BaseExonerate); sub new { my ( $class, @args ) = @_; my $self = $class->SUPER::new(@args); return $self; } # # Implementation of method in abstract superclass # sub parse_results { my ( $self, $fh ) = @_; my @affy_features; while (<$fh>){ #print STDERR $_ if $self->_verbose; next unless /^RESULT:/; chomp; my ( $tag, $q_id, $q_start, $q_end, $q_strand, $t_id, $t_start, $t_end, $t_strand, $score, $perc_id, $q_length, $t_length, $gene_orientation, @vulgar_blocks ) = split; my($match_type, $query_match_length, $target_match_length,@rest_of_vulgar) = @vulgar_blocks; if(@rest_of_vulgar){ throw ( "There is more than a simple match ('M') vulgar output: @vulgar_blocks for affy tag $q_id mapped to region $t_id \n" ); } if(!($match_type eq 'M')){ throw "I have received a starting Vulgar symbol $match_type which is not a match!"; } my $affy_feature = $self->make_affy_feature( $q_id, $q_start, $q_end, $q_strand, $t_id, $t_start, $t_end, $t_strand, $score, $q_length, $query_match_length ); if($affy_feature){ push @affy_features, $affy_feature; }else{ warn "Affy feature from probe :$q_id doesnt match well enough\n"; } } return \@affy_features; } # # Create affy features attached to 'fake' affy array's and affy probe # objects: these are identified by name only, and are then persisted by # runnabledb: the runnabledb can sort out whether the attached probe/array # is valid (exists in the db). # sub make_affy_feature{ my ($self, @args) = @_; my ( $tag, $q_id, $q_start, $q_end, $q_strand, $t_id, $t_start, $t_end, $t_strand, $score, $q_length, $query_match_length ) = @_; #because of the 'in-between' coordinates. $t_start += 1; my $probe_internal_id = $q_id; my $mismatch_count; if(!($probe_internal_id =~ /\d+/)){ throw "Affy probe headers MUST be the internal db ids of the affy probes for this parser to work!\n"; } # If we miss by more than one, don't create the feature. if($query_match_length == $q_length){ if($score == 125){ $mismatch_count = 0; }elsif ($score == 116) { $mismatch_count = 1; } else { return undef; } }elsif($query_match_length == $q_length -1){ if ($score == 120) { $mismatch_count = 1; } else { return undef; } }else{ return undef; } #Create a dummy probe object to pass out. It must be checked and #replaced by the parent runnabledb before being stored. my $affy_probe = new Bio::EnsEMBL::AffyProbe( -dbID =>$q_id, -name => "replace this probe name", -arrayname => "replace this array name", ); # Everything is 'flipped' into the forward strand of the probe - # so a hit on the reverse strand of the probe (q_strand = '-1') # is altered: q_strand = '+1' and t_strand => -1 x t_strand. if($q_strand eq '+'){ if($t_strand eq '+'){ $t_strand = 1; }elsif($t_strand eq '-'){ $t_strand = -1; }else{ throw "unrecognised target strand symbol: $t_strand\n"; } }elsif($q_strand eq '-'){ if($t_strand eq '-'){ $t_strand = 1; }elsif($t_strand eq '+'){ $t_strand = -1; }else{ throw "unrecognised target strand symbol: $t_strand\n"; } }else{ throw "unrecognised query strand symbol: $q_strand\n"; } my $affy_feature = new Bio::EnsEMBL::AffyFeature( -probe => $affy_probe, -start => $t_start, -end => $t_end, -strand => $t_strand, -mismatchcount => $mismatch_count ); # attach the slice name onto the feature: let the runnabledb # sort out whether it's valid. $affy_feature->seqname($t_id); return $affy_feature; } 1;