Bio::EnsEMBL::Analysis::RunnableDB AlignmentFilter
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
Bio::EnsEMBL::Analysis::RunnableDB::AlignmentFilter
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Analysis::RunnableDB
Bio::EnsEMBL::Compara::DBSQL::DBAdaptor
Bio::EnsEMBL::Compara::GenomicAlignBlock
Bio::EnsEMBL::Compara::GenomicAlignGroup
Bio::EnsEMBL::DBSQL::DBAdaptor
Bio::EnsEMBL::Utils::Argument qw ( rearrange )
Bio::EnsEMBL::Utils::Exception qw ( throw warning )
Inherit
Bio::EnsEMBL::Analysis::RunnableDB
Synopsis
Abstract base class of AlignmentChains and AlignmentNets
Description
Methods
COMPARA_DB
No description
Code
INPUT_METHOD_LINK_TYPE
No description
Code
MAX_GAP
No description
Code
MIN_CHAIN_SCORE
No description
Code
OUTPUT_METHOD_LINK_TYPE
No description
Code
compara_cigars_from_daf_cigar
No description
Code
convert_output
No description
Code
daf_cigar_from_compara_cigars
No description
Code
new
No description
Code
output_MethodLinkSpeciesSet
No description
Code
query_DnaFrag_hash
No description
Code
runDescriptionCode
sort_chains_by_max_block_score
No description
Code
split_feature
No description
Code
target_DnaFrag_hash
No description
Code
write_outputDescriptionCode
Methods description
runcode    nextTop
  Arg [1]   : Bio::EnsEMBL::Analysis::RunnableDB
Function : cycles through all the runnables, calls run and pushes
their output into the RunnableDBs output array
Returntype: array ref
Exceptions: none
Example :
write_outputcodeprevnextTop
    Title   :   write_output
Usage : $self->write_output()
Function: Writes contents of $self->output into $self->db
Returns : 1
Args : None
Methods code
COMPARA_DBdescriptionprevnextTop
sub COMPARA_DB {
  my ($self, $db) = @_;

  if (defined $db) {
    $self->{_compara_db} = $db;
  }

  return $self->{_compara_db};
}
INPUT_METHOD_LINK_TYPEdescriptionprevnextTop
sub INPUT_METHOD_LINK_TYPE {
  my ($self, $type) = @_;

  if (defined $type) {
    $self->{_in_method_link_type} = $type;
  }
  
  return $self->{_in_method_link_type};
}
MAX_GAPdescriptionprevnextTop
sub MAX_GAP {
  my ($self, $val) = @_;
  
  if (defined $val) {
    $self->{_max_gap} = $val;
  }

  return $self->{_max_gap};
}
MIN_CHAIN_SCOREdescriptionprevnextTop
sub MIN_CHAIN_SCORE {
  my ($self, $val) = @_;

  if (defined $val) {
    $self->{_min_chain_score} = $val ;
  }
  if (not exists $self->{_min_chain_score}) {
    return undef;
  } else {
    return $self->{_min_chain_score};
  }
}



1;
}
OUTPUT_METHOD_LINK_TYPEdescriptionprevnextTop
sub OUTPUT_METHOD_LINK_TYPE {
  my ($self, $type) = @_;

  if (defined $type) {
    $self->{_out_method_link_type} = $type;
  }
  
  return $self->{_out_method_link_type};
}
compara_cigars_from_daf_cigardescriptionprevnextTop
sub compara_cigars_from_daf_cigar {
  my ($self, $daf_cigar) = @_;

  my ($q_cigar_line, $t_cigar_line, $align_length);

  my @pieces = split(/(\d*[MDI])/, $daf_cigar);

  my ($q_counter, $t_counter) = (0,0);

  foreach my $piece ( @pieces ) {

    next if ($piece !~ /^(\d*)([MDI])$/);
    
    my $num = ($1 or 1);
    my $type = $2;
    
    if( $type eq "M" ) {
      $q_counter += $num;
      $t_counter += $num;
      
    } elsif( $type eq "D" ) {
      $q_cigar_line .= (($q_counter == 1) ? "" : $q_counter)."M";
      $q_counter = 0;
      $q_cigar_line .= (($num == 1) ? "" : $num)."D";
      $t_counter += $num;
      
    } elsif( $type eq "I" ) {
      $q_counter += $num;
      $t_cigar_line .= (($t_counter == 1) ? "" : $t_counter)."M";
      $t_counter = 0;
      $t_cigar_line .= (($num == 1) ? "" : $num)."D";
    }
    $align_length += $num;
  }

  $q_cigar_line .= (($q_counter == 1) ? "" : $q_counter)."M"
      if $q_counter;
  $t_cigar_line .= (($t_counter == 1) ? "" : $t_counter)."M"
      if $t_counter;
  
  return ($q_cigar_line, $t_cigar_line, $align_length);
}
convert_outputdescriptionprevnextTop
sub convert_output {
  my ($self, $chains_of_dafs) = @_; 

  my (@chains_of_blocks);

  foreach my $chain_of_dafs (@$chains_of_dafs) {
    my @chain_of_blocks;

    foreach my $raw_daf (sort {$a->start <=> $b->start} @$chain_of_dafs) {
      my @split_dafs;
      if ($self->MAX_GAP) {
        @split_dafs = $self->split_feature($raw_daf, $self->MAX_GAP);
      } else {
        @split_dafs = ($raw_daf);
      }

      my ($group, @gas);
      if (defined $self->INPUT_GROUP_TYPE) {
        $group = Bio::EnsEMBL::Compara::GenomicAlignGroup->new
          (-type                => $self->INPUT_GROUP_TYPE);
      }

      foreach my $daf (@split_dafs) {
        my ($q_cigar, $t_cigar, $al_len) = 
            $self->compara_cigars_from_daf_cigar($daf->cigar_string);
        
        my $q_dnafrag = $self->query_DnaFrag_hash->{$daf->seqname};
        my $t_dnafrag = $self->target_DnaFrag_hash->{$daf->hseqname};
        
        my $out_mlss = $self->output_MethodLinkSpeciesSet;
        
        my $q_genomic_align = Bio::EnsEMBL::Compara::GenomicAlign->new
            (-dnafrag        => $q_dnafrag,
             -dnafrag_start  => $daf->start,
             -dnafrag_end    => $daf->end,
             -dnafrag_strand => $daf->strand,
             -cigar_line     => $q_cigar,
             -level_id       => $daf->level_id ? $daf->level_id : 1,
             -method_link_species_set => $out_mlss);
        
        my $t_genomic_align = Bio::EnsEMBL::Compara::GenomicAlign->new
            (-dnafrag        => $t_dnafrag,
             -dnafrag_start  => $daf->hstart,
             -dnafrag_end    => $daf->hend,
             -dnafrag_strand => $daf->hstrand,
             -cigar_line     => $t_cigar,
             -level_id       => $daf->level_id ? $daf->level_id : 1,
             -method_link_species_set => $out_mlss);

        if (defined $group) {
          unless (defined $group->dbID) {
            $group->dbID($daf->group_id);
          }
          push @gas, ($q_genomic_align, $t_genomic_align);
        }
        
        my $gen_al_block = Bio::EnsEMBL::Compara::GenomicAlignBlock->new
            (-genomic_align_array => [$q_genomic_align, $t_genomic_align],
             -score               => $daf->score,
             -length              => $al_len,
             -method_link_species_set => $out_mlss);
        
        push @chain_of_blocks, $gen_al_block;
      }
      if (defined $group) {
        $group->genomic_align_array(\@gas);
      }
    }

    push @chains_of_blocks,\@ chain_of_blocks;
  }
    
  return\@ chains_of_blocks;
}


###################################
}
daf_cigar_from_compara_cigarsdescriptionprevnextTop
sub daf_cigar_from_compara_cigars {
  my ($self, $q_cigar, $t_cigar) = @_;

  my (@q_pieces, @t_pieces);
  foreach my $piece (split(/(\d*[MDGI])/, $q_cigar)) {
    next if ($piece !~ /^(\d*)([MDGI])$/);

    my $num = $1; $num = 1 if $num eq "";
    my $type = $2; $type = 'D' if $type ne 'M';

    if ($num > 0) {
      push @q_pieces, { num  => $num,
                        type => $type, 
                      };
    }
  }
  foreach my $piece (split(/(\d*[MDGI])/, $t_cigar)) {
    next if ($piece !~ /^(\d*)([MDGI])$/);
    
    my $num = $1; $num = 1 if $num eq "";
    my $type = $2; $type = 'D' if $type ne 'M';

    if ($num > 0) {
      push @t_pieces, { num  => $num,
                        type => $type,
                      };
    }
  }

  my $daf_cigar = "";

  while(@q_pieces and @t_pieces) {
    # should never be left with a q piece and no target pieces, or vice versa
my $q = shift @q_pieces; my $t = shift @t_pieces; if ($q->{num} == $t->{num}) { if ($q->{type} eq 'M' and $t->{type} eq 'M') { $daf_cigar .= ($q->{num} > 1 ? $q->{num} : "") . 'M'; } elsif ($q->{type} eq 'M' and $t->{type} eq 'D') { $daf_cigar .= ($q->{num} > 1 ? $q->{num} : "") . 'I'; } elsif ($q->{type} eq 'D' and $t->{type} eq 'M') { $daf_cigar .= ($q->{num} > 1 ? $q->{num} : "") . 'D'; } else { # must be a delete in both seqs; warn and ignore
warn("The following cigars have a simultaneous gap:\n" . $q_cigar . "\n". $t_cigar . "\n"); } } elsif ($q->{num} > $t->{num}) { if ($q->{type} ne 'M') { warn("The following cigars are strange:\n" . $q_cigar . "\n". $t_cigar . "\n"); } if ($t->{type} eq 'M') { $daf_cigar .= ($t->{num} > 1 ? $t->{num} : "") . 'M'; } elsif ($t->{type} eq 'D') { $daf_cigar .= ($t->{num} > 1 ? $t->{num} : "") . 'I'; } unshift @q_pieces, { type => 'M', num => $q->{num} - $t->{num}, }; } else { # $t->{num} > $q->{num}
if ($t->{type} ne 'M') { warn("The following cigars are strange:\n" . $q_cigar . "\n". $t_cigar . "\n"); } if ($q->{type} eq 'M') { $daf_cigar .= ($q->{num} > 1 ? $q->{num} : "") . 'M'; } elsif ($q->{type} eq 'D') { $daf_cigar .= ($q->{num} > 1 ? $q->{num} : "") . 'D'; } unshift @t_pieces, { type => 'M', num => $t->{num} - $q->{num}, }; } } # final sanity checks
if (@q_pieces or @t_pieces) { warn("Left with dangling pieces in the following cigars:\n" . $q_cigar . "\n". $t_cigar . "\n"); return undef; } my $last_type; foreach my $piece (split(/(\d*[MDI])/, $daf_cigar)) { next if not $piece; my ($type) = ($piece =~ /\d*([MDI])/); if (defined $last_type and (($last_type eq 'I' and $type eq 'D') or ($last_type eq 'D' and $type eq 'I'))) { warn("Adjacent Insert/Delete in the following cigars:\n" . $q_cigar . "\n". $t_cigar . "\n". $daf_cigar . "\n"); return undef; } $last_type = $type; } return $daf_cigar;
}
newdescriptionprevnextTop
sub new {
  my ($class,@args) = @_;
  my $self = $class->SUPER::new(@args);
 
  $self->query_DnaFrag_hash({});
  $self->target_DnaFrag_hash({});

  return $self;
}
output_MethodLinkSpeciesSetdescriptionprevnextTop
sub output_MethodLinkSpeciesSet {
  my ($self, $val) = @_;

  if (defined $val) {
    $self->{_out_mlss} = $val;
  }

  return $self->{_out_mlss};
}


#################################
# common config variable holders
#################################
}
query_DnaFrag_hashdescriptionprevnextTop
sub query_DnaFrag_hash {
  my ($self, $val) = @_;

  if (defined $val) {
    $self->{_q_dna_frags} = $val;
  }
  
  return $self->{_q_dna_frags};
}
rundescriptionprevnextTop
sub run {
  my ($self) = @_;
  foreach my $runnable(@{$self->runnable}){
    $runnable->run;
    my $converted_output = $self->convert_output($runnable->output);
    $self->output($converted_output);
    rmdir($runnable->workdir) if (defined $runnable->workdir);
  }
}
sort_chains_by_max_block_scoredescriptionprevnextTop
sub sort_chains_by_max_block_score {
  my ($self, $chains) = @_;

  # sort the chains by maximum score
my @chain_hashes; foreach my $chain (@$chains) { my $chain_hash = { chain => $chain }; foreach my $block (@$chain) { if (not exists $chain_hash->{qname}) { $chain_hash->{qname} = $block->seqname; $chain_hash->{tname} = $block->hseqname; } if (not exists $chain_hash->{score} or $block->score > $chain_hash->{score}) { $chain_hash->{score} = $block->score; } } push @chain_hashes, $chain_hash; } my @sorted = map { $_->{chain}} sort { $b->{score} <=> $a->{score} or $a->{qname} cmp $b->{qname} or $a->{tname} cmp $b->{tname} } @chain_hashes; return\@ sorted; } ###########################################
# feature splitting
###########################################
}
split_featuredescriptionprevnextTop
sub split_feature {
  my ($self, $f, $max_gap) = @_;

  my @split_dafs;
  
  my $need_to_split = 0;

  my @pieces = split(/(\d*[MDI])/, $f->cigar_string);
  foreach my $piece ( @pieces ) {
    next if ($piece !~ /^(\d*)([MDI])$/);
    my $num = ($1 or 1);
    my $type = $2;

    if (($type eq "I" or $type eq "D") and $num >= $max_gap) {
      $need_to_split = 1;
      last;
    }
  }
  
  if ($need_to_split) {
    my (@new_feats);
    foreach my $ug (sort {$a->start <=> $b->start} $f->ungapped_features) {
      if (@new_feats) {
        my ($dist, $hdist);

        my $last_ug = $new_feats[-1]->[-1];

        if ($ug->end < $last_ug->start) {
          # blocks in reverse orienation
$dist = $last_ug->start - $ug->end - 1; } else { # blocks in forward orienatation
$dist = $ug->start - $last_ug->end - 1; } if ($ug->hend < $last_ug->hstart) { # blocks in reverse orienation
$hdist = $last_ug->hstart - $ug->hend - 1; } else { # blocks in forward orienatation
$hdist = $ug->hstart - $last_ug->hend - 1; } if ($dist >= $max_gap or $hdist >= $max_gap) { push @new_feats, []; } } else { push @new_feats, []; } push @{$new_feats[-1]}, $ug; } foreach my $mini_list (@new_feats) { push @split_dafs, Bio::EnsEMBL::DnaDnaAlignFeature->new(-features => $mini_list); } } else { @split_dafs = ($f) } return @split_dafs; } ############################################
# cigar conversion
############################################
}
target_DnaFrag_hashdescriptionprevnextTop
sub target_DnaFrag_hash {
  my ($self, $val) = @_;

  if (defined $val) {
    $self->{_t_dna_frags} = $val;
  }
  
  return $self->{_t_dna_frags};
}
write_outputdescriptionprevnextTop
sub write_output {
  my($self) = @_;

  my $compara_dbh;
  if (defined $self->COMPARA_DB) {
    $compara_dbh = Bio::EnsEMBL::Compara::DBSQL::DBAdaptor->new(%{$self->COMPARA_DB});
  } else {
    $compara_dbh = $self->{'comparaDBA'};
  }

  my @gen_al_groups;
  foreach my $chain (@{$self->output}) {
    my $group_id;
    #store first block
my $first_block = shift @$chain; $compara_dbh->get_GenomicAlignBlockAdaptor->store($first_block); # Set the group_id if one doesn't already exist ie for chains, to be the
# dbID of the first genomic_align_block. For nets,the group_id has already
# been set and is the same as it's chain.
unless (defined($first_block->group_id)) { $group_id = $first_block->dbID; $compara_dbh->get_GenomicAlignBlockAdaptor->store_group_id($first_block, $group_id); } #store the rest of the genomic_align_blocks
foreach my $block (@$chain) { $block->group_id($group_id); $compara_dbh->get_GenomicAlignBlockAdaptor->store($block); } } } ###########################################
# chain sorting
###########################################
}
General documentation
No general documentation available.