Bio::EnsEMBL::Compara::Production::GenomicAlignBlock AlignmentProcessing
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::AlignmentProcessing
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Compara::GenomicAlignBlock
Bio::EnsEMBL::Compara::Production::DBSQL::DBAdaptor
Bio::EnsEMBL::Hive::Process
Bio::EnsEMBL::Utils::Exception qw ( throw warning )
Inherit
Bio::EnsEMBL::Hive::Process
Synopsis
Abstract base class of AlignmentChains and AlignmentNets
Description
Methods
INPUT_METHOD_LINK_TYPE
No description
Code
MAX_GAP
No description
Code
MIN_CHAIN_SCORE
No description
Code
OUTPUT_METHOD_LINK_TYPE
No description
Code
cleanse_output
No description
Code
compara_cigars_from_daf_cigar
No description
Code
compara_dba
No description
Code
convert_output
No description
Code
daf_cigar_from_compara_cigars
No description
Code
delete_alignments
No description
Code
fetch_input
No description
Code
get_params
No description
Code
new
No description
Code
output
No description
Code
output_MethodLinkSpeciesSet
No description
Code
query_DnaFrag_hash
No description
Code
runDescriptionCode
sort_chains_by_max_block_score
No description
Code
split_feature
No description
Code
target_DnaFrag_hash
No description
Code
write_outputDescriptionCode
Methods description
runcode    nextTop
  Arg [1]   : Bio::EnsEMBL::Analysis::RunnableDB
Function : cycles through all the runnables, calls run and pushes
their output into the RunnableDBs output array
Returntype: array ref
Exceptions: none
Example :
write_outputcodeprevnextTop
    Title   :   write_output
Usage : $self->write_output()
Function: Writes contents of $self->output into $self->db
Returns : 1
Args : None
Methods code
INPUT_METHOD_LINK_TYPEdescriptionprevnextTop
sub INPUT_METHOD_LINK_TYPE {
  my ($self, $type) = @_;

  if (defined $type) {
    $self->{_in_method_link_type} = $type;
  }
  
  return $self->{_in_method_link_type};
}
MAX_GAPdescriptionprevnextTop
sub MAX_GAP {
  my ($self, $val) = @_;
  
  if (defined $val) {
    $self->{_max_gap} = $val;
  }

  return $self->{_max_gap};
}
MIN_CHAIN_SCOREdescriptionprevnextTop
sub MIN_CHAIN_SCORE {
  my ($self, $val) = @_;

  if (defined $val) {
    $self->{_min_chain_score} = $val ;
  }
  if (not exists $self->{_min_chain_score}) {
    return undef;
  } else {
    return $self->{_min_chain_score};
  }
}

1;
}
OUTPUT_METHOD_LINK_TYPEdescriptionprevnextTop
sub OUTPUT_METHOD_LINK_TYPE {
  my ($self, $type) = @_;

  if (defined $type) {
    $self->{_out_method_link_type} = $type;
  }
  
  return $self->{_out_method_link_type};
}
cleanse_outputdescriptionprevnextTop
sub cleanse_output {
  my ($self, $chains) = @_;

  # need to "cleanse" the of its original database attachments, so 
# that it is stored as a fresh blocks. This involves touching the
# object's privates, but more efficent than creating brand-new
# blocks from scratch
# NB don't undef group_id - I want to keep the chain group_id for the net.
foreach my $chain (@{$chains}) { foreach my $gab (@{$chain}) { $gab->{'adaptor'} = undef; $gab->{'dbID'} = undef; $gab->{'method_link_species_set_id'} = undef; $gab->method_link_species_set($self->output_MethodLinkSpeciesSet); foreach my $ga (@{$gab->get_all_GenomicAligns}) { $ga->{'adaptor'} = undef; $ga->{'dbID'} = undef; $ga->{'method_link_species_set_id'} = undef; $ga->method_link_species_set($self->output_MethodLinkSpeciesSet); } } } } ###################################
# redundant alignment deletion
}
compara_cigars_from_daf_cigardescriptionprevnextTop
sub compara_cigars_from_daf_cigar {
  my ($self, $daf_cigar) = @_;

  my ($q_cigar_line, $t_cigar_line, $align_length);

  my @pieces = split(/(\d*[MDI])/, $daf_cigar);

  my ($q_counter, $t_counter) = (0,0);

  foreach my $piece ( @pieces ) {

    next if ($piece !~ /^(\d*)([MDI])$/);
    
    my $num = ($1 or 1);
    my $type = $2;
    
    if( $type eq "M" ) {
      $q_counter += $num;
      $t_counter += $num;
      
    } elsif( $type eq "D" ) {
      $q_cigar_line .= (($q_counter == 1) ? "" : $q_counter)."M";
      $q_counter = 0;
      $q_cigar_line .= (($num == 1) ? "" : $num)."D";
      $t_counter += $num;
      
    } elsif( $type eq "I" ) {
      $q_counter += $num;
      $t_cigar_line .= (($t_counter == 1) ? "" : $t_counter)."M";
      $t_counter = 0;
      $t_cigar_line .= (($num == 1) ? "" : $num)."D";
    }
    $align_length += $num;
  }

  $q_cigar_line .= (($q_counter == 1) ? "" : $q_counter)."M"
      if $q_counter;
  $t_cigar_line .= (($t_counter == 1) ? "" : $t_counter)."M"
      if $t_counter;
  
  return ($q_cigar_line, $t_cigar_line, $align_length);
}
compara_dbadescriptionprevnextTop
sub compara_dba {
  my ($self, $val) = @_;

  if (defined $val) {
    $self->{_compara_dba} = $val;
  }

  return $self->{_compara_dba};
}
convert_outputdescriptionprevnextTop
sub convert_output {
  my ($self, $chains_of_dafs) = @_; 

  my (@chains_of_blocks);

  foreach my $chain_of_dafs (@$chains_of_dafs) {
    my @chain_of_blocks;

    foreach my $raw_daf (sort {$a->start <=> $b->start} @$chain_of_dafs) {
      my @split_dafs;
      if ($self->MAX_GAP) {
        @split_dafs = $self->split_feature($raw_daf, $self->MAX_GAP);
      } else {
        @split_dafs = ($raw_daf);
      }

      foreach my $daf (@split_dafs) {
        my ($q_cigar, $t_cigar, $al_len) = 
            $self->compara_cigars_from_daf_cigar($daf->cigar_string);
        
        my $q_dnafrag = $self->query_DnaFrag_hash->{$daf->seqname};
        my $t_dnafrag = $self->target_DnaFrag_hash->{$daf->hseqname};
        
        my $out_mlss = $self->output_MethodLinkSpeciesSet;
        
        my $q_genomic_align = Bio::EnsEMBL::Compara::GenomicAlign->new
            (-dnafrag        => $q_dnafrag,
             -dnafrag_start  => $daf->start,
             -dnafrag_end    => $daf->end,
             -dnafrag_strand => $daf->strand,
             -cigar_line     => $q_cigar,
             -level_id       => $daf->level_id ? $daf->level_id : 1,
             -method_link_species_set => $out_mlss);
        
        my $t_genomic_align = Bio::EnsEMBL::Compara::GenomicAlign->new
            (-dnafrag        => $t_dnafrag,
             -dnafrag_start  => $daf->hstart,
             -dnafrag_end    => $daf->hend,
             -dnafrag_strand => $daf->hstrand,
             -cigar_line     => $t_cigar,
             -level_id       => $daf->level_id ? $daf->level_id : 1,
             -method_link_species_set => $out_mlss);

        my $gen_al_block = Bio::EnsEMBL::Compara::GenomicAlignBlock->new
            (-genomic_align_array     => [$q_genomic_align, $t_genomic_align],
             -score                   => $daf->score,
             -length                  => $al_len,
             -method_link_species_set => $out_mlss,
	     -group_id                => $daf->group_id);
        
        push @chain_of_blocks, $gen_al_block;
      }
    }

    push @chains_of_blocks,\@ chain_of_blocks;
  }
    
  return\@ chains_of_blocks;
}
daf_cigar_from_compara_cigarsdescriptionprevnextTop
sub daf_cigar_from_compara_cigars {
  my ($self, $q_cigar, $t_cigar) = @_;

  my (@q_pieces, @t_pieces);
  foreach my $piece (split(/(\d*[MDGI])/, $q_cigar)) {
    next if ($piece !~ /^(\d*)([MDGI])$/);

    my $num = $1; $num = 1 if $num eq "";
    my $type = $2; $type = 'D' if $type ne 'M';

    if ($num > 0) {
      push @q_pieces, { num  => $num,
                        type => $type, 
                      };
    }
  }
  foreach my $piece (split(/(\d*[MDGI])/, $t_cigar)) {
    next if ($piece !~ /^(\d*)([MDGI])$/);
    
    my $num = $1; $num = 1 if $num eq "";
    my $type = $2; $type = 'D' if $type ne 'M';

    if ($num > 0) {
      push @t_pieces, { num  => $num,
                        type => $type,
                      };
    }
  }

  my $daf_cigar = "";

  while(@q_pieces and @t_pieces) {
    # should never be left with a q piece and no target pieces, or vice versa
my $q = shift @q_pieces; my $t = shift @t_pieces; if ($q->{num} == $t->{num}) { if ($q->{type} eq 'M' and $t->{type} eq 'M') { $daf_cigar .= ($q->{num} > 1 ? $q->{num} : "") . 'M'; } elsif ($q->{type} eq 'M' and $t->{type} eq 'D') { $daf_cigar .= ($q->{num} > 1 ? $q->{num} : "") . 'I'; } elsif ($q->{type} eq 'D' and $t->{type} eq 'M') { $daf_cigar .= ($q->{num} > 1 ? $q->{num} : "") . 'D'; } else { # must be a delete in both seqs; warn and ignore
warn("The following cigars have a simultaneous gap:\n" . $q_cigar . "\n". $t_cigar . "\n"); } } elsif ($q->{num} > $t->{num}) { if ($q->{type} ne 'M') { warn("The following cigars are strange:\n" . $q_cigar . "\n". $t_cigar . "\n"); } if ($t->{type} eq 'M') { $daf_cigar .= ($t->{num} > 1 ? $t->{num} : "") . 'M'; } elsif ($t->{type} eq 'D') { $daf_cigar .= ($t->{num} > 1 ? $t->{num} : "") . 'I'; } unshift @q_pieces, { type => 'M', num => $q->{num} - $t->{num}, }; } else { # $t->{num} > $q->{num}
if ($t->{type} ne 'M') { warn("The following cigars are strange:\n" . $q_cigar . "\n". $t_cigar . "\n"); } if ($q->{type} eq 'M') { $daf_cigar .= ($q->{num} > 1 ? $q->{num} : "") . 'M'; } elsif ($q->{type} eq 'D') { $daf_cigar .= ($q->{num} > 1 ? $q->{num} : "") . 'D'; } unshift @t_pieces, { type => 'M', num => $t->{num} - $q->{num}, }; } } # final sanity checks
if (@q_pieces or @t_pieces) { warn("Left with dangling pieces in the following cigars:\n" . $q_cigar . "\n". $t_cigar . "\n"); return undef; } my $last_type; foreach my $piece (split(/(\d*[MDI])/, $daf_cigar)) { next if not $piece; my ($type) = ($piece =~ /\d*([MDI])/); if (defined $last_type and (($last_type eq 'I' and $type eq 'D') or ($last_type eq 'D' and $type eq 'I'))) { warn("Adjacent Insert/Delete in the following cigars:\n" . $q_cigar . "\n". $t_cigar . "\n". $daf_cigar . "\n"); return undef; } $last_type = $type; } return $daf_cigar;
}
delete_alignmentsdescriptionprevnextTop
sub delete_alignments {
  my ($self, $mlss, $qy_dnafrag, $tg_dnafrag) = @_;

  my $dbc = $self->compara_dba->dbc;
  my $sql = "SELECT ga1.genomic_align_block_id, ga1.genomic_align_id, ga2.genomic_align_id
      FROM genomic_align ga1, genomic_align ga2
      WHERE ga1.genomic_align_block_id=ga2.genomic_align_block_id
      AND ga1.genomic_align_id != ga2.genomic_align_id
      AND ga1.dnafrag_id = ?
      AND ga1.method_link_species_set_id = ?";
  if (defined $tg_dnafrag) {
    $sql .= " AND ga2.dnafrag_id = ?";
  }


  my $sth = $dbc->prepare($sql);
  if (defined $tg_dnafrag) {
    $sth->execute( $qy_dnafrag->dbID, $mlss->dbID, $tg_dnafrag->dbID);
  } else {
    $sth->execute( $qy_dnafrag->dbID, $mlss->dbID);
  }

  my $nb_gabs = 0;
  my @gabs;
  while (my $aref = $sth->fetchrow_arrayref) {
    my ($gab_id, $ga_id1, $ga_id2) = @$aref;
    push @gabs, [$gab_id, $ga_id1, $ga_id2];
    $nb_gabs++;
  }

  my $sql_gab = "delete from genomic_align_block where genomic_align_block_id in ";
  my $sql_ga = "delete from genomic_align where genomic_align_id in ";

  for (my $i=0; $i < scalar @gabs; $i=$i+20000) {
    my (@gab_ids, @ga1_ids, @ga2_ids);
    for (my $j = $i; ($j < scalar @gabs && $j < $i+20000); $j++) {
      push @gab_ids, $gabs[$j][0];
      push @ga1_ids, $gabs[$j][1];
      push @ga2_ids, $gabs[$j][2];
    }
    my $sql_gab_to_exec = $sql_gab . "(" . join(",", @gab_ids) . ")";
    my $sql_ga_to_exec1 = $sql_ga . "(" . join(",", @ga1_ids) . ")";
    my $sql_ga_to_exec2 = $sql_ga . "(" . join(",", @ga2_ids) . ")";

    foreach my $sql ($sql_gab_to_exec,$sql_ga_to_exec1,$sql_ga_to_exec2) {
      my $sth = $dbc->prepare($sql);
      $sth->execute;
      $sth->finish;
    }
  }
}


###################################
}
fetch_inputdescriptionprevnextTop
sub fetch_input {
  my ($self) = @_;

  $self->compara_dba(Bio::EnsEMBL::Compara::Production::DBSQL::DBAdaptor
                     ->new(-dbconn => $self->db->dbc ));
}
get_paramsdescriptionprevnextTop
sub get_params {
  my ($self, $param_string) = @_;

    return unless($param_string);
  #print("parsing parameter string : ",$param_string,"\n");
my $params = eval($param_string); return unless($params); # from parameters
if(defined($params->{'input_method_link'})) { $self->INPUT_METHOD_LINK_TYPE($params->{'input_method_link'}); } if(defined($params->{'output_method_link'})) { $self->OUTPUT_METHOD_LINK_TYPE($params->{'output_method_link'}); } if (defined($params->{'max_gap'})) { $self->MAX_GAP($params->{'max_gap'}); } if (defined($params->{'min_chain_score'})) { $self->MIN_CHAIN_SCORE($params->{'min_chain_score'}); }
}
newdescriptionprevnextTop
sub new {
  my ($class,@args) = @_;
  my $self = $class->SUPER::new(@args);
 
  $self->query_DnaFrag_hash({});
  $self->target_DnaFrag_hash({});

  $self->get_params($self->analysis->parameters);
  $self->get_params($self->input_id);

  return $self;
}
outputdescriptionprevnextTop
sub output {
  my ($self, $output) = @_;
  if(!$self->{output}){
    $self->{output} = [];
  }
  if($output){
    if(ref($output) ne 'ARRAY'){
      throw('Must pass RunnableDB:output an array ref not a '.$output);
    }
    push(@{$self->{output}}, @$output);
  }
  return $self->{output};
}



#################################
# common config variable holders
#################################
}
output_MethodLinkSpeciesSetdescriptionprevnextTop
sub output_MethodLinkSpeciesSet {
  my ($self, $val) = @_;

  if (defined $val) {
    $self->{_out_mlss} = $val;
  }

  return $self->{_out_mlss};
}
query_DnaFrag_hashdescriptionprevnextTop
sub query_DnaFrag_hash {
  my ($self, $val) = @_;

  if (defined $val) {
    $self->{_q_dna_frags} = $val;
  }
  
  return $self->{_q_dna_frags};
}
rundescriptionprevnextTop
sub run {
  my ($self) = @_;
  foreach my $runnable(@{$self->runnable}){
    $runnable->run;
    my $converted_output = $self->convert_output($runnable->output);
    $self->output($converted_output);
    rmdir($runnable->workdir) if (defined $runnable->workdir);
  }
}
sort_chains_by_max_block_scoredescriptionprevnextTop
sub sort_chains_by_max_block_score {
  my ($self, $chains) = @_;

  # sort the chains by maximum score
my @chain_hashes; foreach my $chain (@$chains) { my $chain_hash = { chain => $chain }; foreach my $block (@$chain) { if (not exists $chain_hash->{qname}) { $chain_hash->{qname} = $block->seqname; $chain_hash->{tname} = $block->hseqname; } if (not exists $chain_hash->{score} or $block->score > $chain_hash->{score}) { $chain_hash->{score} = $block->score; } } push @chain_hashes, $chain_hash; } my @sorted = map { $_->{chain}} sort { $b->{score} <=> $a->{score} or $a->{qname} cmp $b->{qname} or $a->{tname} cmp $b->{tname} } @chain_hashes; return\@ sorted; } ###########################################
# feature splitting
###########################################
}
split_featuredescriptionprevnextTop
sub split_feature {
  my ($self, $f, $max_gap) = @_;

  my @split_dafs;
  
  my $need_to_split = 0;

  my @pieces = split(/(\d*[MDI])/, $f->cigar_string);
  foreach my $piece ( @pieces ) {
    next if ($piece !~ /^(\d*)([MDI])$/);
    my $num = ($1 or 1);
    my $type = $2;

    if (($type eq "I" or $type eq "D") and $num >= $max_gap) {
      $need_to_split = 1;
      last;
    }
  }
  
  if ($need_to_split) {
    my (@new_feats);
    foreach my $ug (sort {$a->start <=> $b->start} $f->ungapped_features) {
      if (@new_feats) {
        my ($dist, $hdist);

        my $last_ug = $new_feats[-1]->[-1];

        if ($ug->end < $last_ug->start) {
          # blocks in reverse orienation
$dist = $last_ug->start - $ug->end - 1; } else { # blocks in forward orienatation
$dist = $ug->start - $last_ug->end - 1; } if ($ug->hend < $last_ug->hstart) { # blocks in reverse orienation
$hdist = $last_ug->hstart - $ug->hend - 1; } else { # blocks in forward orienatation
$hdist = $ug->hstart - $last_ug->hend - 1; } if ($dist >= $max_gap or $hdist >= $max_gap) { push @new_feats, []; } } else { push @new_feats, []; } push @{$new_feats[-1]}, $ug; } foreach my $mini_list (@new_feats) { push @split_dafs, Bio::EnsEMBL::DnaDnaAlignFeature->new(-features => $mini_list); } } else { @split_dafs = ($f) } return @split_dafs; } ############################################
# cigar conversion
############################################
}
target_DnaFrag_hashdescriptionprevnextTop
sub target_DnaFrag_hash {
  my ($self, $val) = @_;

  if (defined $val) {
    $self->{_t_dna_frags} = $val;
  }
  
  return $self->{_t_dna_frags};
}
write_outputdescriptionprevnextTop
sub write_output {
  my($self) = @_;

  my $compara_dba = $self->compara_dba;
  my @gen_al_groups;
  foreach my $chain (@{$self->output}) {
      my $group_id;

      #store first block
my $first_block = shift @$chain; $compara_dba->get_GenomicAlignBlockAdaptor->store($first_block); #Set the group_id if one doesn't already exist ie for chains, to be the
#dbID of the first genomic_align_block. For nets,the group_id has already
#been set and is the same as it's chain.
unless (defined($first_block->group_id)) { $group_id = $first_block->dbID; $compara_dba->get_GenomicAlignBlockAdaptor->store_group_id($first_block, $group_id); } #store the rest of the genomic_align_blocks
foreach my $block (@$chain) { if (defined $group_id) { $block->group_id($group_id); } $compara_dba->get_GenomicAlignBlockAdaptor->store($block); } } } ###########################################
# chain sorting
###########################################
}
General documentation
No general documentation available.