AlignmentProcessing documentation.

INPUT_METHOD_LINK_TYPE	No description	Code
MAX_GAP	No description	Code
MIN_CHAIN_SCORE	No description	Code
OUTPUT_METHOD_LINK_TYPE	No description	Code
cleanse_output	No description	Code
compara_cigars_from_daf_cigar	No description	Code
compara_dba	No description	Code
convert_output	No description	Code
daf_cigar_from_compara_cigars	No description	Code
delete_alignments	No description	Code
fetch_input	No description	Code
get_params	No description	Code
new	No description	Code
output	No description	Code
output_MethodLinkSpeciesSet	No description	Code
query_DnaFrag_hash	No description	Code
run	Description	Code
sort_chains_by_max_block_score	No description	Code
split_feature	No description	Code
target_DnaFrag_hash	No description	Code
write_output	Description	Code

Methods description

run

code

Top

  Arg [1]   : Bio::EnsEMBL::Analysis::RunnableDB
  Function  : cycles through all the runnables, calls run and pushes
  their output into the RunnableDBs output array
  Returntype: array ref
  Exceptions: none
  Example   :

write_output

code

Top

    Title   :   write_output
    Usage   :   $self->write_output()
    Function:   Writes contents of $self->output into $self->db
    Returns :   1
    Args    :   None

Methods code

INPUT_METHOD_LINK_TYPE

description

Top

sub INPUT_METHOD_LINK_TYPE {

  my ($self, $type) = @_;

  if (defined $type) {
    $self->{_in_method_link_type} = $type;
  }
  
  return $self->{_in_method_link_type};

}

MAX_GAP

description

Top

sub MAX_GAP {

  my ($self, $val) = @_;
  
  if (defined $val) {
    $self->{_max_gap} = $val;
  }

  return $self->{_max_gap};

}

MIN_CHAIN_SCORE

description

Top

sub MIN_CHAIN_SCORE {

  my ($self, $val) = @_;

  if (defined $val) {
    $self->{_min_chain_score} = $val ;
  }
  if (not exists $self->{_min_chain_score}) {
    return undef;
  } else {
    return $self->{_min_chain_score};
  }
}

1;

}

OUTPUT_METHOD_LINK_TYPE

description

Top

sub OUTPUT_METHOD_LINK_TYPE {

  my ($self, $type) = @_;

  if (defined $type) {
    $self->{_out_method_link_type} = $type;
  }
  
  return $self->{_out_method_link_type};

}

cleanse_output

description

Top

sub cleanse_output {

  my ($self, $chains) = @_;

  # need to "cleanse" the of its original database attachments, so 
  # that it is stored as a fresh blocks. This involves touching the
  # object's privates, but more efficent than creating brand-new
  # blocks from scratch
  # NB don't undef group_id - I want to keep the chain group_id for the net.

  foreach my $chain (@{$chains}) {
    foreach my $gab (@{$chain}) {

      $gab->{'adaptor'} = undef;
      $gab->{'dbID'} = undef;
      $gab->{'method_link_species_set_id'} = undef;
      $gab->method_link_species_set($self->output_MethodLinkSpeciesSet);
      foreach my $ga (@{$gab->get_all_GenomicAligns}) {
        $ga->{'adaptor'} = undef;
        $ga->{'dbID'} = undef;
        $ga->{'method_link_species_set_id'} = undef;
        $ga->method_link_species_set($self->output_MethodLinkSpeciesSet);
      }
    }
  }

}


###################################
# redundant alignment deletion

}

compara_cigars_from_daf_cigar

description

Top

sub compara_cigars_from_daf_cigar {

  my ($self, $daf_cigar) = @_;

  my ($q_cigar_line, $t_cigar_line, $align_length);

  my @pieces = split(/(\d*[MDI])/, $daf_cigar);

  my ($q_counter, $t_counter) = (0,0);

  foreach my $piece ( @pieces ) {

    next if ($piece !~ /^(\d*)([MDI])$/);
    
    my $num = ($1 or 1);
    my $type = $2;
    
    if( $type eq "M" ) {
      $q_counter += $num;
      $t_counter += $num;
      
    } elsif( $type eq "D" ) {
      $q_cigar_line .= (($q_counter == 1) ? "" : $q_counter)."M";
      $q_counter = 0;
      $q_cigar_line .= (($num == 1) ? "" : $num)."D";
      $t_counter += $num;
      
    } elsif( $type eq "I" ) {
      $q_counter += $num;
      $t_cigar_line .= (($t_counter == 1) ? "" : $t_counter)."M";
      $t_counter = 0;
      $t_cigar_line .= (($num == 1) ? "" : $num)."D";
    }
    $align_length += $num;
  }

  $q_cigar_line .= (($q_counter == 1) ? "" : $q_counter)."M"
      if $q_counter;
  $t_cigar_line .= (($t_counter == 1) ? "" : $t_counter)."M"
      if $t_counter;
  
  return ($q_cigar_line, $t_cigar_line, $align_length);

}

compara_dba

description

Top

sub compara_dba {

  my ($self, $val) = @_;

  if (defined $val) {
    $self->{_compara_dba} = $val;
  }

  return $self->{_compara_dba};

}

convert_output

description

Top

sub convert_output {

  my ($self, $chains_of_dafs) = @_; 

  my (@chains_of_blocks);

  foreach my $chain_of_dafs (@$chains_of_dafs) {
    my @chain_of_blocks;

    foreach my $raw_daf (sort {$a->start <=> $b->start} @$chain_of_dafs) {
      my @split_dafs;
      if ($self->MAX_GAP) {
        @split_dafs = $self->split_feature($raw_daf, $self->MAX_GAP);
      } else {
        @split_dafs = ($raw_daf);
      }

      foreach my $daf (@split_dafs) {
        my ($q_cigar, $t_cigar, $al_len) = 
            $self->compara_cigars_from_daf_cigar($daf->cigar_string);
        
        my $q_dnafrag = $self->query_DnaFrag_hash->{$daf->seqname};
        my $t_dnafrag = $self->target_DnaFrag_hash->{$daf->hseqname};
        
        my $out_mlss = $self->output_MethodLinkSpeciesSet;
        
        my $q_genomic_align = Bio::EnsEMBL::Compara::GenomicAlign->new
            (-dnafrag        => $q_dnafrag,
             -dnafrag_start  => $daf->start,
             -dnafrag_end    => $daf->end,
             -dnafrag_strand => $daf->strand,
             -cigar_line     => $q_cigar,
             -level_id       => $daf->level_id ? $daf->level_id : 1,
             -method_link_species_set => $out_mlss);
        
        my $t_genomic_align = Bio::EnsEMBL::Compara::GenomicAlign->new
            (-dnafrag        => $t_dnafrag,
             -dnafrag_start  => $daf->hstart,
             -dnafrag_end    => $daf->hend,
             -dnafrag_strand => $daf->hstrand,
             -cigar_line     => $t_cigar,
             -level_id       => $daf->level_id ? $daf->level_id : 1,
             -method_link_species_set => $out_mlss);

        my $gen_al_block = Bio::EnsEMBL::Compara::GenomicAlignBlock->new
            (-genomic_align_array     => [$q_genomic_align, $t_genomic_align],
             -score                   => $daf->score,
             -length                  => $al_len,
             -method_link_species_set => $out_mlss,
	     -group_id                => $daf->group_id);
        
        push @chain_of_blocks, $gen_al_block;
      }
    }

    push @chains_of_blocks,\@ chain_of_blocks;
  }
    
  return\@ chains_of_blocks;

}

daf_cigar_from_compara_cigars

description

Top

sub daf_cigar_from_compara_cigars {

  my ($self, $q_cigar, $t_cigar) = @_;

  my (@q_pieces, @t_pieces);
  foreach my $piece (split(/(\d*[MDGI])/, $q_cigar)) {
    next if ($piece !~ /^(\d*)([MDGI])$/);

    my $num = $1; $num = 1 if $num eq "";
    my $type = $2; $type = 'D' if $type ne 'M';

    if ($num > 0) {
      push @q_pieces, { num  => $num,
                        type => $type, 
                      };
    }
  }
  foreach my $piece (split(/(\d*[MDGI])/, $t_cigar)) {
    next if ($piece !~ /^(\d*)([MDGI])$/);
    
    my $num = $1; $num = 1 if $num eq "";
    my $type = $2; $type = 'D' if $type ne 'M';

    if ($num > 0) {
      push @t_pieces, { num  => $num,
                        type => $type,
                      };
    }
  }

  my $daf_cigar = "";

  while(@q_pieces and @t_pieces) {
    # should never be left with a q piece and no target pieces, or vice versa
    my $q = shift @q_pieces;
    my $t = shift @t_pieces;

    if ($q->{num} == $t->{num}) {
      if ($q->{type} eq 'M' and $t->{type} eq 'M') {
        $daf_cigar .= ($q->{num} > 1 ? $q->{num} : "") . 'M';
      } elsif ($q->{type} eq 'M' and $t->{type} eq 'D') {
        $daf_cigar .= ($q->{num} > 1 ? $q->{num} : "") . 'I';
      } elsif ($q->{type} eq 'D' and $t->{type} eq 'M') {
        $daf_cigar .= ($q->{num} > 1 ? $q->{num} : "") . 'D';
      } else {
        # must be a delete in both seqs; warn and ignore
        warn("The following cigars have a simultaneous gap:\n" . 
             $q_cigar . "\n". 
             $t_cigar . "\n");
      }
    } elsif ($q->{num} > $t->{num}) {
      if ($q->{type} ne 'M') {
        warn("The following cigars are strange:\n" . 
             $q_cigar . "\n". 
             $t_cigar . "\n");
      }
      
      if ($t->{type} eq 'M') {
        $daf_cigar .= ($t->{num} > 1 ? $t->{num} : "") . 'M';
      } elsif ($t->{type} eq 'D') {
        $daf_cigar .= ($t->{num} > 1 ? $t->{num} : "") . 'I';
      } 

      unshift @q_pieces, { 
        type => 'M',
        num  => $q->{num} - $t->{num}, 
      };

    } else {
      # $t->{num} > $q->{num}
      if ($t->{type} ne 'M') {
        warn("The following cigars are strange:\n" . 
             $q_cigar . "\n". 
             $t_cigar . "\n");
      }
      
      if ($q->{type} eq 'M') {
        $daf_cigar .= ($q->{num} > 1 ? $q->{num} : "") . 'M';
      } elsif ($q->{type} eq 'D') {
        $daf_cigar .= ($q->{num} > 1 ? $q->{num} : "") . 'D';
      } 
      unshift @t_pieces, { 
        type => 'M',
        num  => $t->{num} - $q->{num},
      };
    } 
  }

  # final sanity checks

  if (@q_pieces or @t_pieces) {
    warn("Left with dangling pieces in the following cigars:\n" .
          $q_cigar . "\n". 
          $t_cigar . "\n");
    return undef;
  }
  
  my $last_type;
  foreach my $piece (split(/(\d*[MDI])/, $daf_cigar)) {
    next if not $piece;
    my ($type) = ($piece =~ /\d*([MDI])/);

    if (defined $last_type and
       (($last_type eq 'I' and $type eq 'D') or
        ($last_type eq 'D' and $type eq 'I'))) {

      warn("Adjacent Insert/Delete in the following cigars:\n" .
           $q_cigar . "\n". 
           $t_cigar . "\n".
           $daf_cigar . "\n");

      return undef;
    }
    $last_type = $type;
  }
  
  return $daf_cigar;

}

delete_alignments

description

Top

sub delete_alignments {

  my ($self, $mlss, $qy_dnafrag, $tg_dnafrag) = @_;

  my $dbc = $self->compara_dba->dbc;
  my $sql = "SELECT ga1.genomic_align_block_id, ga1.genomic_align_id, ga2.genomic_align_id
      FROM genomic_align ga1, genomic_align ga2
      WHERE ga1.genomic_align_block_id=ga2.genomic_align_block_id
      AND ga1.genomic_align_id != ga2.genomic_align_id
      AND ga1.dnafrag_id = ?
      AND ga1.method_link_species_set_id = ?";
  if (defined $tg_dnafrag) {
    $sql .= " AND ga2.dnafrag_id = ?";
  }


  my $sth = $dbc->prepare($sql);
  if (defined $tg_dnafrag) {
    $sth->execute( $qy_dnafrag->dbID, $mlss->dbID, $tg_dnafrag->dbID);
  } else {
    $sth->execute( $qy_dnafrag->dbID, $mlss->dbID);
  }

  my $nb_gabs = 0;
  my @gabs;
  while (my $aref = $sth->fetchrow_arrayref) {
    my ($gab_id, $ga_id1, $ga_id2) = @$aref;
    push @gabs, [$gab_id, $ga_id1, $ga_id2];
    $nb_gabs++;
  }

  my $sql_gab = "delete from genomic_align_block where genomic_align_block_id in ";
  my $sql_ga = "delete from genomic_align where genomic_align_id in ";

  for (my $i=0; $i < scalar @gabs; $i=$i+20000) {
    my (@gab_ids, @ga1_ids, @ga2_ids);
    for (my $j = $i; ($j < scalar @gabs && $j < $i+20000); $j++) {
      push @gab_ids, $gabs[$j][0];
      push @ga1_ids, $gabs[$j][1];
      push @ga2_ids, $gabs[$j][2];
    }
    my $sql_gab_to_exec = $sql_gab . "(" . join(",", @gab_ids) . ")";
    my $sql_ga_to_exec1 = $sql_ga . "(" . join(",", @ga1_ids) . ")";
    my $sql_ga_to_exec2 = $sql_ga . "(" . join(",", @ga2_ids) . ")";

    foreach my $sql ($sql_gab_to_exec,$sql_ga_to_exec1,$sql_ga_to_exec2) {
      my $sth = $dbc->prepare($sql);
      $sth->execute;
      $sth->finish;
    }
  }
}


###################################

}

fetch_input

description

Top

sub fetch_input {

  my ($self) = @_;

  $self->compara_dba(Bio::EnsEMBL::Compara::Production::DBSQL::DBAdaptor
                     ->new(-dbconn => $self->db->dbc ));

}

get_params

description

Top

sub get_params {

  my ($self, $param_string) = @_;

    return unless($param_string);
  #print("parsing parameter string : ",$param_string,"\n");

  my $params = eval($param_string);
  return unless($params);

  # from parameters
  if(defined($params->{'input_method_link'})) {
    $self->INPUT_METHOD_LINK_TYPE($params->{'input_method_link'});
  }
  if(defined($params->{'output_method_link'})) {
    $self->OUTPUT_METHOD_LINK_TYPE($params->{'output_method_link'});
  }
  if (defined($params->{'max_gap'})) {
    $self->MAX_GAP($params->{'max_gap'});
  }
  if (defined($params->{'min_chain_score'})) {
    $self->MIN_CHAIN_SCORE($params->{'min_chain_score'});
  }

}

new

description

Top

sub new {

  my ($class,@args) = @_;
  my $self = $class->SUPER::new(@args);
 
  $self->query_DnaFrag_hash({});
  $self->target_DnaFrag_hash({});

  $self->get_params($self->analysis->parameters);
  $self->get_params($self->input_id);

  return $self;

}

output

description

Top

sub output {

  my ($self, $output) = @_;
  if(!$self->{output}){
    $self->{output} = [];
  }
  if($output){
    if(ref($output) ne 'ARRAY'){
      throw('Must pass RunnableDB:output an array ref not a '.$output);
    }
    push(@{$self->{output}}, @$output);
  }
  return $self->{output};
}



#################################
# common config variable holders
#################################

}

output_MethodLinkSpeciesSet

description

Top

sub output_MethodLinkSpeciesSet {

  my ($self, $val) = @_;

  if (defined $val) {
    $self->{_out_mlss} = $val;
  }

  return $self->{_out_mlss};

}

query_DnaFrag_hash

description

Top

sub query_DnaFrag_hash {

  my ($self, $val) = @_;

  if (defined $val) {
    $self->{_q_dna_frags} = $val;
  }
  
  return $self->{_q_dna_frags};

}

run

description

Top

sub run {

  my ($self) = @_;
  foreach my $runnable(@{$self->runnable}){
    $runnable->run;
    my $converted_output = $self->convert_output($runnable->output);
    $self->output($converted_output);
    rmdir($runnable->workdir) if (defined $runnable->workdir);
  }

}

sort_chains_by_max_block_score

description

Top

sub sort_chains_by_max_block_score {

  my ($self, $chains) = @_;

  # sort the chains by maximum score
  my @chain_hashes;
  foreach my $chain (@$chains) {
    my $chain_hash = { chain => $chain };
    foreach my $block (@$chain) {
      if (not exists $chain_hash->{qname}) {
        $chain_hash->{qname} = $block->seqname;
        $chain_hash->{tname} = $block->hseqname;
      }
      if (not exists $chain_hash->{score} or
          $block->score > $chain_hash->{score}) {
        $chain_hash->{score} = $block->score;
      }
    }
    push @chain_hashes, $chain_hash;
  }
  
  my @sorted = map { $_->{chain}} sort {
    $b->{score} <=> $a->{score} 
    or $a->{qname} cmp $b->{qname}
    or $a->{tname} cmp $b->{tname}
  } @chain_hashes;

  return\@ sorted;
}


###########################################
# feature splitting
###########################################

}

split_feature

description

Top

sub split_feature {

  my ($self, $f, $max_gap) = @_;

  my @split_dafs;
  
  my $need_to_split = 0;

  my @pieces = split(/(\d*[MDI])/, $f->cigar_string);
  foreach my $piece ( @pieces ) {
    next if ($piece !~ /^(\d*)([MDI])$/);
    my $num = ($1 or 1);
    my $type = $2;

    if (($type eq "I" or $type eq "D") and $num >= $max_gap) {
      $need_to_split = 1;
      last;
    }
  }
  
  if ($need_to_split) {
    my (@new_feats);
    foreach my $ug (sort {$a->start <=> $b->start} $f->ungapped_features) {
      if (@new_feats) {
        my ($dist, $hdist);

        my $last_ug = $new_feats[-1]->[-1];

        if ($ug->end < $last_ug->start) {
          # blocks in reverse orienation
          $dist = $last_ug->start - $ug->end - 1;
        } else {
          # blocks in forward orienatation
          $dist = $ug->start - $last_ug->end - 1;
        }
        if ($ug->hend < $last_ug->hstart) {
          # blocks in reverse orienation
          $hdist = $last_ug->hstart - $ug->hend - 1;
        } else {
          # blocks in forward orienatation
          $hdist = $ug->hstart - $last_ug->hend - 1;
        }

        if ($dist >= $max_gap or $hdist >= $max_gap) {
          push @new_feats, [];
        }
      } else {
        push @new_feats, [];
      }
      push @{$new_feats[-1]}, $ug;
    }
    
    foreach my $mini_list (@new_feats) {
      push @split_dafs, Bio::EnsEMBL::DnaDnaAlignFeature->new(-features => $mini_list);
    }

  } else {
    @split_dafs = ($f)
  }  

  return @split_dafs;
}

############################################
# cigar conversion
############################################

}

target_DnaFrag_hash

description

Top

sub target_DnaFrag_hash {

  my ($self, $val) = @_;

  if (defined $val) {
    $self->{_t_dna_frags} = $val;
  }
  
  return $self->{_t_dna_frags};

}

write_output

description

Top

sub write_output {

  my($self) = @_;

  my $compara_dba = $self->compara_dba;
  my @gen_al_groups;
  foreach my $chain (@{$self->output}) {
      my $group_id;

      #store first block
      my $first_block = shift @$chain;

      $compara_dba->get_GenomicAlignBlockAdaptor->store($first_block);
    
      #Set the group_id if one doesn't already exist ie for chains, to be the
      #dbID of the first genomic_align_block. For nets,the group_id has already
      #been set and is the same as it's chain.
      unless (defined($first_block->group_id)) {
	  $group_id = $first_block->dbID;
	  $compara_dba->get_GenomicAlignBlockAdaptor->store_group_id($first_block, $group_id);
      }

      #store the rest of the genomic_align_blocks
      foreach my $block (@$chain) {
	  if (defined $group_id) {
	      $block->group_id($group_id);
	  }
	  $compara_dba->get_GenomicAlignBlockAdaptor->store($block);
     }
  }
}

###########################################
# chain sorting
###########################################

}

General documentation

No general documentation available.