Bio::EnsEMBL::Compara::Production::GenomicAlignBlock SimpleNets
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
Bio::EnsEMBL::Compara::Production::GenomicAlign::AlignmentSimple
Package variables
Privates (from "my" definitions)
@ALLOWABLE_METHODS = qw(ContigAwareNet)
Included modules
Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::AlignmentProcessing
Bio::EnsEMBL::Hive::Process
Bio::EnsEMBL::Utils::Exception qw ( throw warning )
Inherit
Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::AlignmentProcessing
Synopsis
  my $db      = Bio::EnsEMBL::DBAdaptor->new($locator);
my $genscan = Bio::EnsEMBL::Compara::Production::GenomicAlign::SimpleNets->new (
-db => $db,
-input_id => $input_id
-analysis => $analysis );
$genscan->fetch_input();
$genscan->run();
$genscan->write_output(); #writes to DB
Description
Given an compara MethodLinkSpeciesSet identifer, and a reference genomic
slice identifer, fetches the GenomicAlignBlocks from the given compara
database, infers chains from the group identifiers, and then forms
an alignment net from the chains and writes the result
back to the database.
This module implements some simple net-inspired functionality directly
in Perl, and does not rely on Jim Kent's original Axt tools
Methods
ContigAwareNet
No description
Code
NET_METHOD
No description
Code
QUERY_DNAFRAG_ID
No description
Code
SUPPORTED_METHOD
No description
Code
TARGET_GENOMEDB_ID
No description
Code
fetch_inputDescriptionCode
get_params
No description
Code
input_chains
No description
Code
query_seq_level_projection
No description
Code
run
No description
Code
write_output
No description
Code
Methods description
fetch_inputcode    nextTop
    Title   :   fetch_input
Usage : $self->fetch_input
Function:
Returns : nothing
Args : none
Methods code
ContigAwareNetdescriptionprevnextTop
sub ContigAwareNet {
  my ($self) = @_;
  
  my $chains = $self->input_chains;

  # assumption 1: chains are sorted from "best" to "worst"
# assumption 2: each chain is sorted from start to end in query (ref) sequence
my (@net_chains, @retained_blocks, %contigs_of_kept_blocks); foreach my $c (@$chains) { my @blocks = @$c; my $keep_chain = 1; BLOCK: foreach my $block (@blocks) { my $qga = $block->reference_genomic_align; #my ($tga) = @{$block->get_all_non_reference_genomic_aligns};
OTHER_BLOCK: foreach my $oblock (@retained_blocks) { my $oqga = $oblock->reference_genomic_align; #my ($otga) = @{$ob->get_all_non_reference_genomic_aligns};
if ($oqga->dnafrag_start <= $qga->dnafrag_end and $oqga->dnafrag_end >= $qga->dnafrag_start) { $keep_chain = 0; last BLOCK; } elsif ($oqga->dnafrag_start > $qga->dnafrag_end) { last OTHER_BLOCK; } } } if ($keep_chain) { my (%contigs_of_blocks, @split_blocks); # the following chops the blocks into pieces such that each block
# lies completely within a sequence-level region (contig). It's rare
# that this is not the case anyway, but it's best to be sure...
foreach my $block (@blocks) { my ($inside_seg, @overlap_segs); my $qga = $block->reference_genomic_align; foreach my $seg (@{$self->query_seq_level_projection}) { if ($qga->dnafrag_start >= $seg->from_start and $qga->dnafrag_end <= $seg->from_end) { $inside_seg = $seg; last; } elsif ($seg->from_start <= $qga->dnafrag_end and $seg->from_end >= $qga->dnafrag_start) { push @overlap_segs, $seg; } elsif ($seg->from_start > $qga->dnafrag_end) { last; } } if (defined $inside_seg) { push @split_blocks, $block; $contigs_of_blocks{$block} = $inside_seg; } else { my @cut_blocks; foreach my $seg (@overlap_segs) { my ($reg_start, $reg_end) = ($qga->dnafrag_start, $qga->dnafrag_end); $reg_start = $seg->from_start if $seg->from_start > $reg_start; $reg_end = $seg->from_end if $seg->from_end < $reg_end; my $cut_block = $block->restrict_between_reference_positions($reg_start, $reg_end); $cut_block->score($block->score); if (defined $cut_block) { push @cut_blocks, $cut_block; $contigs_of_blocks{$cut_block} = $seg; } } push @split_blocks, @cut_blocks; } } @blocks = @split_blocks; # only retain blocks that lie on different contigs from all retained
# blocks so far
my @diff_contig_blocks; my %kept_contigs = reverse %contigs_of_kept_blocks; foreach my $block (@blocks) { if (not exists $kept_contigs{$contigs_of_blocks{$block}}) { push @diff_contig_blocks, $block; } } # calculate what proportion of the overall chain remains; reject if
# the proportion is less than 50%
my $kept_len = 0; my $total_len = 0; map { $kept_len += $_->reference_genomic_align->dnafrag_end - $_->reference_genomic_align->dnafrag_start + 1; } @diff_contig_blocks; map { $total_len += $_->reference_genomic_align->dnafrag_end - $_->reference_genomic_align->dnafrag_start + 1; } @blocks; if ($kept_len / $total_len > 0.5) {
foreach my
$bid (keys %contigs_of_blocks) {
$contigs_of_kept_blocks{$bid} = $contigs_of_blocks{$bid};
} push @net_chains,\@ diff_contig_blocks; push @retained_blocks, @diff_contig_blocks; @retained_blocks = sort { $a->reference_genomic_align->dnafrag_start <=> $b->reference_genomic_align->dnafrag_start; } @retained_blocks; } } } # fetch all genomic_aligns from the result blocks to avoid cacheing issues
# when storing
foreach my $ch (@net_chains) { foreach my $bl (@{$ch}) { foreach my $al (@{$bl->get_all_GenomicAligns}) { $al->dnafrag; } } } return\@ net_chains; } #############################
}
NET_METHODdescriptionprevnextTop
sub NET_METHOD {
  my ($self, $val) = @_;

  if (defined $val) {
    $self->{_net_type} = $val;
  }

  return $self->{_net_type};
}
QUERY_DNAFRAG_IDdescriptionprevnextTop
sub QUERY_DNAFRAG_ID {
  my ($self,$value) = @_;
  
  if (defined $value) {
    $self->{'_query_dnafrag_id'} = $value;
  }
  return $self->{'_query_dnafrag_id'};
}
SUPPORTED_METHODdescriptionprevnextTop
sub SUPPORTED_METHOD {
  my ($class, $method ) = @_;

  my $allowed = 0;
  foreach my $meth (@ALLOWABLE_METHODS) {
    if ($meth eq $method) {
      $allowed = 1;
      last;
    }
  }

  return $allowed;
}
TARGET_GENOMEDB_IDdescriptionprevnextTop
sub TARGET_GENOMEDB_ID {
  my ($self,$value) = @_;
  
  if (defined $value) {
    $self->{'_target_genomedb_id'} = $value;
  }
  return $self->{'_target_genomedb_id'};
}




1;
}
fetch_inputdescriptionprevnextTop
sub fetch_input {
  my( $self) = @_; 

  $self->SUPER::fetch_input;
  $self->compara_dba->dbc->disconnect_when_inactive(0);

  my $mlssa = $self->compara_dba->get_MethodLinkSpeciesSetAdaptor;
  my $dnafa = $self->compara_dba->get_DnaFragAdaptor;
  my $gdba = $self->compara_dba->get_GenomeDBAdaptor;
  my $gaba = $self->compara_dba->get_GenomicAlignBlockAdaptor;

  $self->get_params($self->analysis->parameters);
  $self->get_params($self->input_id);

  ################################################################
# get the compara data: MethodLinkSpeciesSet, reference DnaFrag,
# and GenomicAlignBlocks
################################################################
my $qy_dnafrag; if ($self->QUERY_DNAFRAG_ID) { $qy_dnafrag = $dnafa->fetch_by_dbID($self->QUERY_DNAFRAG_ID); my @seq_level_bits = @{$qy_dnafrag->slice->project('seqlevel')}; $self->query_seq_level_projection(\@seq_level_bits); } throw("Could not fetch DnaFrag with dbID " . $self->QUERY_DNAFRAG_ID ) if not defined $qy_dnafrag; my $tg_gdb; if ($self->TARGET_GENOMEDB_ID) { $tg_gdb = $gdba->fetch_by_dbID($self->TARGET_GENOMEDB_ID); } throw("Could not fetch GenomeDB with dbID " . $self->TARGET_GENOMEDB_ID) if not defined $tg_gdb; my $mlss = $mlssa->fetch_by_method_link_type_GenomeDBs($self->INPUT_METHOD_LINK_TYPE, [$qy_dnafrag->genome_db, $tg_gdb]); throw("No MethodLinkSpeciesSet for " . $self->INPUT_METHOD_LINK_TYPE) if not defined $mlss; my $out_mlss = new Bio::EnsEMBL::Compara::MethodLinkSpeciesSet; $out_mlss->method_link_type($self->OUTPUT_METHOD_LINK_TYPE); $out_mlss->species_set($mlss->species_set); $mlssa->store($out_mlss); ######## needed for output####################
$self->output_MethodLinkSpeciesSet($out_mlss); if ($self->input_job->retry_count > 0) { print STDERR "Deleting alignments as it is a rerun\n"; $self->delete_alignments($out_mlss, $qy_dnafrag); } my $gabs = $gaba->fetch_all_by_MethodLinkSpeciesSet_DnaFrag($mlss, $qy_dnafrag); ###################################################################
# get the target slices and bin the GenomicAlignBlocks by group id
###################################################################
my %chains; while (my $gab = shift @{$gabs}) { my ($qy_ga) = $gab->reference_genomic_align; my ($tg_ga) = @{$gab->get_all_non_reference_genomic_aligns}; my $group_id = $gab->group_id; if (not exists $chains{$group_id}) { $chains{$group_id} = { score => $gab->score, query_name => $qy_ga->dnafrag->name, query_pos => $qy_ga->dnafrag_start, target_name => $tg_ga->dnafrag->name, target_pos => $tg_ga->dnafrag_start, blocks => [], }; } else { if ($gab->score > $chains{$group_id}->{score}) { $chains{$group_id}->{score} = $gab->score; } if ($chains{$group_id}->{query_pos} > $qy_ga->dnafrag_start) { $chains{$group_id}->{query_pos} = $qy_ga->dnafrag_start; } if ($chains{$group_id}->{target_pos} > $tg_ga->dnafrag_start) { $chains{$group_id}->{target_pos} = $tg_ga->dnafrag_start; } } push @{$chains{$group_id}->{blocks}}, $gab; } # sort the blocks within each chain
foreach my $group_id (keys %chains) { $chains{$group_id}->{blocks} = [sort { $a->reference_genomic_align->dnafrag_start <=> $b->reference_genomic_align->dnafrag_start; } @{$chains{$group_id}->{blocks}}]; } # now sort the chains by score. Ties are resolved by target and location
# to make the sort deterministic
my @chains; foreach my $group_id (sort { $chains{$b}->{score} <=> $chains{$a}->{score} or $chains{$a}->{target_name} cmp $chains{$b}->{target_name} or $chains{$a}->{target_pos} <=> $chains{$b}->{target_pos} or $chains{$a}->{query_pos} <=> $chains{$b}->{query_pos} } keys %chains) { push @chains, $chains{$group_id}->{blocks}; } $self->input_chains(\@chains);
}
get_paramsdescriptionprevnextTop
sub get_params {
  my $self         = shift;
  my $param_string = shift;

  return unless($param_string);
  print("parsing parameter string : ",$param_string,"\n");

  my $params = eval($param_string);
  return unless($params);

  $self->SUPER::get_params($param_string);

  if (defined($params->{'qy_dnafrag_id'})) {
    $self->QUERY_DNAFRAG_ID($params->{'qy_dnafrag_id'});
  }
  if (defined($params->{'tg_genomedb_id'})) {
    $self->TARGET_GENOMEDB_ID($params->{'tg_genomedb_id'});
  }
  if (defined $params->{'net_method'}) {
    $self->NET_METHOD($params->{'net_method'});
  }

  return 1;
}
input_chainsdescriptionprevnextTop
sub input_chains {
  my ($self, $val) = @_;

  if (defined $val) {
    $self->{_query_chains} = $val;
  }

  return $self->{_query_chains};
}
query_seq_level_projectiondescriptionprevnextTop
sub query_seq_level_projection {
  my ($self, $val) = @_;

  if (defined $val) {
    $self->{_query_seq_level_bits} = $val;
  }
  return $self->{_query_seq_level_bits};
}



#########################################
# config vars
}
rundescriptionprevnextTop
sub run {
  my ($self) = @_;

  my $output;

  if ($self->NET_METHOD) {
    no strict 'refs';

    my $method = $self->NET_METHOD;
    $output = $self->$method;
  } else {
    $output = $self->ContigAwareNet();
  }
 
  $self->cleanse_output($output);
  $self->output($output);
}
write_outputdescriptionprevnextTop
sub write_output {
  my $self = shift;

  my $disconnect_when_inactive_default = $self->db->dbc->disconnect_when_inactive;
  $self->compara_dba->dbc->disconnect_when_inactive(0);
  $self->SUPER::write_output;
  $self->compara_dba->dbc->disconnect_when_inactive($disconnect_when_inactive_default);
}


############################
# specific net methods
###########################
my @ALLOWABLE_METHODS = qw(ContigAwareNet);
}
General documentation
No general documentation available.