Bio::EnsEMBL::Analysis::RunnableDB LayerAnnotation
Other packages in the module: Bio::EnsEMBL::Analysis::RunnableDB::LayerAnnotation
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
LayerAnnotation - DESCRIPTION of Object
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Analysis::Config::GeneBuild::LayerAnnotation
Bio::EnsEMBL::Analysis::RunnableDB::BaseGeneBuild
Bio::EnsEMBL::Analysis::Tools::GeneBuildUtils::GeneUtils
Bio::EnsEMBL::Utils::Argument qw ( rearrange )
Bio::EnsEMBL::Utils::Exception qw ( throw warning )
Inherit
Unavailable
Synopsis
Description
Methods
LAYERS
No description
Code
SOURCEDB_REFS
No description
Code
TARGETDB_REF
No description
Code
fetch_input
No description
Code
generic_filter
No description
Code
layers
No description
Code
new
No description
Code
read_and_check_config
No description
Code
run
No description
Code
write_output
No description
Code
Methods description
None available.
Methods code
LAYERSdescriptionprevnextTop
sub LAYERS {
  my ($self, $val) = @_;

  if (defined $val) {
    $self->{_layers} = $val;
  }

  return $self->{_layers};
}
SOURCEDB_REFSdescriptionprevnextTop
sub SOURCEDB_REFS {
  my ($self, $val) = @_;

  if (defined $val) {
    $self->{_inputdb_names} = $val;
  }

  return $self->{_inputdb_names};
}
TARGETDB_REFdescriptionprevnextTop
sub TARGETDB_REF {
  my ($self, $val) = @_;

  if (defined $val) {
    $self->{_outputdb_name} = $val;
  }

  return $self->{_outputdb_name};
}

##############################################
package Bio::EnsEMBL::Analysis::RunnableDB::LayerAnnotation::Layer; use Bio::EnsEMBL::Utils::Exception qw(throw warning); use Bio::EnsEMBL::Utils::Argument qw (rearrange);
}
fetch_inputdescriptionprevnextTop
sub fetch_input {
  my ($self) = @_;  

  foreach my $dbname (@{$self->SOURCEDB_REFS}) {
    my $dbh = $self->get_dbadaptor($dbname);
    my $slice = $dbh->get_SliceAdaptor->fetch_by_name($self->input_id);
    my $tlslice = $dbh->get_SliceAdaptor->fetch_by_region($slice->coord_system->name,
                                                          $slice->seq_region_name);

    foreach my $layer (@{$self->layers}) {
      foreach my $tp (@{$layer->biotypes}) {
        foreach my $g (@{$slice->get_all_Genes_by_type($tp)}) {
          $g = $g->transfer($tlslice);
          push @{$layer->genes}, $g;
        }
      }
    }
  }
}


#####################################
}
generic_filterdescriptionprevnextTop
sub generic_filter {
  my ($self, $these, $others) = @_;

  # interference is judged by overlap at exon level
# assumption is that @others is sorted by gene start
my @filtered; my $cur_idx = 0; foreach my $obj (@$these) { my (@genomic_overlap, $left_bound); for(my $i=$cur_idx; $i < @$others; $i++) { my $o_obj = $others->[$i]; if ($o_obj->end >= $obj->start and not defined $left_bound) { $left_bound = $i; } if ($o_obj->end < $obj->start) { next; } elsif ($o_obj->start > $obj->end) { last; } else { push @genomic_overlap, $o_obj; } } $cur_idx = $left_bound if defined $left_bound; my $exon_overlap = 0; if (@genomic_overlap) { my @exons = @{$obj->get_all_Exons}; OG: foreach my $o_obj (@genomic_overlap) { foreach my $oe (@{$o_obj->get_all_Exons}) { foreach my $e (@exons) { if ($oe->strand == $e->strand and $oe->end >= $e->start and $oe->start <= $e->end) { $exon_overlap = 1; last OG; } } } } } if (not $exon_overlap) { push @filtered, $obj; } } return\@ filtered; } ####################################
}
layersdescriptionprevnextTop
sub layers {
  my ($self, $val) = @_;

  if (defined $val) {
    $self->{_runnabledb_layers} = $val;
  }

  return $self->{_runnabledb_layers};
}



####################################
}
newdescriptionprevnextTop
sub new {
  my ($class,@args) = @_;
  my $self = $class->SUPER::new(@args);

  $self->read_and_check_config($LAYERANNOTATION_CONFIG_BY_LOGIC);
           
  # other stuff here
return $self; } ###################################
}
read_and_check_configdescriptionprevnextTop
sub read_and_check_config {
  my ($self, $hash) = @_;

  $self->SUPER::read_and_check_config($hash);

  # check that all relevant vars have been defined
foreach my $i (qw(LAYERS SOURCEDB_REFS TARGETDB_REF)) { throw("You must define $i in config") if not $self->$i; } # check types
throw("Config var LAYERS must be an array reference") if ref($self->LAYERS) ne "ARRAY"; throw("Config var SOURCEDB_REFS must be an array reference") if ref($self->SOURCEDB_REFS) ne "ARRAY"; throw("Config var TARGETDB_REF must be an scalar") if ref($self->TARGETDB_REF); my (%biotypes, %layer_ids, @layers); # finally, check the integrity of the LAYERS
foreach my $el (@{$self->LAYERS}) { throw("Elements of LAYERS must be hash references") if ref($el) ne "HASH"; throw("Each element of LAYERS must contain a layer ID") if not exists $el->{ID}; throw("LAYER " . $el->{ID} . " should a list of BIOTYPES") if not exists $el->{BIOTYPES} or ref($el->{BIOTYPES}) ne "ARRAY"; my $layer_id = $el->{ID}; my @biotypes = @{$el->{BIOTYPES}}; my $discard = 0; my ($filter, @filter_against); if (exists $el->{DISCARD} and $el->{DISCARD}) { $discard = 1; } foreach my $tp (@biotypes) { if (exists $biotypes{$tp}) { throw("biotype $tp occurs more than once"); } $biotypes{$tp} = 1; } if (exists $el->{FILTER_AGAINST}) { throw("In layer $layer_id FILTER_AGAINST must contain a list of layer ids") if ref($el->{FILTER_AGAINST}) ne "ARRAY"; @filter_against = @{$el->{FILTER_AGAINST}}; foreach my $id (@filter_against) { throw("In FILTER_AGAINST in layer $layer_id, '$id' is not the name ". "of a higher level layer") if not exists $layer_ids{$id}; } } if (exists $el->{FILTER}) { $self->require_module($el->{FILTER}); $filter = $el->{FILTER}->new; } push @layers, Bio::EnsEMBL::Analysis::RunnableDB::LayerAnnotation::Layer ->new(-id => $layer_id, -discard => $discard, -biotypes =>\@ biotypes, -filter_object => $filter, -filter_against =>\@ filter_against); $layer_ids{$layer_id} = 1; } $self->layers(\@layers); } ################################################
}
rundescriptionprevnextTop
sub run {
  my ($self) = @_;

  my (@retained_genes);

  my @layers = @{$self->layers};

  for(my $i=0; $i < @layers; $i++) {
    my $layer = $layers[$i];

    if ($layer->genes) {
      my @layer_genes = sort {$a->start <=> $b->start} @{$layer->genes};

      my @compare_genes;
      
      my %filter_against = map { $_ => 1 } @{$layer->filter_against};

      for(my $j = $i-1; $j>=0; $j--) {
        if (exists $filter_against{$layers[$j]->id} and
            @{$layers[$j]->genes}) {
          push @compare_genes, @{$layers[$j]->genes};
        }
      }
      @compare_genes = sort {$a->start <=> $b->start} @compare_genes;

      if ($layer->filter_object) {
        @layer_genes = @{$layer->filter_object->filter(\@layer_genes,\@ compare_genes)};
      } else {
        @layer_genes = @{$self->generic_filter(\@layer_genes,\@ compare_genes)};
      }
      
      if (not $layer->discard) {
        push @retained_genes, @layer_genes;
      }

      $layer->genes(\@layer_genes);
    }
  }
  
  $self->output(\@retained_genes);
}


##################################
}
write_outputdescriptionprevnextTop
sub write_output {
  my($self) = @_;
  
  my $target_db = $self->get_dbadaptor($self->TARGETDB_REF);    
  my $g_adap = $target_db->get_GeneAdaptor;

  # fully loading gene is required for the store to work
# reliably. However, fully loading all genes to be stored
# up front is expensive in memory. Therefore, load, store and
# discard one gene at a time
my $out = $self->output; while(my $g = shift @$out) { fully_load_Gene($g); $g_adap->store($g); } return 1; } #####################################
}
General documentation
CONTACTTop
ensembl-dev@ebi.ac.uk