Bio::EnsEMBL::Analysis::RunnableDB MakeSimilarityInputIDs
Included librariesPackage variablesGeneral documentationMethods
Toolbar
WebCvsRaw content
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Analysis::Config::GeneBuild::BlastMiniGenewise qw ( GENEWISE_CONFIG_BY_LOGIC )
Bio::EnsEMBL::Analysis::RunnableDB
Bio::EnsEMBL::Analysis::RunnableDB::BaseGeneBuild
Bio::EnsEMBL::Analysis::Tools::Logger qw ( logger_info )
Bio::EnsEMBL::Analysis::Tools::Utilities qw ( parse_config )
Bio::EnsEMBL::KillList::KillList
Bio::EnsEMBL::Utils::Argument qw ( rearrange )
Bio::EnsEMBL::Utils::Exception qw ( throw warning )
Inherit
Bio::EnsEMBL::Analysis::RunnableDB::BaseGeneBuild
Synopsis
No synopsis!
Description
No description!
Methods
BIOTYPES_TO_MASK
No description
Code
BLASTMINIGENEWISE_PARAMETERS
No description
Code
EXONERATE_PARAMETERS
No description
Code
EXON_BASED_MASKING
No description
Code
FEATURE_RANGE_PADDING
No description
Code
FILTER_OBJECT
No description
Code
FILTER_PARAMS
No description
Code
GENEWISE_PARAMETERS
No description
Code
GENE_BASED_MASKING
No description
Code
GENE_SOURCE_DB
No description
Code
LIMIT_TO_FEATURE_RANGE
No description
Code
MINIGENEWISE_PARAMETERS
No description
Code
MULTIMINIGENEWISE_PARAMETERS
No description
Code
OUTPUT_BIOTYPE
No description
Code
OUTPUT_DB
No description
Code
PAF_LOGICNAMES
No description
Code
PAF_MIN_SCORE_THRESHOLD
No description
Code
PAF_SOURCE_DB
No description
Code
PAF_UPPER_SCORE_THRESHOLD
No description
Code
POST_GENEWISE_MASK
No description
Code
PRE_GENEWISE_MASK
No description
Code
REJECTED_BIOTYPE
No description
Code
REPEATMASKING
No description
Code
SEQFETCHER_OBJECT
No description
Code
SEQFETCHER_PARAMS
No description
Code
SOFTMASKING
No description
Code
USE_KILL_LIST
No description
Code
WRITE_REJECTED
No description
Code
bmg_logicname
No description
Code
fetch_input
No description
Code
gene_slice
No description
Code
gene_source_db
No description
Code
kill_list
No description
Code
new
No description
Code
output_analysis
No description
Code
output_logicname
No description
Code
paf_slice
No description
Code
paf_source_db
No description
Code
protein_count
No description
Code
run
No description
Code
write_output
No description
Code
Methods description
None available.
Methods code
BIOTYPES_TO_MASKdescriptionprevnextTop
sub BIOTYPES_TO_MASK {
  my ($self, $arg) = @_;
  if($arg){
    $self->{BIOTYPES_TO_MASK} = $arg;
  }
  return $self->{BIOTYPES_TO_MASK}
}
BLASTMINIGENEWISE_PARAMETERSdescriptionprevnextTop
sub BLASTMINIGENEWISE_PARAMETERS {
  my ($self, $arg) = @_;
  if($arg){
    $self->{BLASTMINIGENEWISE_PARAMETERS} = $arg;
  }
  return $self->{BLASTMINIGENEWISE_PARAMETERS}
}
EXONERATE_PARAMETERSdescriptionprevnextTop
sub EXONERATE_PARAMETERS {
  my ($self, $arg) = @_;
  if($arg){
    $self->{EXONERATE_PARAMETERS} = $arg;
  }
  return $self->{EXONERATE_PARAMETERS}
}


1;
}
EXON_BASED_MASKINGdescriptionprevnextTop
sub EXON_BASED_MASKING {
  my ($self, $arg) = @_;
  if($arg){
    $self->{EXON_BASED_MASKING} = $arg;
  }
  return $self->{EXON_BASED_MASKING}
}
FEATURE_RANGE_PADDINGdescriptionprevnextTop
sub FEATURE_RANGE_PADDING {
  my ($self, $arg) = @_;
  if($arg){
    $self->{FEATURE_RANGE_PADDING} = $arg;
  }
  return $self->{FEATURE_RANGE_PADDING}
}
FILTER_OBJECTdescriptionprevnextTop
sub FILTER_OBJECT {
  my ($self, $arg) = @_;
  if($arg){
    $self->{FILTER_OBJECT} = $arg;
  }
  return $self->{FILTER_OBJECT}
}
FILTER_PARAMSdescriptionprevnextTop
sub FILTER_PARAMS {
  my ($self, $arg) = @_;
  if($arg){
    $self->{FILTER_PARAMETERS} = $arg;
  }
  return $self->{FILTER_PARAMETERS}
}
GENEWISE_PARAMETERSdescriptionprevnextTop
sub GENEWISE_PARAMETERS {
  my ($self, $arg) = @_;
  if($arg){
    $self->{GENEWISE_PARAMETERS} = $arg;
  }
  return $self->{GENEWISE_PARAMETERS}
}
GENE_BASED_MASKINGdescriptionprevnextTop
sub GENE_BASED_MASKING {
  my ($self, $arg) = @_;
  if($arg){
    $self->{GENE_BASED_MASKING} = $arg;
  }
  return $self->{GENE_BASED_MASKING}
}
GENE_SOURCE_DBdescriptionprevnextTop
sub GENE_SOURCE_DB {
  my ($self, $arg) = @_;
  if($arg){
    $self->{GENE_SOURCE_DB} = $arg;
  }
  return $self->{GENE_SOURCE_DB}
}
LIMIT_TO_FEATURE_RANGEdescriptionprevnextTop
sub LIMIT_TO_FEATURE_RANGE {
  my ($self, $arg) = @_;
  if($arg){
    $self->{LIMIT_TO_FEATURE_RANGE} = $arg;
  }
  return $self->{LIMIT_TO_FEATURE_RANGE}
}
MINIGENEWISE_PARAMETERSdescriptionprevnextTop
sub MINIGENEWISE_PARAMETERS {
  my ($self, $arg) = @_;
  if($arg){
    $self->{MINIGENEWISE_PARAMETERS} = $arg;
  }
  return $self->{MINIGENEWISE_PARAMETERS}
}
MULTIMINIGENEWISE_PARAMETERSdescriptionprevnextTop
sub MULTIMINIGENEWISE_PARAMETERS {
  my ($self, $arg) = @_;
  if($arg){
    $self->{MULTIMINIGENEWISE_PARAMETERS} = $arg;
  }
  return $self->{MULTIMINIGENEWISE_PARAMETERS}
}
OUTPUT_BIOTYPEdescriptionprevnextTop
sub OUTPUT_BIOTYPE {
  my ($self, $arg) = @_;
  if($arg){
    $self->{OUTPUT_BIOTYPE} = $arg;
  }
  return $self->{OUTPUT_BIOTYPE}
}
OUTPUT_DBdescriptionprevnextTop
sub OUTPUT_DB {
  my ($self, $arg) = @_;
  if($arg){
    $self->{OUTPUT_DB} = $arg;
  }
  return $self->{OUTPUT_DB}
}
PAF_LOGICNAMESdescriptionprevnextTop
sub PAF_LOGICNAMES {
  my ($self, $arg) = @_;
  if($arg){
    $self->{PAF_LOGICNAMES} = $arg;
  }
  return $self->{PAF_LOGICNAMES}
}
PAF_MIN_SCORE_THRESHOLDdescriptionprevnextTop
sub PAF_MIN_SCORE_THRESHOLD {
  my ($self, $arg) = @_;
  if($arg){
    $self->{PAF_MIN_SCORE_THRESHOLD} = $arg;
  }
  return $self->{PAF_MIN_SCORE_THRESHOLD}
}
PAF_SOURCE_DBdescriptionprevnextTop
sub PAF_SOURCE_DB {
  my ($self, $arg) = @_;
  if($arg){
    $self->{PAF_SOURCE_DB} = $arg;
  }
  return $self->{PAF_SOURCE_DB}
}
PAF_UPPER_SCORE_THRESHOLDdescriptionprevnextTop
sub PAF_UPPER_SCORE_THRESHOLD {
  my ($self, $arg) = @_;
  if($arg){
    $self->{PAF_UPPER_SCORE_THRESHOLD} = $arg;
  }
  return $self->{PAF_UPPER_SCORE_THRESHOLD}
}
POST_GENEWISE_MASKdescriptionprevnextTop
sub POST_GENEWISE_MASK {
  my ($self, $arg) = @_;
  if($arg){
    $self->{POST_GENEWISE_MASK} = $arg;
  }
  return $self->{POST_GENEWISE_MASK}
}
PRE_GENEWISE_MASKdescriptionprevnextTop
sub PRE_GENEWISE_MASK {
  my ($self, $arg) = @_;
  if($arg){
    $self->{PRE_GENEWISE_MASK} = $arg;
  }
  return $self->{PRE_GENEWISE_MASK}
}
REJECTED_BIOTYPEdescriptionprevnextTop
sub REJECTED_BIOTYPE {
  my ($self, $arg) = @_;
  if($arg){
    $self->{REJECTED_BIOTYPE} = $arg;
  }
  return $self->{REJECTED_BIOTYPE};
}
REPEATMASKINGdescriptionprevnextTop
sub REPEATMASKING {
  my ($self, $arg) = @_;
  if($arg){
    $self->{REPEATMASKING} = $arg;
  }
  return $self->{REPEATMASKING}
}
SEQFETCHER_OBJECTdescriptionprevnextTop
sub SEQFETCHER_OBJECT {
  my ($self, $arg) = @_;
  if($arg){
    $self->{SEQFETCHER_OBJECT} = $arg;
  }
  return $self->{SEQFETCHER_OBJECT}
}
SEQFETCHER_PARAMSdescriptionprevnextTop
sub SEQFETCHER_PARAMS {
  my ($self, $arg) = @_;
  if($arg){
    $self->{SEQFETCHER_PARAMS} = $arg;
  }
  return $self->{SEQFETCHER_PARAMS}
}
SOFTMASKINGdescriptionprevnextTop
sub SOFTMASKING {
  my ($self, $arg) = @_;
  if($arg){
    $self->{SOFTMASKING} = $arg;
  }
  return $self->{SOFTMASKING}
}
USE_KILL_LISTdescriptionprevnextTop
sub USE_KILL_LIST {
  my ($self, $arg) = @_;
  if($arg){
    $self->{USE_KILL_LIST} = $arg;
  }
  return $self->{USE_KILL_LIST}
}
WRITE_REJECTEDdescriptionprevnextTop
sub WRITE_REJECTED {
  my ($self, $arg) = @_;
  if(defined($arg)){
    $self->{WRITE_REJECTED} = $arg;
  }
  return $self->{WRITE_REJECTED};
}
bmg_logicnamedescriptionprevnextTop
sub bmg_logicname {
  my ($self, $value) = @_;
  if($value){
    $self->{'bmg_logicname'} = $value;
  }
  return $self->{'bmg_logicname'};
}
fetch_inputdescriptionprevnextTop
sub fetch_input {
  my ($self) = @_;
  print "\n\n***Fetching sequence from ".$self->db->dbname."***\n\n";
  $self->query($self->fetch_sequence($self->input_id, $self->db, $self->REPEATMASKING));
  my $output_analysis = $self->db->get_AnalysisAdaptor->
    fetch_by_logic_name($self->output_logicname);
  throw("Failed to find an analysis with the logic_name ".
        $self->output_logicname." Cannot continue") 
    if(!$output_analysis);
  $self->output_analysis($output_analysis);
}
gene_slicedescriptionprevnextTop
sub gene_slice {
  my ($self, $slice) = @_;

  if($slice){
    $self->{gene_slice} = $slice;
  }
  if(!$self->{gene_slice}){
    my $slice = $self->fetch_sequence($self->input_id, $self->gene_source_db,
                                      $self->REPEATMASKING);
    $self->{gene_slice} = $slice;
  }
  return $self->{gene_slice};
}
gene_source_dbdescriptionprevnextTop
sub gene_source_db {
  my ($self, $db) = @_;
  if($db){
    $self->{gene_source_db} = $db;
  }
  if(!$self->{gene_source_db}){
    my $db = $self->get_dbadaptor($self->GENE_SOURCE_DB);
    $self->{gene_source_db} = $db;
  }
  return $self->{gene_source_db};
}
kill_listdescriptionprevnextTop
sub kill_list {
  my ($self, $arg) = @_;

  if($arg){
    $self->{kill_list} = $arg;
  }
  if(!$self->{kill_list}){
    my $kill_list_object = Bio::EnsEMBL::KillList::KillList
      ->new(-TYPE => 'protein');
    $self->{kill_list} = $kill_list_object->get_kill_list;
  }
  return $self->{kill_list};
}
newdescriptionprevnextTop
sub new {
  my ($class,@args) = @_;
  my $self = $class->SUPER::new(@args);
  my ($output_logicname, $protein_count, $bmg_logicname) = rearrange
    (['OUTPUT_LOGICNAME', 'PROTEIN_COUNT', 'BMG_LOGICNAME'], @args);
  #output logicname
#protein count
#bmg logicname
#####
#setting a default
#####
$self->protein_count(20); #####
### Defaults are over-ridden by parameters given in analysis table...
my $ph = $self->parameters_hash; $self->protein_count($ph->{-protein_count}); $self->output_logicname($ph->{-output_logicname}); $self->bmg_logicname($ph->{-bmg_logicname}); ### ...which are over-ridden by constructor arguments.
$self->protein_count($protein_count); $self->output_logicname($output_logicname); $self->bmg_logicname($bmg_logicname); throw("Need an output logicname ".$self->output_logicname." and a bmg logicname ". $self->bmg_logicname." defined") if(!$self->output_logicname || !$self->bmg_logicname); parse_config($self, $GENEWISE_CONFIG_BY_LOGIC, $self->bmg_logicname); return $self;
}
output_analysisdescriptionprevnextTop
sub output_analysis {
  my ($self, $value) = @_;
  if($value && 
     $value->isa('Bio::EnsEMBL::Pipeline::Analysis')){
    $self->{output_analysis} = $value;
  }
  return $self->{'output_analysis'};
}
output_logicnamedescriptionprevnextTop
sub output_logicname {
  my ($self, $value) = @_;
  if($value){
    $self->{'output_logicname'} = $value;
  }
  return $self->{'output_logicname'};
}
paf_slicedescriptionprevnextTop
sub paf_slice {
  my ($self, $slice) = @_;

  if($slice){
    $self->{paf_slice} = $slice;
  }
  if(!$self->{paf_slice}){
    print "FETCHING SEQUENCE FROM ".$self->paf_source_db->dbc->dbname."\n";
    my $slice = $self->fetch_sequence($self->input_id, $self->paf_source_db,
                                      $self->REPEATMASKING);
    $self->{paf_slice} = $slice;
  }
  return $self->{paf_slice};
}
paf_source_dbdescriptionprevnextTop
sub paf_source_db {
  my ($self, $db) = @_;
  if($db){
    $self->{paf_source_db} = $db;
  }
  if(!$self->{paf_source_db}){
    my $db = $self->get_dbadaptor($self->PAF_SOURCE_DB);
    $self->{paf_source_db} = $db;
  }
  return $self->{paf_source_db};
}
protein_countdescriptionprevnextTop
sub protein_count {
  my ($self, $value) = @_;
  if($value){
    $self->{'protein_count'} = $value;
  }
  return $self->{'protein_count'};
}
rundescriptionprevnextTop
sub run {
  my ($self) = @_;
  my %kill_list =  %{$self->kill_list} if($self->USE_KILL_LIST);
  my @mask_exons;
  my @iids;
  # remove masked and killed hits as will be done in the build itself
foreach my $type ( @{$self->BIOTYPES_TO_MASK} ) { foreach my $mask_genes ( @{ $self->gene_slice->get_all_Genes_by_type($type) } ) { foreach my $mask_exon ( @{ $mask_genes->get_all_Exons } ) { if ( $mask_exon->seqname eq $self->gene_slice->id ) { push @mask_exons, $mask_exon; } } } } # make the mask list non-redundant. Much faster when checking against features
my @mask_regions; foreach my $mask_exon ( sort { $a->start <=> $b->start } @mask_exons ) { if ( @mask_regions and $mask_regions[-1]->{'end'} > $mask_exon->start ) { if ( $mask_exon->end > $mask_regions[-1]->{'end'} ) { $mask_regions[-1]->{'end'} = $mask_exon->end; } } else { push @mask_regions, { start => $mask_exon->start, end => $mask_exon->end } } } my $num_seeds = 0; foreach my $logicname(@{$self->PAF_LOGICNAMES}) { my %features; print "FETCHING FEATURES FOR ".$logicname."\n"; my @features = @{$self->paf_slice->get_all_ProteinAlignFeatures($logicname)}; print "HAVE ".@features." features\n"; FEATURE:foreach my $f(@features){ next FEATURE if($self->PAF_MIN_SCORE_THRESHOLD && $f->score < $self->PAF_MIN_SCORE_THRESHOLD); next FEATURE if($self->PAF_UPPER_SCORE_THRESHOLD && $f->score > $self->PAF_UPPER_SCORE_THRESHOLD); push(@{$features{$f->hseqname}}, $f); } my @ids_to_ignore; SEQID: foreach my $sid ( keys %features ) { my $ex_idx = 0; my $count = 0; #print STDERR "Looking at $sid\n";
FEAT: foreach my $f ( sort { $a->start <=> $b->start } @{ $features{$sid} } ) { #printf STDERR "Feature: %d %d\n", $f->start, $f->end;
for ( ; $ex_idx < @mask_regions ; ) { my $mask_exon = $mask_regions[$ex_idx]; #printf STDERR " Mask exon %d %d\n", $mask_exon->{'start'}, $mask_exon->{'end'};
if ( $mask_exon->{'start'} > $f->end ) { # no exons will overlap this feature
next FEAT; } elsif ( $mask_exon->{'end'} >= $f->start ) { # overlap
push @ids_to_ignore, $f->hseqname; #printf STDERR "Ignoring %s\n", $f->hseqname;
next SEQID; } else { $ex_idx++; } } } } print "Ignoring ".@ids_to_ignore." features\n"; foreach my $dud_id ( @ids_to_ignore, keys %kill_list ) { if ( exists $features{$dud_id} ) { delete $features{$dud_id}; } } $num_seeds += scalar( keys %features ); } # rule of thumb; split data so that each job constitutes one piece of
# genomic DNA against ~20 proteins.
#
return () if($num_seeds == 0); my $num_chunks = int( $num_seeds / $self->protein_count )
+ 1;
for ( my $x = 1 ; $x <= $num_chunks ; $x++ ) { #generate input id : $chr_name.1-$chr_length:$num_chunks:$x
my $new_iid = $self->query->name . ":$num_chunks:$x"; push @iids, $new_iid; } print "HAVE ".@iids." to write to the ref database\n"; $self->output(\@iids);
}
write_outputdescriptionprevnextTop
sub write_output {
  my ($self) = @_;
  my $sic = $self->db->get_StateInfoContainer;
  foreach my $iid(@{$self->output}){
    eval{
      $sic->store_input_id_analysis($iid, 
                                    $self->output_analysis, 
                                    '');
    };
    throw("Failed to store ".$iid." $@") if($@);
    logger_info("Stored ".$iid);
  }
}
General documentation
CONFIG_ACCESSOR_METHODSTop
  Arg [1]   : Bio::EnsEMBL::Analysis::RunnableDB::BlastMiniGenewise
Arg [2] : Varies, tends to be boolean, a string, a arrayref or a hashref
Function : Getter/Setter for config variables
Returntype: again varies
Exceptions:
Example :