Bio::EnsEMBL::Analysis::RunnableDB
MakeSimilarityInputIDs
Toolbar
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Analysis::Config::GeneBuild::BlastMiniGenewise qw ( GENEWISE_CONFIG_BY_LOGIC )
Bio::EnsEMBL::KillList::KillList
Inherit
Synopsis
No synopsis!
Description
No description!
Methods
BIOTYPES_TO_MASK | No description | Code |
BLASTMINIGENEWISE_PARAMETERS | No description | Code |
EXONERATE_PARAMETERS | No description | Code |
EXON_BASED_MASKING | No description | Code |
FEATURE_RANGE_PADDING | No description | Code |
FILTER_OBJECT | No description | Code |
FILTER_PARAMS | No description | Code |
GENEWISE_PARAMETERS | No description | Code |
GENE_BASED_MASKING | No description | Code |
GENE_SOURCE_DB | No description | Code |
LIMIT_TO_FEATURE_RANGE | No description | Code |
MINIGENEWISE_PARAMETERS | No description | Code |
MULTIMINIGENEWISE_PARAMETERS | No description | Code |
OUTPUT_BIOTYPE | No description | Code |
OUTPUT_DB | No description | Code |
PAF_LOGICNAMES | No description | Code |
PAF_MIN_SCORE_THRESHOLD | No description | Code |
PAF_SOURCE_DB | No description | Code |
PAF_UPPER_SCORE_THRESHOLD | No description | Code |
POST_GENEWISE_MASK | No description | Code |
PRE_GENEWISE_MASK | No description | Code |
REJECTED_BIOTYPE | No description | Code |
REPEATMASKING | No description | Code |
SEQFETCHER_OBJECT | No description | Code |
SEQFETCHER_PARAMS | No description | Code |
SOFTMASKING | No description | Code |
USE_KILL_LIST | No description | Code |
WRITE_REJECTED | No description | Code |
bmg_logicname | No description | Code |
fetch_input | No description | Code |
gene_slice | No description | Code |
gene_source_db | No description | Code |
kill_list | No description | Code |
new | No description | Code |
output_analysis | No description | Code |
output_logicname | No description | Code |
paf_slice | No description | Code |
paf_source_db | No description | Code |
protein_count | No description | Code |
run | No description | Code |
write_output | No description | Code |
Methods description
None available.
Methods code
BIOTYPES_TO_MASK | description | prev | next | Top |
sub BIOTYPES_TO_MASK
{ my ($self, $arg) = @_;
if($arg){
$self->{BIOTYPES_TO_MASK} = $arg;
}
return $self->{BIOTYPES_TO_MASK} } |
sub BLASTMINIGENEWISE_PARAMETERS
{ my ($self, $arg) = @_;
if($arg){
$self->{BLASTMINIGENEWISE_PARAMETERS} = $arg;
}
return $self->{BLASTMINIGENEWISE_PARAMETERS} } |
sub EXONERATE_PARAMETERS
{ my ($self, $arg) = @_;
if($arg){
$self->{EXONERATE_PARAMETERS} = $arg;
}
return $self->{EXONERATE_PARAMETERS}
}
1; } |
sub EXON_BASED_MASKING
{ my ($self, $arg) = @_;
if($arg){
$self->{EXON_BASED_MASKING} = $arg;
}
return $self->{EXON_BASED_MASKING} } |
sub FEATURE_RANGE_PADDING
{ my ($self, $arg) = @_;
if($arg){
$self->{FEATURE_RANGE_PADDING} = $arg;
}
return $self->{FEATURE_RANGE_PADDING} } |
sub FILTER_OBJECT
{ my ($self, $arg) = @_;
if($arg){
$self->{FILTER_OBJECT} = $arg;
}
return $self->{FILTER_OBJECT} } |
sub FILTER_PARAMS
{ my ($self, $arg) = @_;
if($arg){
$self->{FILTER_PARAMETERS} = $arg;
}
return $self->{FILTER_PARAMETERS} } |
sub GENEWISE_PARAMETERS
{ my ($self, $arg) = @_;
if($arg){
$self->{GENEWISE_PARAMETERS} = $arg;
}
return $self->{GENEWISE_PARAMETERS} } |
sub GENE_BASED_MASKING
{ my ($self, $arg) = @_;
if($arg){
$self->{GENE_BASED_MASKING} = $arg;
}
return $self->{GENE_BASED_MASKING} } |
sub GENE_SOURCE_DB
{ my ($self, $arg) = @_;
if($arg){
$self->{GENE_SOURCE_DB} = $arg;
}
return $self->{GENE_SOURCE_DB} } |
sub LIMIT_TO_FEATURE_RANGE
{ my ($self, $arg) = @_;
if($arg){
$self->{LIMIT_TO_FEATURE_RANGE} = $arg;
}
return $self->{LIMIT_TO_FEATURE_RANGE} } |
sub MINIGENEWISE_PARAMETERS
{ my ($self, $arg) = @_;
if($arg){
$self->{MINIGENEWISE_PARAMETERS} = $arg;
}
return $self->{MINIGENEWISE_PARAMETERS} } |
sub MULTIMINIGENEWISE_PARAMETERS
{ my ($self, $arg) = @_;
if($arg){
$self->{MULTIMINIGENEWISE_PARAMETERS} = $arg;
}
return $self->{MULTIMINIGENEWISE_PARAMETERS} } |
sub OUTPUT_BIOTYPE
{ my ($self, $arg) = @_;
if($arg){
$self->{OUTPUT_BIOTYPE} = $arg;
}
return $self->{OUTPUT_BIOTYPE} } |
sub OUTPUT_DB
{ my ($self, $arg) = @_;
if($arg){
$self->{OUTPUT_DB} = $arg;
}
return $self->{OUTPUT_DB} } |
sub PAF_LOGICNAMES
{ my ($self, $arg) = @_;
if($arg){
$self->{PAF_LOGICNAMES} = $arg;
}
return $self->{PAF_LOGICNAMES} } |
sub PAF_MIN_SCORE_THRESHOLD
{ my ($self, $arg) = @_;
if($arg){
$self->{PAF_MIN_SCORE_THRESHOLD} = $arg;
}
return $self->{PAF_MIN_SCORE_THRESHOLD} } |
sub PAF_SOURCE_DB
{ my ($self, $arg) = @_;
if($arg){
$self->{PAF_SOURCE_DB} = $arg;
}
return $self->{PAF_SOURCE_DB} } |
sub PAF_UPPER_SCORE_THRESHOLD
{ my ($self, $arg) = @_;
if($arg){
$self->{PAF_UPPER_SCORE_THRESHOLD} = $arg;
}
return $self->{PAF_UPPER_SCORE_THRESHOLD} } |
sub POST_GENEWISE_MASK
{ my ($self, $arg) = @_;
if($arg){
$self->{POST_GENEWISE_MASK} = $arg;
}
return $self->{POST_GENEWISE_MASK} } |
sub PRE_GENEWISE_MASK
{ my ($self, $arg) = @_;
if($arg){
$self->{PRE_GENEWISE_MASK} = $arg;
}
return $self->{PRE_GENEWISE_MASK} } |
sub REJECTED_BIOTYPE
{ my ($self, $arg) = @_;
if($arg){
$self->{REJECTED_BIOTYPE} = $arg;
}
return $self->{REJECTED_BIOTYPE}; } |
sub REPEATMASKING
{ my ($self, $arg) = @_;
if($arg){
$self->{REPEATMASKING} = $arg;
}
return $self->{REPEATMASKING} } |
sub SEQFETCHER_OBJECT
{ my ($self, $arg) = @_;
if($arg){
$self->{SEQFETCHER_OBJECT} = $arg;
}
return $self->{SEQFETCHER_OBJECT} } |
sub SEQFETCHER_PARAMS
{ my ($self, $arg) = @_;
if($arg){
$self->{SEQFETCHER_PARAMS} = $arg;
}
return $self->{SEQFETCHER_PARAMS} } |
sub SOFTMASKING
{ my ($self, $arg) = @_;
if($arg){
$self->{SOFTMASKING} = $arg;
}
return $self->{SOFTMASKING} } |
sub USE_KILL_LIST
{ my ($self, $arg) = @_;
if($arg){
$self->{USE_KILL_LIST} = $arg;
}
return $self->{USE_KILL_LIST} } |
sub WRITE_REJECTED
{ my ($self, $arg) = @_;
if(defined($arg)){
$self->{WRITE_REJECTED} = $arg;
}
return $self->{WRITE_REJECTED}; } |
sub bmg_logicname
{ my ($self, $value) = @_;
if($value){
$self->{'bmg_logicname'} = $value;
}
return $self->{'bmg_logicname'}; } |
sub fetch_input
{ my ($self) = @_;
print "\n\n***Fetching sequence from ".$self->db->dbname."***\n\n";
$self->query($self->fetch_sequence($self->input_id, $self->db, $self->REPEATMASKING));
my $output_analysis = $self->db->get_AnalysisAdaptor->
fetch_by_logic_name($self->output_logicname);
throw("Failed to find an analysis with the logic_name ".
$self->output_logicname." Cannot continue")
if(!$output_analysis);
$self->output_analysis($output_analysis); } |
sub gene_slice
{ my ($self, $slice) = @_;
if($slice){
$self->{gene_slice} = $slice;
}
if(!$self->{gene_slice}){
my $slice = $self->fetch_sequence($self->input_id, $self->gene_source_db,
$self->REPEATMASKING);
$self->{gene_slice} = $slice;
}
return $self->{gene_slice}; } |
sub gene_source_db
{ my ($self, $db) = @_;
if($db){
$self->{gene_source_db} = $db;
}
if(!$self->{gene_source_db}){
my $db = $self->get_dbadaptor($self->GENE_SOURCE_DB);
$self->{gene_source_db} = $db;
}
return $self->{gene_source_db}; } |
sub kill_list
{ my ($self, $arg) = @_;
if($arg){
$self->{kill_list} = $arg;
}
if(!$self->{kill_list}){
my $kill_list_object = Bio::EnsEMBL::KillList::KillList
->new(-TYPE => 'protein');
$self->{kill_list} = $kill_list_object->get_kill_list;
}
return $self->{kill_list}; } |
sub new
{ my ($class,@args) = @_;
my $self = $class->SUPER::new(@args);
my ($output_logicname, $protein_count, $bmg_logicname) = rearrange
(['OUTPUT_LOGICNAME', 'PROTEIN_COUNT', 'BMG_LOGICNAME'], @args);
$self->protein_count(20);
my $ph = $self->parameters_hash;
$self->protein_count($ph->{-protein_count});
$self->output_logicname($ph->{-output_logicname});
$self->bmg_logicname($ph->{-bmg_logicname});
$self->protein_count($protein_count);
$self->output_logicname($output_logicname);
$self->bmg_logicname($bmg_logicname);
throw("Need an output logicname ".$self->output_logicname." and a bmg logicname ".
$self->bmg_logicname." defined") if(!$self->output_logicname ||
!$self->bmg_logicname);
parse_config($self, $GENEWISE_CONFIG_BY_LOGIC, $self->bmg_logicname);
return $self; } |
sub output_analysis
{ my ($self, $value) = @_;
if($value &&
$value->isa('Bio::EnsEMBL::Pipeline::Analysis')){
$self->{output_analysis} = $value;
}
return $self->{'output_analysis'}; } |
sub output_logicname
{ my ($self, $value) = @_;
if($value){
$self->{'output_logicname'} = $value;
}
return $self->{'output_logicname'}; } |
sub paf_slice
{ my ($self, $slice) = @_;
if($slice){
$self->{paf_slice} = $slice;
}
if(!$self->{paf_slice}){
print "FETCHING SEQUENCE FROM ".$self->paf_source_db->dbc->dbname."\n";
my $slice = $self->fetch_sequence($self->input_id, $self->paf_source_db,
$self->REPEATMASKING);
$self->{paf_slice} = $slice;
}
return $self->{paf_slice}; } |
sub paf_source_db
{ my ($self, $db) = @_;
if($db){
$self->{paf_source_db} = $db;
}
if(!$self->{paf_source_db}){
my $db = $self->get_dbadaptor($self->PAF_SOURCE_DB);
$self->{paf_source_db} = $db;
}
return $self->{paf_source_db}; } |
sub protein_count
{ my ($self, $value) = @_;
if($value){
$self->{'protein_count'} = $value;
}
return $self->{'protein_count'}; } |
sub run
{ my ($self) = @_;
my %kill_list = %{$self->kill_list} if($self->USE_KILL_LIST);
my @mask_exons;
my @iids;
foreach my $type ( @{$self->BIOTYPES_TO_MASK} ) {
foreach my $mask_genes ( @{ $self->gene_slice->get_all_Genes_by_type($type) } ) {
foreach my $mask_exon ( @{ $mask_genes->get_all_Exons } ) {
if ( $mask_exon->seqname eq $self->gene_slice->id ) {
push @mask_exons, $mask_exon;
}
}
}
}
my @mask_regions;
foreach my $mask_exon ( sort { $a->start <=> $b->start } @mask_exons ) {
if ( @mask_regions and $mask_regions[-1]->{'end'} > $mask_exon->start ) {
if ( $mask_exon->end > $mask_regions[-1]->{'end'} ) {
$mask_regions[-1]->{'end'} = $mask_exon->end;
}
} else {
push @mask_regions, { start => $mask_exon->start, end => $mask_exon->end }
}
}
my $num_seeds = 0;
foreach my $logicname(@{$self->PAF_LOGICNAMES}) {
my %features;
print "FETCHING FEATURES FOR ".$logicname."\n";
my @features = @{$self->paf_slice->get_all_ProteinAlignFeatures($logicname)};
print "HAVE ".@features." features\n";
FEATURE:foreach my $f(@features){
next FEATURE if($self->PAF_MIN_SCORE_THRESHOLD && $f->score < $self->PAF_MIN_SCORE_THRESHOLD);
next FEATURE if($self->PAF_UPPER_SCORE_THRESHOLD && $f->score > $self->PAF_UPPER_SCORE_THRESHOLD);
push(@{$features{$f->hseqname}}, $f);
}
my @ids_to_ignore;
SEQID: foreach my $sid ( keys %features ) {
my $ex_idx = 0;
my $count = 0;
FEAT: foreach my $f ( sort { $a->start <=> $b->start } @{ $features{$sid} } ) {
for ( ; $ex_idx < @mask_regions ; ) {
my $mask_exon = $mask_regions[$ex_idx];
if ( $mask_exon->{'start'} > $f->end ) {
next FEAT;
} elsif ( $mask_exon->{'end'} >= $f->start ) {
push @ids_to_ignore, $f->hseqname;
next SEQID;
} else {
$ex_idx++;
}
}
}
}
print "Ignoring ".@ids_to_ignore." features\n";
foreach my $dud_id ( @ids_to_ignore, keys %kill_list ) {
if ( exists $features{$dud_id} ) {
delete $features{$dud_id};
}
}
$num_seeds += scalar( keys %features );
}
return () if($num_seeds == 0);
my $num_chunks = int( $num_seeds / $self->protein_count ) + 1; for ( my $x = 1 ; $x <= $num_chunks ; $x++ ) {
my $new_iid = $self->query->name . ":$num_chunks:$x";
push @iids, $new_iid;
}
print "HAVE ".@iids." to write to the ref database\n";
$self->output(\@iids); } |
sub write_output
{ my ($self) = @_;
my $sic = $self->db->get_StateInfoContainer;
foreach my $iid(@{$self->output}){
eval{
$sic->store_input_id_analysis($iid,
$self->output_analysis,
'');
};
throw("Failed to store ".$iid." $@") if($@);
logger_info("Stored ".$iid);
} } |
General documentation
CONFIG_ACCESSOR_METHODS | Top |
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB::BlastMiniGenewise
Arg [2] : Varies, tends to be boolean, a string, a arrayref or a hashref
Function : Getter/Setter for config variables
Returntype: again varies
Exceptions:
Example :