Bio::EnsEMBL::Compara::RunnableDB
GenomeCalcStats
Toolbar
Summary
Bio::EnsEMBL::Compara::RunnableDB::GenomeLoadMembers
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Compara::Member(1)
Bio::EnsEMBL::Compara::Member(2)
Statistics::Descriptive
Inherit
Synopsis
my $db = Bio::EnsEMBL::Compara::DBAdaptor->new($locator);
my $repmask = Bio::EnsEMBL::Compara::RunnableDB::GenomeCalcStats->new (
-db => $db,
-input_id => $input_id,
-analysis => $analysis );
$repmask->fetch_input(); #reads from DB
$repmask->run();
$repmask->output();
$repmask->write_output(); #writes to DB
Description
This RunnableDB computes intergenic-distance and gene-size statistics
for the ENSEMBLGENE members of one genome and stores them in the
genome_db_stats table.
The appropriate Bio::EnsEMBL::Analysis object must be passed for
extraction of appropriate parameters.
Methods
Methods description
Title      : batch_size
Usage      : $value = $self->batch_size;
Description: Defines the number of jobs the RunnableDB subclasses should run in batch before querying the database for the next job batch. Used by the Hive system to manage the number of workers needed to complete a particular job type.
Returntype : integer scalar
Title      : carrying_capacity
Usage      : $value = $self->carrying_capacity;
Description: Defines the total number of Workers of this RunnableDB for a particular analysis_id that can be created in the hive. Used by Queen to manage creation of Workers.
Returntype : integer scalar
Title    : fetch_input
Usage    : $self->fetch_input
Function : Prepares global variables and DB connections.
Returns  : none
Args     : none
Methods code
# Number of jobs a worker of this RunnableDB takes per batch.
# Returns: the constant 1.
sub batch_size {
    return 1;
}
# Computes two sets of statistics over the 'ENSEMBLGENE' members of the
# genome held in $self->{'genome_db'} and stores them via insert_statistics():
#   * 'intergenic' - distance between each member and the previous one on the
#                    same chr_name (negative distances are counted as overlaps
#                    and recorded as 0)
#   * 'gene_size'  - chr_end - chr_start of every member
#
# Does nothing (returns immediately) when no genome_db has been loaded.
# Returns whatever the final insert_statistics() call returns.
sub calc_intergenic_stats {
    my $self = shift;

    return unless defined $self->{'genome_db'};
    my $genome_db = $self->{'genome_db'};

    print("calc_inter_genic_distance for '", $genome_db->name(), "'\n");

    my $memberDBA = $self->{'comparaDBA'}->get_MemberAdaptor();
    # NOTE(review): the final clause stays set on the adaptor after this call;
    # confirm whether later queries through the same adaptor are affected.
    $memberDBA->_final_clause("ORDER BY m.chr_name, m.chr_start");
    my $sortedMembers = $memberDBA->fetch_by_source_taxon('ENSEMBLGENE', $genome_db->taxon_id);
    print(scalar(@{$sortedMembers}) . " members to process\n");

    my $intergenic_stats = Statistics::Descriptive::Full->new();
    my $genesize_stats   = Statistics::Descriptive::Full->new();
    my $overlapCount     = 0;

    # Walk the members in order, remembering the previous one. The list is
    # not shifted, so the first member contributes to the gene-size
    # statistics as well and the adaptor's returned array is left untouched.
    my $lastMember;
    foreach my $member (@{$sortedMembers}) {
        $genesize_stats->add_data($member->chr_end - $member->chr_start);

        if (defined $lastMember) {
            if ($lastMember->chr_name ne $member->chr_name) {
                # Different chr_name: no intergenic distance can be measured.
                print("broken syntenty from\n ");
                $lastMember->print_member;
                $member->print_member;
            }
            else {
                my $dist = ($member->chr_start - $lastMember->chr_end);
                if ($dist < 0) {
                    # Overlapping genes: count them, record distance 0.
                    $dist = 0;
                    $overlapCount++;
                }
                $intergenic_stats->add_data($dist);
            }
        }

        $lastMember = $member;
    }

    # The overlap count rides along inside the stats object's hash so that
    # insert_statistics() can pick it up.
    $intergenic_stats->{'overlapCount'} = $overlapCount;
    print("intergenic overlap = ", $overlapCount, "\n");
    print("intergenic overlap = ", $intergenic_stats->{'overlapCount'}, "\n");

    $self->insert_statistics($intergenic_stats, 'intergenic');
    return $self->insert_statistics($genesize_stats, 'gene_size');
}
# Maximum number of workers of this RunnableDB that may be created.
# Returns: the constant 20.
sub carrying_capacity {
    return 20;
}
# Parses the job's input_id and prepares the objects used by run().
#
# The input_id is expected to be the text of a Perl hash containing a 'gdb'
# key (the genome_db_id). On success this sets:
#   $self->{'comparaDBA'} - Compara DBAdaptor built on $self->db->dbc
#   $self->{'genome_db'}  - result of fetch_by_dbID for that genome_db_id
#
# Throws (via $self->throw) when the input_id is missing, contains no '{',
# fails to evaluate, does not evaluate to a hash reference, or has no 'gdb'.
# Returns 1.
sub fetch_input {
    my ($self) = @_;

    $self->throw("No input_id") unless defined($self->input_id);
    print("input_id = " . $self->input_id . "\n");
    # The brace is escaped so the pattern is an unambiguous literal '{'.
    $self->throw("Improper formated input_id") unless ($self->input_id =~ /\{/);

    # NOTE(review): string eval runs the input_id as Perl code. That is only
    # safe while input_ids come from a trusted source - confirm.
    my $input_hash = eval($self->input_id);
    # Report parse failures directly instead of letting them surface later
    # as a misleading "No genome_db_id" error.
    $self->throw("Could not evaluate input_id: $@") if $@;
    $self->throw("input_id did not evaluate to a hash reference")
        unless ref($input_hash) eq 'HASH';

    my $genome_db_id = $input_hash->{'gdb'};
    $self->throw("No genome_db_id in input_id") unless defined($genome_db_id);

    $self->{'comparaDBA'} = Bio::EnsEMBL::Compara::DBSQL::DBAdaptor->new(-DBCONN => $self->db->dbc);
    $self->{'genome_db'}  = $self->{'comparaDBA'}->get_GenomeDBAdaptor->fetch_by_dbID($genome_db_id);

    return 1;
}
# Stores one row of summary statistics in the genome_db_stats table.
#
# Args:
#   $stats    - object providing count, mean, median, mode,
#               standard_deviation, variance, min and max methods; a true
#               $stats->{'overlapCount'} is stored as overlap_count
#   $dataType - label written to the data_type column
#
# The row is written with "INSERT ignore" for the genome in
# $self->{'genome_db'}. Values are passed as bind parameters rather than
# interpolated into the SQL text, so quotes in a value cannot break or alter
# the statement. Returns the result of $sth->finish.
sub insert_statistics {
    my $self     = shift;
    my $stats    = shift;
    my $dataType = shift;

    # Each statistic is read in scalar context (as the original string
    # concatenation did). Undefined results are stored as '' exactly as
    # before, without triggering "uninitialized" warnings.
    my $value_of = sub {
        my ($value) = @_;
        return defined $value ? $value : '';
    };

    my @columns = (
        [ data_type    => $dataType ],
        [ genome_db_id => scalar($self->{'genome_db'}->dbID) ],
        [ count        => $value_of->(scalar($stats->count())) ],
        [ mean         => $value_of->(scalar($stats->mean())) ],
        [ median       => $value_of->(scalar($stats->median())) ],
        [ mode         => $value_of->(scalar($stats->mode())) ],
        [ stddev       => $value_of->(scalar($stats->standard_deviation())) ],
        [ variance     => $value_of->(scalar($stats->variance())) ],
        [ min          => $value_of->(scalar($stats->min())) ],
        [ max          => $value_of->(scalar($stats->max())) ],
    );
    # overlap_count is only written when a non-zero count was recorded.
    push @columns, [ overlap_count => $stats->{'overlapCount'} ]
        if ($stats->{'overlapCount'});

    my $sql = "INSERT ignore INTO genome_db_stats SET "
            . join(" ,", map { $_->[0] . "=?" } @columns);
    my @values = map { $_->[1] } @columns;

    print("$sql  [" . join(", ", map { defined $_ ? $_ : 'NULL' } @values) . "]\n");

    my $sth = $self->db->dbc->prepare($sql);
    $sth->execute(@values);
    return $sth->finish;
}

# True value terminating the module (it followed this sub in the listing).
1;
# Runs the statistics calculation for the genome loaded by fetch_input().
#
# The compara connection is told not to disconnect when inactive while
# calc_intergenic_stats() runs, and the flag is set back to 1 afterwards -
# also when the calculation dies, in which case the error is rethrown once
# the flag has been restored.
# Returns 1 on success.
sub run {
    my $self = shift;

    my $dbc = $self->{'comparaDBA'}->dbc;
    $dbc->disconnect_when_inactive(0);

    my $completed = eval { $self->calc_intergenic_stats(); 1 };
    my $error     = $@;

    # Restore the flag before propagating any failure.
    $dbc->disconnect_when_inactive(1);
    die $error unless $completed;

    return 1;
}
# Nothing is written at this stage; the method performs no work itself.
# Returns 1.
sub write_output {
    my $self = shift;
    return 1;
}
General documentation
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _