Bio::EnsEMBL::Compara::RunnableDB
HclusterPrepare
Toolbar
Summary
Bio::EnsEMBL::Compara::RunnableDB::HclusterPrepare
Package variables
No package variables defined.
Included modules
Switch
Time::HiRes qw ( time gettimeofday tv_interval )
Inherit
Synopsis
my $aa = $sdba->get_AnalysisAdaptor;
my $analysis = $aa->fetch_by_logic_name('HclusterPrepare');
my $rdb = new Bio::EnsEMBL::Compara::RunnableDB::HclusterPrepare(
-input_id => "{'species_set'=>[1,2,3,14]}",
-analysis => $analysis);
$rdb->fetch_input;
$rdb->run;
Description
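Prepares the hcluster input files for a single genome_db. It re-enables the keys on, and analyzes, that genome's peptide_align_feature table, then writes two files into fasta_dir: a category file (<table>.hcluster.cat) listing each distinct query member with its outgroup flag, and a distance file (<table>.hcluster.txt) listing query/hit pairs with a score derived from the e-value.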
Methods
analyze_table | No description | Code |
fetch_categories | No description | Code |
fetch_distances | No description | Code |
fetch_input | No description | Code |
get_params | No description | Code |
run | No description | Code |
write_output | No description | Code |
Methods description
None available.
Methods code
analyze_table | description | prev | next | Top |
sub analyze_table
{ my $self = shift;
my $starttime = time();
my $gdb = $self->{gdb};
my $gdb_id = $gdb->dbID;
my $species_name = lc($gdb->name);
$species_name =~ s/\ /\_/g;
my $tbl_name = "peptide_align_feature"."_"."$species_name"."_"."$gdb_id";
my $sql = "ALTER TABLE $tbl_name ENABLE KEYS";
print("$sql\n") if ($self->debug);
my $sth = $self->dbc->prepare($sql);
$sth->execute();
$sql = "ANALYZE TABLE $tbl_name";
$sth = $self->dbc->prepare($sql);
$sth->execute();
printf(" %1.3f secs to ANALYZE TABLE\n", (time()-$starttime)); } |
sub fetch_categories
{ my $self = shift;
return unless($self->{'gdb'});
my $gdb = $self->{'gdb'};
return unless $gdb;
my $starttime = time();
my $gdb_id = $gdb->dbID;
my $species_name = lc($gdb->name);
$species_name =~ s/\ /\_/g;
my $tbl_name = "peptide_align_feature"."_"."$species_name"."_"."$gdb_id";
my $sql = "SELECT DISTINCT ".
"qmember_id ".
"FROM $tbl_name WHERE qgenome_db_id=$gdb_id;";
print("$sql\n");
my $sth = $self->dbc->prepare($sql);
$sth->execute();
printf("%1.3f secs to execute\n", (time()-$starttime));
print(" done with fetch\n");
my $filename = $self->{fasta_dir} . "/" . "$tbl_name.hcluster.cat";
my $outgroup = 1;
$outgroup = 2 if (defined($self->{outgroups}{$gdb_id}));
my $member_id_hash;
while ( my $ref = $sth->fetchrow_arrayref() ) {
my ($member_id) = @$ref;
$member_id_hash->{$member_id} = 1;
}
$sth->finish;
printf("%1.3f secs to gather distinct\n", (time()-$starttime));
open FILE, ">$filename" or die $!;
foreach my $member_id (keys %$member_id_hash) {
print FILE "$member_id"."_","$gdb_id\t$outgroup\n";
}
close FILE;
printf("%1.3f secs to process\n", (time()-$starttime));
}
1; } |
sub fetch_distances
{ my $self = shift;
return unless($self->{'gdb'});
my $gdb = $self->{'gdb'};
return unless $gdb;
my $starttime = time();
my $gdb_id = $gdb->dbID;
my $species_name = lc($gdb->name);
$species_name =~ s/\ /\_/g;
my $tbl_name = "peptide_align_feature"."_"."$species_name"."_"."$gdb_id";
my $species_set_string = join (",",@{$self->{species_set}});
my $sql = "SELECT ".
"concat(qmember_id,'_',qgenome_db_id), ".
"concat(hmember_id,'_',hgenome_db_id), ".
"IF(evalue<1e-199,100,ROUND(-log10(evalue)/2)) ".
"FROM $tbl_name WHERE qgenome_db_id=$gdb_id and hgenome_db_id in ($species_set_string);";
print("$sql\n");
my $sth = $self->dbc->prepare($sql);
$sth->execute();
printf("%1.3f secs to execute\n", (time()-$starttime));
print(" done with fetch\n");
my $filename = $self->{fasta_dir} . "/" . "$tbl_name.hcluster.txt";
open FILE, ">$filename" or die $!;
while ( my $ref = $sth->fetchrow_arrayref() ) {
my ($query_id, $hit_id, $score) = @$ref;
print FILE "$query_id\t$hit_id\t$score\n";
}
$sth->finish;
close FILE;
printf("%1.3f secs to process\n", (time()-$starttime)); } |
sub fetch_input
{ my( $self) = @_;
$self->{'species_set'} = undef;
$self->throw("No input_id") unless defined($self->input_id);
$self->{'comparaDBA'} = Bio::EnsEMBL::Compara::DBSQL::DBAdaptor->new(-DBCONN=>$self->db->dbc);
$self->{gdba} = $self->{'comparaDBA'}->get_GenomeDBAdaptor;
$self->get_params($self->parameters);
my $input_gdb_id = $self->input_id;
my $gdb = $self->{gdba}->fetch_by_dbID($input_gdb_id);
throw("no genome_db for $input_gdb_id") unless(defined($gdb));
$self->{gdb} = $gdb;
return 1; } |
sub get_params
{ my $self = shift;
my $param_string = shift;
return if ($param_string eq "1");
return unless($param_string);
print("parsing parameter string : ",$param_string,"\n");
my $params = eval($param_string);
return unless($params);
foreach my $key (keys %$params) {
print(" $key : ", $params->{$key}, "\n");
}
if (defined $params->{'species_set'}) {
$self->{'species_set'} = $params->{'species_set'};
}
if (defined $params->{'fasta_dir'}) {
$self->{'fasta_dir'} = $params->{'fasta_dir'};
}
if (defined $params->{'outgroups'}) {
foreach my $outgroup (@{$params->{'outgroups'}}) {
$self->{outgroups}{$outgroup} = 1;
}
}
print("parameters...\n");
printf(" fasta_dir : %d\n", $self->{'fasta_dir'});
printf(" species_set : (%s)\n", join(',', @{$self->{'species_set'}}));
printf(" outgroups : (%s)\n", join(',', keys %{$self->{'outgroups'}}));
return; } |
sub run
{
my $self = shift;
$self->analyze_table();
$self->fetch_categories();
$self->fetch_distances();
return 1; } |
sub write_output
{ my $self = shift;
return 1;
}
} |
General documentation
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _