Bio::EnsEMBL::Compara::RunnableDB HclusterPrepare
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
Bio::EnsEMBL::Compara::RunnableDB::HclusterPrepare
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Compara::DBSQL::DBAdaptor
Bio::EnsEMBL::Compara::Graph::ConnectedComponents
Bio::EnsEMBL::Compara::Homology
Bio::EnsEMBL::Compara::MethodLinkSpeciesSet
Bio::EnsEMBL::Compara::NestedSet
Bio::EnsEMBL::Hive
Switch
Time::HiRes qw ( time gettimeofday tv_interval )
Inherit
Bio::EnsEMBL::Hive::Process
Synopsis
my $aa = $sdba->get_AnalysisAdaptor;
my $analysis = $aa->fetch_by_logic_name('HclusterPrepare');
my $rdb = new Bio::EnsEMBL::Compara::RunnableDB::HclusterPrepare(
-input_id => "{'species_set'=>[1,2,3,14]}",
-analysis => $analysis);
$rdb->fetch_input
$rdb->run;
Description
Blah
Methods
analyze_table
No description
Code
fetch_categories
No description
Code
fetch_distances
No description
Code
fetch_input
No description
Code
get_params
No description
Code
run
No description
Code
write_output
No description
Code
Methods description
None available.
Methods code
analyze_tabledescriptionprevnextTop
sub analyze_table {
  my $self = shift;

  my $starttime = time();

  my $gdb = $self->{gdb};
  my $gdb_id = $gdb->dbID;
  my $species_name = lc($gdb->name);
  $species_name =~ s/\ /\_/g;
  my $tbl_name = "peptide_align_feature"."_"."$species_name"."_"."$gdb_id";
  # Re-enable the keys before starting the queries
my $sql = "ALTER TABLE $tbl_name ENABLE KEYS"; print("$sql\n") if ($self->debug); my $sth = $self->dbc->prepare($sql); $sth->execute(); $sql = "ANALYZE TABLE $tbl_name"; #print("$sql\n");
$sth = $self->dbc->prepare($sql); $sth->execute(); printf(" %1.3f secs to ANALYZE TABLE\n", (time()-$starttime));
}
fetch_categoriesdescriptionprevnextTop
sub fetch_categories {
  my $self = shift;

  return unless($self->{'gdb'});
  my $gdb = $self->{'gdb'};
  return unless $gdb;

  my $starttime = time();


  my $gdb_id = $gdb->dbID;
  my $species_name = lc($gdb->name);
  $species_name =~ s/\ /\_/g;
  my $tbl_name = "peptide_align_feature"."_"."$species_name"."_"."$gdb_id";
  my $sql = "SELECT DISTINCT ".
            "qmember_id ".
             "FROM $tbl_name WHERE qgenome_db_id=$gdb_id;";
  print("$sql\n");
  my $sth = $self->dbc->prepare($sql);
  $sth->execute();
  printf("%1.3f secs to execute\n", (time()-$starttime));
  print("  done with fetch\n");

  my $filename = $self->{fasta_dir} . "/" . "$tbl_name.hcluster.cat";

  my $outgroup = 1;
  $outgroup = 2 if (defined($self->{outgroups}{$gdb_id}));

  my $member_id_hash;
  while ( my $ref  = $sth->fetchrow_arrayref() ) {
    my ($member_id) = @$ref;
    $member_id_hash->{$member_id} = 1;
  }
  $sth->finish;
  printf("%1.3f secs to gather distinct\n", (time()-$starttime));
  open FILE, ">$filename" or die $!;
  foreach my $member_id (keys %$member_id_hash) {
    print FILE "$member_id"."_","$gdb_id\t$outgroup\n";
  }
  close FILE;
  printf("%1.3f secs to process\n", (time()-$starttime));
}

1;
}
fetch_distancesdescriptionprevnextTop
sub fetch_distances {
  my $self = shift;

  return unless($self->{'gdb'});
  my $gdb = $self->{'gdb'};
  return unless $gdb;

  my $starttime = time();

  my $gdb_id = $gdb->dbID;
  my $species_name = lc($gdb->name);
  $species_name =~ s/\ /\_/g;
  my $tbl_name = "peptide_align_feature"."_"."$species_name"."_"."$gdb_id";
  my $species_set_string = join (",",@{$self->{species_set}});
  my $sql = "SELECT ".
            "concat(qmember_id,'_',qgenome_db_id), ".
            "concat(hmember_id,'_',hgenome_db_id), ".
            "IF(evalue<1e-199,100,ROUND(-log10(evalue)/2)) ".
             "FROM $tbl_name WHERE qgenome_db_id=$gdb_id and hgenome_db_id in ($species_set_string);";
  print("$sql\n");
  my $sth = $self->dbc->prepare($sql);
  $sth->execute();
  printf("%1.3f secs to execute\n", (time()-$starttime));
  print("  done with fetch\n");
  my $filename = $self->{fasta_dir} . "/" . "$tbl_name.hcluster.txt";
  open FILE, ">$filename" or die $!;
  while ( my $ref  = $sth->fetchrow_arrayref() ) {
    my ($query_id, $hit_id, $score) = @$ref;
    print FILE "$query_id\t$hit_id\t$score\n";
  }
  $sth->finish;
  close FILE;
  printf("%1.3f secs to process\n", (time()-$starttime));
}
fetch_inputdescriptionprevnextTop
sub fetch_input {
  my( $self) = @_;

  $self->{'species_set'} = undef;
  $self->throw("No input_id") unless defined($self->input_id);

  #create a Compara::DBAdaptor which shares the same DBI handle
#with the Pipeline::DBAdaptor that is based into this runnable
$self->{'comparaDBA'} = Bio::EnsEMBL::Compara::DBSQL::DBAdaptor->new(-DBCONN=>$self->db->dbc); $self->{gdba} = $self->{'comparaDBA'}->get_GenomeDBAdaptor; $self->get_params($self->parameters); my $input_gdb_id = $self->input_id; my $gdb = $self->{gdba}->fetch_by_dbID($input_gdb_id); throw("no genome_db for $input_gdb_id") unless(defined($gdb)); $self->{gdb} = $gdb; return 1;
}
get_paramsdescriptionprevnextTop
sub get_params {
  my $self         = shift;
  my $param_string = shift;

  return if ($param_string eq "1");

  return unless($param_string);
  print("parsing parameter string : ",$param_string,"\n");

  my $params = eval($param_string);
  return unless($params);

  foreach my $key (keys %$params) {
    print("  $key : ", $params->{$key}, "\n");
  }

  if (defined $params->{'species_set'}) {
    $self->{'species_set'} = $params->{'species_set'};
  }
  if (defined $params->{'fasta_dir'}) {
    $self->{'fasta_dir'} = $params->{'fasta_dir'};
  }
  if (defined $params->{'outgroups'}) {
    foreach my $outgroup (@{$params->{'outgroups'}}) {
      $self->{outgroups}{$outgroup} = 1;
    }
  }

  print("parameters...\n");
  printf("  fasta_dir    : %d\n", $self->{'fasta_dir'});
  printf("  species_set  : (%s)\n", join(',', @{$self->{'species_set'}}));
  printf("  outgroups    : (%s)\n", join(',', keys %{$self->{'outgroups'}}));

  return;
}
rundescriptionprevnextTop
sub run {
  my $self = shift;

  $self->analyze_table();
  $self->fetch_categories();
  $self->fetch_distances();
  return 1;
}
write_outputdescriptionprevnextTop
sub write_output {
  my $self = shift;

#   $self->store_clusters;
# $self->dataflow_clusters;
# # modify input_job so that it now contains the clusterset_id
# my $outputHash = {};
# $outputHash = eval($self->input_id) if(defined($self->input_id) && $self->input_id =~ /^\s*\{.*\}\s*$/);
# $outputHash->{'clusterset_id'} = $self->{'clusterset_id'};
# my $output_id = $self->encode_hash($outputHash);
# $self->input_job->input_id($output_id);
return 1; } ##########################################
#
# internal methods
#
##########################################
# This will make sure that the indexes for paf are fine
}
General documentation
CONTACTTop
  Contact Albert Vilella on module implemetation/design detail: avilella@ebi.ac.uk
Contact Ewan Birney on EnsEMBL in general: birney@sanger.ac.uk
APPENDIXTop
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _