Bio::EnsEMBL::Compara::RunnableDB UpdatePAFIds
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
Bio::EnsEMBL::Compara::RunnableDB::UpdatePAFIds
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Compara::DBSQL::DBAdaptor
Bio::EnsEMBL::Hive
Switch
Time::HiRes qw ( time gettimeofday tv_interval )
Inherit
Bio::EnsEMBL::Hive::Process
Synopsis
my $aa = $sdba->get_AnalysisAdaptor;
my $analysis = $aa->fetch_by_logic_name('UpdatePAFIds');
my $rdb = new Bio::EnsEMBL::Compara::RunnableDB::UpdatePAFIds(
-input_id => 1,
-analysis => $analysis);
$rdb->fetch_input;
$rdb->run;
Description
This is a Compara-specific RunnableDB. Its parameter string supplies either
a species_set (an arrayref of genome_db_ids) or species_sets (an arrayref of
such arrayrefs). For each of these genomes it looks up the per-species
peptide_align_feature_<species>_<genome_db_id> table and renumbers the
peptide_align_feature_id values so that ids do not overlap between tables.
It is part of the ProteinTree analysis production system.
Methods
fetch_input
No description
Code
get_params
No description
Code
run
No description
Code
updatepafids
No description
Code
write_output
No description
Code
Methods description
None available.
Methods code
fetch_inputdescriptionprevnextTop
# Prepare the runnable for run(): connect to the Compara database, parse the
# parameter string and load one GenomeDB object per requested genome_db_id
# into $self->{'genomeDB_set'}.
#
# Returns 1 on success. Dies when the parameters name no genome_db_ids or
# when a genome_db_id cannot be fetched.
sub fetch_input {
  my( $self) = @_;

  # Start from a clean slate so that a second call on the same object does
  # not inherit ids or GenomeDB objects from the previous one.
  $self->{'species_set'}  = undef;
  $self->{'genomeDB_set'} = [];

  # Create a Compara::DBAdaptor which shares the same DBI handle
  # with the pipeline DBAdaptor that is passed into this runnable
  $self->{'comparaDBA'} = Bio::EnsEMBL::Compara::DBSQL::DBAdaptor->new(-DBCONN=>$self->db->dbc);
  $self->{gdba} = $self->{'comparaDBA'}->get_GenomeDBAdaptor;

  $self->get_params($self->parameters);

  # get_params() leaves species_set undefined when the parameter string is
  # missing or names neither species_set nor species_sets. Fail here with a
  # clear message instead of dereferencing undef.
  my $species_set = $self->{'species_set'};
  unless (defined($species_set) && ref($species_set) eq 'ARRAY' && @$species_set) {
    die("UpdatePAFIds: no genome_db_ids given; expected a 'species_set' or 'species_sets' parameter\n");
  }

  my %seen;
  foreach my $gdb_id (@$species_set) {
    # Make sure we dont have repeated gdbs, specially for sets in Old Homology
    next if (defined($seen{$gdb_id}));
    $seen{$gdb_id} = 1;

    my $gdb = $self->{gdba}->fetch_by_dbID($gdb_id);
    # An undefined entry would only surface later as a failed method call
    # on undef, far away from the offending id.
    die("UpdatePAFIds: could not fetch a GenomeDB for genome_db_id '$gdb_id'\n") unless (defined($gdb));

    push @{$self->{'genomeDB_set'}}, $gdb;
  }

  # Before we were using all the species in genome_db, which is ok for
  # EnsEMBL Compara, but could cause problems for people running their
  # stuff on subsets of genome_db
  # $self->{'genomeDB_set'} = $self->{'comparaDBA'}->get_GenomeDBAdaptor->fetch_all;

  return 1;
}
get_paramsdescriptionprevnextTop
# Parse a parameter string (Perl source for a hash reference) and copy the
# genome_db_ids it names into $self->{'species_set'}.
#
#   $param_string : e.g. "{'species_set'=>[1,2,3]}" or
#                   "{'species_sets'=>[[1,2],[2,3]]}"
#
# 'species_set' is stored as given; every list in 'species_sets' is appended
# to it. Returns nothing. A string that is empty, fails to evaluate, or does
# not evaluate to a hash reference leaves $self untouched.
sub get_params {
  my $self         = shift;
  my $param_string = shift;

  return unless($param_string);
  print("parsing parameter string : ",$param_string,"\n");

  # NOTE(review): string eval executes whatever code the parameter string
  # contains, so it must only ever come from a trusted source.
  my $params = eval($param_string);
  if ($@) {
    # Report the parse failure instead of dropping it silently.
    warn("get_params: could not evaluate parameter string '$param_string' : $@");
    return;
  }
  return unless($params);

  # keys %$params below needs a real hash reference.
  unless (ref($params) eq 'HASH') {
    warn("get_params: parameter string '$param_string' did not evaluate to a hash reference\n");
    return;
  }

  foreach my $key (keys %$params) {
    print("  $key : ", $params->{$key}, "\n");
  }

  # Species_set is usually for the new genetree pipeline
  if (defined $params->{'species_set'}) {
    $self->{'species_set'} = $params->{'species_set'};
  }

  # Species_sets is usually for the old homology pipeline
  if (defined $params->{'species_sets'}) {
    foreach my $species_set (@{$params->{'species_sets'}}) {
      push @{$self->{'species_set'}}, @$species_set;
    }
  }

  return;
}
rundescriptionprevnextTop
# Execute the analysis: the whole job is delegated to updatepafids().
# Always returns 1; the return value of updatepafids() is not used.
sub run {
  my ($self) = @_;

  $self->updatepafids;

  return 1;
}
updatepafidsdescriptionprevnextTop
# Renumber peptide_align_feature_id in the per-species tables
# peptide_align_feature_<species>_<genome_db_id> of every GenomeDB in
# $self->{'genomeDB_set'}, so that the id ranges of the tables do not overlap.
#
# The table holding the largest id is left as it is. Every other table whose
# smallest id is still 1 (or that is empty) is rebuilt through a temporary
# copy whose AUTO_INCREMENT starts just above the highest id handed out so
# far. Tables whose smallest id is already above 1 are taken to have been
# renumbered by an earlier run and are skipped.
#
# Returns 1. Every statement that modifies the database is echoed to STDERR.
sub updatepafids {
  my $self = shift;

  my $starttime = time();
  my $dbc       = $self->dbc;

  # One table per GenomeDB: lower-cased name with blanks turned into
  # underscores, followed by the dbID.
  my @tbl_names;
  foreach my $gdb (@{$self->{'genomeDB_set'} || []}) {
    my $gdb_id = $gdb->dbID;
    my $species_name = lc($gdb->name);
    $species_name =~ s/\ /\_/g;
    push @tbl_names, "peptide_align_feature"."_"."$species_name"."_"."$gdb_id";
  }

  # Without tables there is nothing to query, and no table name to put
  # into the SQL below.
  return 1 unless (@tbl_names);

  # Read the id range of every table once. A list of records is used rather
  # than a hash keyed by the maximum, so that two tables with the same
  # maximum are both kept.
  my @tables;
  foreach my $tbl_name (sort @tbl_names) {
    my ($min_id, $max_id) = _paf_id_range($dbc, $tbl_name);
    push @tables, { 'name' => $tbl_name, 'min' => $min_id, 'max' => $max_id };
  }

  # First offset -- the table with the largest id remains as it is
  # (ties are broken by table name so the choice is reproducible).
  my ($first_table, @rest_tables) =
    sort { $b->{'max'} <=> $a->{'max'} or $a->{'name'} cmp $b->{'name'} } @tables;

  # First id that is free for the next rebuilt table. AUTO_INCREMENT=N makes
  # N the next id handed out, so it has to be one above the highest id in use.
  my $next_id = $first_table->{'max'} + 1;

  my $columns = "qmember_id, hmember_id, qgenome_db_id, hgenome_db_id, analysis_id, qstart, qend, hstart, hend, score, evalue, align_length, identical_matches, perc_ident, positive_matches, perc_pos, hit_rank, cigar_line";

  # Subsequent offsets -- subsequent tables are offsetted
  foreach my $table (sort { $a->{'name'} cmp $b->{'name'} } @rest_tables) {
    my $tbl_name = $table->{'name'};

    # Dont reupdate it if done before. Its ids stay reserved, but the
    # offset must never move backwards into a range that is already taken.
    if ($table->{'min'} > 1) {
      $next_id = $table->{'max'} + 1 if ($table->{'max'} >= $next_id);
      next;
    }

    # Rebuild the table through a temporary copy: the copy gets fresh
    # auto-increment ids starting at $next_id and then replaces the original.
    my $temp_tbl_name = $tbl_name . "_temp";
    _execute_logged_sql($dbc, "CREATE TABLE $temp_tbl_name LIKE $tbl_name");
    _execute_logged_sql($dbc, "ALTER TABLE $temp_tbl_name AUTO_INCREMENT=$next_id");
    # NOTE(review): the keys are not re-enabled anywhere in this method --
    # presumably a later step takes care of the indexes; confirm.
    _execute_logged_sql($dbc, "ALTER TABLE $temp_tbl_name DISABLE KEYS");
    _execute_logged_sql($dbc, "INSERT INTO $temp_tbl_name ($columns) select $columns FROM $tbl_name");
    _execute_logged_sql($dbc, "DROP TABLE $tbl_name");
    _execute_logged_sql($dbc, "RENAME TABLE $temp_tbl_name TO $tbl_name");

    # Continue after the highest id that was really assigned, instead of
    # estimating it from the old maximum.
    my (undef, $new_max) = _paf_id_range($dbc, $tbl_name);
    $next_id = $new_max + 1 if ($new_max >= $next_id);
  }

  printf("  %1.3f secs to Update PAF Ids\n", (time()-$starttime));
  return 1;
}

# Return (min, max) of peptide_align_feature_id in $tbl_name, using 0 for
# both when the table has no rows.
sub _paf_id_range {
  my ($dbc, $tbl_name) = @_;

  my $sql = "SELECT MIN(peptide_align_feature_id) AS min,".
            " MAX(peptide_align_feature_id) AS max".
            " FROM $tbl_name";
  my $sth = $dbc->prepare($sql);
  $sth->execute();
  my $row = $sth->fetchrow_hashref;
  $sth->finish;

  my $min_id = (defined($row) && defined($row->{min})) ? $row->{min} : 0;
  my $max_id = (defined($row) && defined($row->{max})) ? $row->{max} : 0;
  return ($min_id, $max_id);
}

# Echo one SQL statement to STDERR and execute it on $dbc.
sub _execute_logged_sql {
  my ($dbc, $sql) = @_;

  my $sth = $dbc->prepare($sql);
  print STDERR "Executing [", $sql, "].\n";
  $sth->execute();
  return;
}
1;
}
write_outputdescriptionprevnextTop
# Final stage of the runnable. This method stores nothing itself and simply
# reports success by returning 1.
sub write_output {
  my ($self) = @_;

  return 1;
}

##########################################
#
# internal methods
#
##########################################
# This will make sure that the indexes for paf are fine
}
General documentation
CONTACTTop
  Contact Albert Vilella on module implementation/design detail: avilella@ebi.ac.uk
Contact Abel Ureta-Vidal on EnsEMBL/Compara: abel@ebi.ac.uk
Contact Ewan Birney on EnsEMBL in general: birney@sanger.ac.uk
APPENDIXTop
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _