Raw content of Bio::EnsEMBL::Compara::DBSQL::SubsetAdaptor package Bio::EnsEMBL::Compara::DBSQL::SubsetAdaptor; use strict; use Bio::EnsEMBL::Compara::Member; use Bio::EnsEMBL::Compara::Attribute; use Bio::EnsEMBL::Compara::Subset; use Bio::EnsEMBL::DBSQL::BaseAdaptor; our @ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor); =head2 fetch_by_dbID Arg [1] : int $id the unique database identifier for the feature to be obtained Example : $feat = $adaptor->fetch_by_dbID(1234); Description: Returns the feature created from the database defined by the the id $id. Returntype : Bio::EnsEMBL::Compara::Subset Exceptions : thrown if $id is not defined Caller : general =cut sub fetch_by_dbID{ my ($self,$id) = @_; unless(defined $id) { $self->throw("fetch_by_dbID must have an id"); } my @tabs = $self->_tables; my ($name, $syn) = @{$tabs[0]}; #construct a constraint like 't1.table1_id = 1' my $constraint = "${syn}.${name}_id = $id"; #return first element of _generic_fetch list my ($obj) = @{$self->_generic_fetch($constraint)}; return $obj; } =head2 fetch_by_set_description Arg [1] : string $set_description Example : Description: Returntype : Exceptions : Caller : =cut sub fetch_by_set_description { my ($self,$set_description) = @_; unless(defined $set_description) { $self->throw("fetch_by_set_name must have a set_description"); } #construct a constraint like 't1.table1_id = 1' my $constraint = "s.description = '$set_description'"; print("fetch_by_set_name contraint:\n$constraint\n"); #return first element of _generic_fetch list my ($obj) = @{$self->_generic_fetch($constraint)}; return $obj; } =head2 fetch_all Arg : None Example : Description: Returntype : Exceptions : Caller : =cut sub fetch_all { my $self = shift; return $self->_generic_fetch(); } # # INTERNAL METHODS # ################### =head2 _generic_fetch Arg [1] : (optional) string $constraint An SQL query constraint (i.e. part of the WHERE clause) Arg [2] : (optional) string $logic_name the logic_name of the analysis of the features to obtain Example : $fts = $a->_generic_fetch('contig_id in (1234, 1235)', 'Swall'); Description: Performs a database fetch and returns feature objects in contig coordinates. Returntype : listref of Bio::EnsEMBL::SeqFeature in contig coordinates Exceptions : none Caller : BaseFeatureAdaptor, ProxyDnaAlignFeatureAdaptor::_generic_fetch =cut sub _generic_fetch { my ($self, $constraint, $join) = @_; my @tables = $self->_tables; my $columns = join(', ', $self->_columns()); if ($join) { foreach my $single_join (@{$join}) { my ($tablename, $condition, $extra_columns) = @{$single_join}; if ($tablename && $condition) { push @tables, $tablename; if($constraint) { $constraint .= " AND $condition"; } else { $constraint = " $condition"; } } if ($extra_columns) { $columns .= ", " . join(', ', @{$extra_columns}); } } } #construct a nice table string like 'table1 t1, table2 t2' my $tablenames = join(', ', map({ join(' ', @$_) } @tables)); my $sql = "SELECT $columns FROM $tablenames"; my $default_where = $self->_default_where_clause; my $final_clause = $self->_final_clause; #append a where clause if it was defined if($constraint) { $sql .= " WHERE $constraint "; if($default_where) { $sql .= " AND $default_where "; } } elsif($default_where) { $sql .= " WHERE $default_where "; } #append additional clauses which may have been defined $sql .= " $final_clause"; my $sth = $self->prepare($sql); $sth->execute; # print STDERR $sql,"\n"; return $self->_objs_from_sth($sth); } sub _tables { my $self = shift; return (['subset', 's'], ['subset_member', 'sm']); } sub _columns { my $self = shift; return qw (s.subset_id s.description sm.subset_id sm.member_id); } sub _objs_from_sth { my ($self, $sth) = @_; my %column; $sth->bind_columns( \( @column{ @{$sth->{NAME_lc} } } )); my @sets = (); my %setNames; my %setMemberIds; while ($sth->fetch()) { my ($subset_id, $name, $member_id); $subset_id = $column{'subset_id'}; $name = $column{'description'}; $member_id = $column{'member_id'}; if(defined($setMemberIds{$subset_id})) { $setMemberIds{$subset_id}->{$member_id} = $member_id; } else { $setNames{$subset_id} = $name; $setMemberIds{$subset_id} = {}; $setMemberIds{$subset_id}->{$member_id} = $member_id; } } $sth->finish; my @allSubsetIds = keys(%setNames); foreach my $subset_id (@allSubsetIds) { my ($subset, @member_id_list, $member_id); @member_id_list = keys(%{$setMemberIds{$subset_id}}); my $count = $#member_id_list + 1; # print("subset id = $subset_id has $count unique member_ids\n"); $subset = Bio::EnsEMBL::Compara::Subset->new(-dbid => $subset_id, -name => $setNames{$subset_id}, -adaptor => $self); # print("loading set '" . $setNames{$subset_id} . "' id=$subset_id\n"); @{$subset->{'_member_id_list'}} = @member_id_list; push @sets, $subset; } return \@sets } sub _default_where_clause { my $self = shift; return 's.subset_id = sm.subset_id'; } sub _final_clause { my $self = shift; return ''; } # # STORE METHODS # ################ =head2 store Arg [1] : Example : Description: Returntype : Exceptions : Caller : =cut sub store { my ($self,$subset) = @_; unless($subset->isa('Bio::EnsEMBL::Compara::Subset')) { $self->throw( "set arg must be a [Bio::EnsEMBL::Compara::Subset] " . "not a $subset"); } my $sth = $self->prepare("INSERT ignore INTO subset (description) VALUES (?)"); if($sth->execute($subset->description) >0) { $subset->dbID( $sth->{'mysql_insertid'} ); } else { #print("insert failed, do select\n"); my $sth2 = $self->prepare("SELECT subset_id FROM subset WHERE description=?"); $sth2->execute($subset->description); my($id) = $sth2->fetchrow_array(); $subset->dbID($id); $sth2->finish; } $sth->finish; #print("SubsetAdaptor:store() dbID = ", $subset->dbID, "\n"); my @memberIds = @{$subset->member_id_list()}; $sth = $self->prepare("INSERT ignore INTO subset_member (subset_id, member_id) VALUES (?,?)"); foreach my $member_id (@memberIds) { $sth->execute($subset->dbID, $member_id) if($member_id); } $sth->finish; $subset->adaptor($self); return $subset->dbID; } =head2 store_link Arg [1] : Bio::EnsEMBL::Compara::MemberSet $subset Arg [2] : int $member_id Example : Description: Returntype : Exceptions : Caller : =cut sub store_link { my ($self, $subset, $member_id) = @_; unless($subset->isa('Bio::EnsEMBL::Compara::Subset')) { $self->throw( "set arg must be a [Bio::EnsEMBL::Compara::Subset] " . "not a $subset"); } return unless($member_id); my $sth = $self->prepare("INSERT ignore INTO subset_member (subset_id, member_id) VALUES (?,?)"); $sth->execute($subset->dbID, $member_id); $sth->finish; } =head2 delete_link Arg [1] : Bio::EnsEMBL::Compara::MemberSet $subset Arg [2] : int $member_id Example : Description: Returntype : Exceptions : Caller : =cut sub delete_link { my ($self, $subset, $member_id) = @_; unless($subset->isa('Bio::EnsEMBL::Compara::Subset')) { $self->throw( "set arg must be a [Bio::EnsEMBL::Compara::Subset] " . "not a $subset"); } my $sth = $self->prepare("DELETE FROM subset_member WHERE subset_id=? AND member_id=?"); $sth->execute($subset->dbID, $member_id); $sth->finish; } sub dumpFastaForSubset { my($self, $subset, $fastafile) = @_; unless($subset->isa('Bio::EnsEMBL::Compara::Subset')) { $self->throw( "set arg must be a [Bio::EnsEMBL::Compara::Subset] " . "not a $subset"); } unless($subset->dbID) { $self->throw("subset must be in database and dbID defined"); } my $sql = "SELECT member.source_name, member.stable_id, member.genome_db_id," . " member.member_id, member.description, sequence.sequence " . " FROM member, sequence, subset_member " . " WHERE subset_member.subset_id = " . $subset->dbID . " AND member.member_id=subset_member.member_id ". " AND member.sequence_id=sequence.sequence_id " ; # " ORDER BY member.stable_id;"; open FASTAFILE, ">$fastafile" or die "Could open $fastafile for output\n"; print("writing fasta to loc '$fastafile'\n"); my $sth = $self->prepare( $sql ); $sth->execute(); my ($source_name, $stable_id, $genome_db_id, $member_id, $description, $sequence); $sth->bind_columns( undef, \$source_name, \$stable_id, \$genome_db_id, \$member_id, \$description, \$sequence ); while( $sth->fetch() ) { $sequence =~ s/(.{72})/$1\n/g; $genome_db_id ||= 0; print FASTAFILE ">$source_name:$stable_id IDs:$genome_db_id:$member_id $description\n$sequence\n"; } close(FASTAFILE); $sth->finish(); # # update this subset_id's subset.dump_loc with the full path of this dumped fasta file # $sth = $self->prepare("UPDATE subset SET dump_loc = ? WHERE subset_id = ?"); $sth->execute($fastafile, $subset->dbID); $sth->finish; $subset->dump_loc($fastafile); } 1;