Raw content of Bio::EnsEMBL::Funcgen::DBSQL::ProbeSetAdaptor # # Ensembl module for Bio::EnsEMBL::Funcgen::DBSQL::ProbeSetAdaptor # # You may distribute this module under the same terms as Perl itself =head1 NAME Bio::EnsEMBL::DBSQL::ProbeSetAdaptor - A database adaptor for fetching and storing ProbeSet objects. =head1 SYNOPSIS my $opa = $db->get_ProbeSetAdaptor(); my $probeset = $opa->fetch_by_array_probeset_name('Array-1', 'ProbeSet-1'); =head1 DESCRIPTION The ProbeSetAdaptor is a database adaptor for storing and retrieving ProbeSet objects. =head1 AUTHOR This module was created by Nathan Johnson, but is almost entirely based on the ProbeAdaptor module written by Ian Sealy and Arne Stabenau. This module is part of the Ensembl project: / =head1 CONTACT Post comments or questions to the Ensembl development list: ensembl-dev@ebi.ac.uk =head1 METHODS =cut use strict; use warnings; package Bio::EnsEMBL::Funcgen::DBSQL::ProbeSetAdaptor; use Bio::EnsEMBL::Utils::Exception qw( throw warning ); use Bio::EnsEMBL::Funcgen::ProbeSet; use Bio::EnsEMBL::DBSQL::BaseAdaptor; use vars qw(@ISA); @ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor); #may need to pass array object, as there is a possibilty of it being non-unique between vendors? =head2 fetch_by_array_probeset_name Arg [1] : string - name of array Arg [2] : string - name of probeset Example : my $probeset = $opsa->fetch_by_array_probeset_name('Array-1', 'Probeset-1'); Description: Returns a probeset given the array name and probeset name This will uniquely define a probeset. Only one probeset is ever returned. Returntype : Bio::EnsEMBL::ProbeSet Exceptions : None Caller : General Status : At Risk =cut sub fetch_by_array_probeset_name { my $self = shift; my $array_name = shift; my $probeset_name = shift; my $sth = $self->prepare(" SELECT probe_set_id FROM probe_set ps, array a, array_chip ac WHERE a.array_id = ac.array_id AND a.name = ? AND ps.name = ? "); $sth->bind_param(1, $array_name, SQL_VARCHAR); $sth->bind_param(2, $probeset_name, SQL_VARCHAR); $sth->execute(); my ($probeset_id) = $sth->fetchrow(); if ($probeset_id) { return $self->fetch_by_dbID($probeset_id); } else { return undef; } } =head2 fetch_all_by_Array Arg [1] : Bio::EnsEMBL::Array Example : my @probesets = @{$opsa->fetch_all_by_Array($array)}; Description: Fetch all probes on a particular array. Returntype : Listref of Bio::EnsEMBL::ProbeSet objects. Exceptions : None Caller : General Status : At Risk =cut sub fetch_all_by_Array { my $self = shift; my $array = shift; throw("Not yet implemented"); my ($probeset_id, @probesets); if ( !ref($array) || !$array->isa('Bio::EnsEMBL::Array') ) { warning('fetch_all_by_Array requires a Bio::EnsEMBL::Array object'); return []; } my $array_id = $array->dbID(); if (!defined $array_id) { warning('fetch_all_by_Array requires a stored Bio::EnsEMBL::Array object'); return []; } #Nath #retrieve all array_chip_ids and do a generic fetch using a joined or statement? #or #build and array of probesets using the fetch_by_dbID method my $sth = $self->prepare(" SELECT probe_set_id FROM probe_set ps, array a, array_chip ac WHERE a.array_id = ac.array_id AND ac.array_chip_id = ps.array_chip_id AND a.name = $array_id "); $sth->execute(); while($probeset_id = $sth->fetchrow()){ push @probesets, $self->fetch_by_dbID($probeset_id); } return \@probesets; } =head2 fetch_by_ProbeFeature Arg [1] : Bio::EnsEMBL::ProbeFeature Example : my $probeset = $opsa->fetch_by_ProbeFeature($feature); Description: Returns the probeset that created a particular feature. Returntype : Bio::EnsEMBL::ProbeSet Exceptions : Throws if argument is not a Bio::EnsEMBL::ProbeFeature object Caller : General Status : At Risk =cut sub fetch_by_ProbeFeature { my $self = shift; my $feature = shift; if ( !ref($feature) || !$feature->isa('Bio::EnsEMBL::Funcgen::ProbeFeature') || !$feature->{'probe_id'} ) { throw('fetch_by_ProbeFeature requires a stored Bio::EnsEMBL::Funcgen::ProbeFeature object'); } my $sth = $self->prepare(" SELECT probe_set_id FROM probe_set ps, probe p, probe_feature pf WHERE pf.probe_id = p.probe_id AND ps.probe_set_id = p.probe_set_id AND pf.probe_feature_id = ? "); $sth->bind_param(1, $feature->{'probe_id'}, SQL_VARCHAR); $sth->execute(); my ($probeset_id) = $sth->fetchrow(); return $self->fetch_by_dbID($probeset_id); } =head2 _tables Args : None Example : None Description: PROTECTED implementation of superclass abstract method. Returns the names and aliases of the tables to use for queries. Returntype : List of listrefs of strings Exceptions : None Caller : Internal Status : At Risk =cut sub _tables { my $self = shift; return [ 'probe_set', 'ps' ]; } =head2 _columns Args : None Example : None Description: PROTECTED implementation of superclass abstract method. Returns a list of columns to use for queries. Returntype : List of strings Exceptions : None Caller : Internal Status : At Risk =cut sub _columns { my $self = shift; #remove xref_id and use xref tables return qw( ps.probe_set_id ps.name ps.size ps.family); } =head2 _objs_from_sth Arg [1] : DBI statement handle object Example : None Description: PROTECTED implementation of superclass abstract method. Creates ProbeSet objects from an executed DBI statement handle. Returntype : Listref of Bio::EnsEMBL::ProbeSet objects Exceptions : None Caller : Internal Status : At Risk =cut sub _objs_from_sth { my ($self, $sth) = @_; my (@result, $current_dbid, $probeset_id, $name, $size, $family); my ($array, %array_cache); $sth->bind_columns( \$probeset_id, \$name, \$size, \$family); #do not have array_chip adaptor #use array adaptor directly #how are we going ot handle the cache here????? my $probeset; while ( $sth->fetch() ) { #$array = $array_cache{$array_id} || $self->db->get_ArrayAdaptor()->fetch_by_dbID($array_id); #This is nesting array object in probeset! #$array = $array_cache{$arraychip_id} || $self->db->get_ArrayAdaptor()->fetch_by_array_chip_dbID($arraychip_id); #Is this required? or should we lazy load this? #Should we also do the same for probe i.e. nest or lazy load probeset #Setting here prevents, multiple queries, but if we store the array cache in the adaptor we can overcome this #danger of eating memory here, but it's onld the same as would be used for generating all the probesets #what about clearing the cache? #also as multiple array_chips map to same array, cache would be redundant #need to store only once and reference. #have array_cache and arraychip_map #arraychip_map would give array_id which would be key in array cache #This is kinda reinventing the wheel, but reducing queries and redundancy of global cache #cache would never be populated if method not called #there for reducing calls and memory, increasing speed of generation/initation #if method were called #would slightly slow down processing, and would slightly increase memory as cache(small as non-redundant) #and map hashes would persist #Do we even need this???? #warn("Can we lazy load the arrays from a global cache, which is itself lazy loaded and non-redundant?\n"); #this current id stuff is due to lack of probeset table in core #if (!$current_dbid || $current_dbid != $probeset_id) { # New probeset $probeset = Bio::EnsEMBL::Funcgen::ProbeSet->new ( -dbID => $probeset_id, -name => $name, -size => $size, # -array => $array, -family => $family, -adaptor => $self, ); push @result, $probeset; #$current_dbid = $probeset_id; #} else { # # Extend existing probe # $probe->add_Array_probename($array, $name); #} } return \@result; } =head2 store Arg [1] : List of Bio::EnsEMBL::Funcgen::ProbeSet objects Example : $opa->store($probeset1, $probeset2, $probeset3); Description: Stores given ProbeSet objects in the database. Should only be called once per probe because no checks are made for duplicates.??? It certainly looks like there is :/ Sets dbID and adaptor on the objects that it stores. Returntype : None Exceptions : Throws if arguments are not Probe objects Caller : General Status : At Risk =cut sub store { my ($self, @probesets) = @_; my ($sth, $array); if (scalar @probesets == 0) { throw('Must call store with a list of Probe objects'); } my $db = $self->db(); PROBESET: foreach my $probeset (@probesets) { if ( !ref $probeset || !$probeset->isa('Bio::EnsEMBL::Funcgen::ProbeSet') ) { throw('ProbeSet must be an ProbeSet object'); } if ( $probeset->is_stored($db) ) { warning('ProbeSet [' . $probeset->dbID() . '] is already stored in the database'); next PROBESET; } # Get all the arrays this probe is on and check they're all in the database #my $arrays = $probeset->get_all_Arrays(); #my @stored_arrays; #for $array (@$arrays) { # if ( defined $array->dbID() ) { # push @stored_arrays, $array; # } #} #if ( !@stored_arrays ) { # warning('ProbeSets need attached arrays to be stored in the database'); # next PROBESET; #} # Insert separate entry (with same probe_set_id) in probe_set # for each array the probe is on #my $dbID; #for $array (@stored_arrays) { # # if (defined $dbID) { # $sth = $self->prepare(" # INSERT INTO probe_set # (probe_set_id, name, size, array_chip_id, family) # VALUES (?, ?, ?, ?, ?, ?) # "); # # # Probesets we've seen already # $sth->bind_param(1, $dbID, SQL_INTEGER); # $sth->bind_param(2, $probeset->name(), SQL_VARCHAR); # $sth->bind_param(3, $probeset->size(), SQL_INTEGER); # $sth->bind_param(4, $probeset->array_chip_id(), SQL_INTEGER); # $sth->bind_param(5, $probeset->family(), SQL_VARCHAR); # $sth->execute(); # } else { # New probeset $sth = $self->prepare(" INSERT INTO probe_set (name, size, family) VALUES (?, ?, ?) "); $sth->bind_param(1, $probeset->name(), SQL_VARCHAR); $sth->bind_param(2, $probeset->size(), SQL_INTEGER); $sth->bind_param(3, $probeset->family(), SQL_VARCHAR); $sth->execute(); my $dbID = $sth->{'mysql_insertid'}; $probeset->dbID($dbID); $probeset->adaptor($self); #} #} } return \@probesets; } =head2 list_dbIDs Arg [1] : none Example : my @ps_ids = @{$opa->list_dbIDs()}; Description: Gets an array of internal IDs for all ProbeSet objects in the current database. Returntype : List of ints Exceptions : None Caller : ? Status : Medium Risk =cut sub list_dbIDs { my ($self) = @_; return $self->_list_dbIDs('probe_set'); } 1;