# Raw content of Bio::EnsEMBL::Funcgen::DBSQL::ProbeFeatureAdaptor
#
# Ensembl module for Bio::EnsEMBL::Funcgen::DBSQL::ProbeFeatureAdaptor
#
# You may distribute this module under the same terms as Perl itself

=head1 NAME

Bio::EnsEMBL::Funcgen::DBSQL::ProbeFeatureAdaptor - A database adaptor for
fetching and storing ProbeFeature objects.

=head1 SYNOPSIS

my $ofa = $db->get_ProbeFeatureAdaptor();

my $features = $ofa->fetch_all_by_Probe($probe);
$features = $ofa->fetch_all_by_Slice_arrayname($slice, 'Array-1', 'Array-2');

=head1 DESCRIPTION

The ProbeFeatureAdaptor is a database adaptor for storing and retrieving
ProbeFeature objects.

=head1 AUTHOR

This module was created by Nathan Johnson.

This module is part of the Ensembl project: http://www.ensembl.org/

=head1 CONTACT

Post comments or questions to the Ensembl development list: ensembl-dev@ebi.ac.uk

=head1 METHODS

=cut

use strict;
use warnings;

package Bio::EnsEMBL::Funcgen::DBSQL::ProbeFeatureAdaptor;

use Bio::EnsEMBL::Utils::Exception qw( throw warning );
use Bio::EnsEMBL::Funcgen::ProbeFeature;
use Bio::EnsEMBL::Funcgen::DBSQL::BaseFeatureAdaptor;
use Bio::EnsEMBL::Funcgen::DBSQL::BaseAdaptor;

use vars qw(@ISA);

@ISA = qw(Bio::EnsEMBL::Funcgen::DBSQL::BaseFeatureAdaptor Bio::EnsEMBL::Funcgen::DBSQL::BaseAdaptor);


=head2 fetch_all_by_Probe

  Arg [1]    : Bio::EnsEMBL::Funcgen::Probe
  Example    : my $features = $ofa->fetch_all_by_Probe($probe);
  Description: Fetches all features that a given probe creates.
  Returntype : Listref of Bio::EnsEMBL::Funcgen::ProbeFeature objects
  Exceptions : Throws if argument is not a stored Probe object
  Caller     : Probe->get_all_ProbeFeatures()
  Status     : At Risk

=cut

sub fetch_all_by_Probe {
    my $self  = shift;
    my $probe = shift;

    # Both conditions must reject on their own: a non-reference can never be a
    # Probe, and a reference of the wrong class must not slip through either.
    if ( !ref($probe) || !$probe->isa('Bio::EnsEMBL::Funcgen::Probe') ) {
        throw('fetch_all_by_Probe requires a Bio::EnsEMBL::Funcgen::Probe object');
    }

    if ( !defined $probe->dbID() ) {
        throw('fetch_all_by_Probe requires a stored Bio::EnsEMBL::Funcgen::Probe object');
    }

    return $self->generic_fetch( 'pf.probe_id = ' . $probe->dbID() );
}


=head2 fetch_all_by_Probe_id

  Arg [1]    : int - Probe dbID
  Example    : my @features = @{$ofa->fetch_all_by_Probe_id($pid)};
  Description: Fetches all features that a given probe creates.
  Returntype : Listref of Bio::EnsEMBL::Funcgen::ProbeFeature objects
  Exceptions : Throws if argument not defined
  Caller     : Probe->get_all_ProbeFeatures()
  Status     : At Risk

=cut

sub fetch_all_by_Probe_id {
    my $self = shift;
    my $pid  = shift;

    if ( !defined $pid ) {
        throw('Need to specify a probe _id');
    }

    return $self->generic_fetch( 'pf.probe_id = ' . $pid );
}


=head2 fetch_all_by_probeset

  Arg [1]    : string - probeset
  Example    : my $features = $ofa->fetch_all_by_probeset('Set-1');
  Description: Fetches all features that a given probeset creates.
               Currently not implemented: always throws.
  Returntype : Listref of Bio::EnsEMBL::Funcgen::ProbeFeature objects
  Exceptions : Always throws (not implemented)
  Caller     : General
  Status     : At Risk

=cut

sub fetch_all_by_probeset {
    my $self     = shift;
    my $probeset = shift;

    # Deliberately disabled; everything below this throw is unreachable and is
    # kept only as a sketch of the intended implementation.
    throw("Not implmeneted\n");

    if (!$probeset) {
        throw('fetch_all_by_probeset requires a probeset argument');
    }

    return $self->generic_fetch( "p.probeset = '$probeset'" );
}

#Need to add:
#fetch_all_by_Slice_Experiment
#fetch_all_by_Slice_experimentname ? name not unique enough?


=head2 fetch_all_by_Slice_arrayname

  Arg [1]    : Bio::EnsEMBL::Slice
  Arg [2...] : List of strings - array name(s)
  Example    : my $slice = $sa->fetch_by_region('chromosome', '1');
               my $features = $ofa->fetch_all_by_Slice_arrayname($slice, 'Array-1');
  Description: Retrieves a list of features on a given slice that are created
               by probes from the specified arrays.
               Currently disabled: always throws.
  Returntype : Listref of Bio::EnsEMBL::Funcgen::ProbeFeature objects
  Exceptions : Always throws (disabled, see message)
  Caller     : Slice->get_all_ProbesFeatures()
  Status     : At Risk

=cut

sub fetch_all_by_Slice_arrayname {
    my ($self, $slice, @arraynames) = @_;

    # Deliberately disabled; everything below this throw is unreachable and is
    # kept only as a sketch of the intended implementation.
    throw("This should return data from all experiments, but will break if arrays are mapped to different coord_systems");

    throw('Need array name as parameter') if !@arraynames;

    my $constraint;

    if (scalar @arraynames == 1) {
        #Will this work
        #will this pick up the array_chip_id link from array_chip to probe?
        $constraint = qq( a.name = '$arraynames[0]' AND a.array_id = ac.array_id );
        #$constraint = qq( a.name = '$arraynames[0]' );
    }
    else {
        throw("Not implemented for multple arrays");
        $constraint = join q(','), @arraynames;
        $constraint = qq( a.name IN ('$constraint') );
    }

    return $self->SUPER::fetch_all_by_Slice_constraint($slice, $constraint);
}

#should this take >1 EC? What if we can't fit a all mappings onto one chip
#Would possibly miss some from the slice


=head2 fetch_all_by_Slice_ExperimentalChips

  Arg [1]    : Bio::EnsEMBL::Slice
  Arg [2]    : listref of Bio::EnsEMBL::Funcgen::ExperimentalChip objects
  Example    : my $slice = $sa->fetch_by_region('chromosome', '1');
               my $features = $ofa->fetch_all_by_Slice_ExperimentalChips($slice, \@exp_chips);
  Description: Retrieves a list of features on a given slice that are created
               by probes from the given ExperimentalChips.
  Returntype : Listref of Bio::EnsEMBL::Funcgen::ProbeFeature objects
  Exceptions : Throws if a non-empty listref of ExperimentalChip objects is
               not provided
  Caller     :
  Status     : At Risk

=cut

sub fetch_all_by_Slice_ExperimentalChips {
    my ($self, $slice, $exp_chips) = @_;

    # An empty or missing list would otherwise generate "IN ()", which is
    # invalid SQL, so reject it up front with the documented exception.
    if ( !( ref($exp_chips) eq 'ARRAY' && @$exp_chips ) ) {
        throw("Need pass listref of valid Bio::EnsEMBL::Funcgen::ExperimentalChip objects");
    }

    # Hash keys give a non-redundant set of array_chip ids.
    my %nr;

    foreach my $ec (@$exp_chips) {
        throw("Need pass listref of valid Bio::EnsEMBL::Funcgen::ExperimentalChip objects")
            if !( ref($ec) && $ec->isa("Bio::EnsEMBL::Funcgen::ExperimentalChip") );

        $nr{ $ec->array_chip_id() } = 1;
    }

    my $constraint = " p.array_chip_id IN (" . join(", ", keys %nr) . ") AND p.probe_id = pf.probe_id ";

    return $self->SUPER::fetch_all_by_Slice_constraint($slice, $constraint);
}


=head2 fetch_all_by_Slice_Array

  Arg [1]    : Bio::EnsEMBL::Slice
  Arg [2]    : Bio::EnsEMBL::Funcgen::Array
  Example    : my $slice = $sa->fetch_by_region('chromosome', '1');
               my $features = $pfa->fetch_all_by_Slice_Array($slice, $array);
  Description: Retrieves a list of features on a given slice that are created
               by probes from the given Array. Returns an empty listref if the
               Array has no ArrayChips.
  Returntype : Listref of Bio::EnsEMBL::Funcgen::ProbeFeature objects
  Exceptions : Throws if a valid stored Array object is not provided
  Caller     :
  Status     : At Risk

=cut

sub fetch_all_by_Slice_Array {
    my ($self, $slice, $array) = @_;

    throw("Need pass a valid stored Bio::EnsEMBL::Funcgen::Array object")
        if !( ref($array) && $array->isa("Bio::EnsEMBL::Funcgen::Array") && $array->dbID );

    my @ac_ids = map { $_->dbID } @{ $array->get_ArrayChips };

    # No array chips means no probes and hence no features; avoid emitting the
    # invalid SQL fragment "IN ()".
    return [] if !@ac_ids;

    my $constraint = " p.array_chip_id IN (" . join(", ", @ac_ids) . ") AND p.probe_id = pf.probe_id ";

    return $self->SUPER::fetch_all_by_Slice_constraint($slice, $constraint);
}


=head2 fetch_all_by_Slice_Arrays

  Arg [1]    : Bio::EnsEMBL::Slice
  Arg [2]    : ARRAYREF of Bio::EnsEMBL::Funcgen::Array objects
  Arg [3]    : HASHREF - optional params hash e.g. {logic_name => 'AFFY_ProbeTranscriptAlign'}
  Example    : my $slice = $sa->fetch_by_region('chromosome', '1');
               my $features = $pfa->fetch_all_by_Slice_Arrays($slice, \@arrays, {logic_name => 'AFFY_ProbeTranscriptAlign'});
  Description: Retrieves a list of features on a given slice that are created
               by probes from the given Arrays. Returns an empty listref if
               none of the Arrays has any ArrayChips.
  Returntype : Listref of Bio::EnsEMBL::Funcgen::ProbeFeature objects
  Exceptions : Throws if a non-empty ARRAYREF of valid stored Array objects is
               not provided
  Caller     :
  Status     : At Risk

=cut

sub fetch_all_by_Slice_Arrays {
    my ($self, $slice, $arrays, $params) = @_;

    # Read the same key that is documented ('logic_name'); stays undef when no
    # params hash, or no such key, was supplied.
    my $logic_name;
    if ( ref($params) eq 'HASH' && exists $params->{'logic_name'} ) {
        $logic_name = $params->{'logic_name'};
    }

    if ( !( ref($arrays) eq 'ARRAY' && @$arrays ) ) {
        throw('Must pass an ARRAYREF of Bio::EnsEMBL::Funcgen::Array objects');
    }

    my @ac_ids;

    foreach my $array (@$arrays) {
        throw("Must pass an ARRAYREF of valid stored Bio::EnsEMBL::Funcgen::Array objects")
            if !( ref($array) && $array->isa("Bio::EnsEMBL::Funcgen::Array") && $array->dbID );

        push @ac_ids, map { $_->dbID } @{ $array->get_ArrayChips };
    }

    # No array chips means no probes and hence no features; avoid emitting the
    # invalid SQL fragment "IN ()".
    return [] if !@ac_ids;

    my $constraint = " p.array_chip_id IN (" . join(", ", @ac_ids) . ") AND p.probe_id = pf.probe_id ";

    return $self->SUPER::fetch_all_by_Slice_constraint($slice, $constraint, $logic_name);
}


=head2 _tables

  Args       : None
  Example    : None
  Description: PROTECTED implementation of superclass abstract method.
               Returns the names and aliases of the tables to use for queries.
  Returntype : List of listrefs of strings
  Exceptions : None
  Caller     : Internal
  Status     : At Risk

=cut

sub _tables {
    my $self = shift;

    return (
        [ 'probe_feature', 'pf' ],
        [ 'probe',         'p'  ],
        #[ 'array_chip',   'ac' ],#these are required for array based queries not implemented yet
        #[ 'array',        'a'  ]
    );
}


=head2 _columns

  Args       : None
  Example    : None
  Description: PROTECTED implementation of superclass abstract method.
               Returns a list of columns to use for queries. The order must
               match the bind_columns call in _objs_from_sth.
  Returntype : List of strings
  Exceptions : None
  Caller     : Internal
  Status     : At Risk

=cut

sub _columns {
    my $self = shift;

    return qw(
        pf.probe_feature_id  pf.seq_region_id
        pf.seq_region_start  pf.seq_region_end
        pf.seq_region_strand pf.probe_id
        pf.analysis_id       pf.mismatches
        pf.cigar_line        p.name
    );
}


=head2 _default_where_clause

  Args       : None
  Example    : None
  Description: PROTECTED implementation of superclass abstract method.
               Returns an additional table joining constraint to use for
               queries.
  Returntype : String
  Exceptions : None
  Caller     : Internal
  Status     : At Risk

=cut

sub _default_where_clause {
    my $self = shift;

    return 'pf.probe_id = p.probe_id';# AND p.array_chip_id = ac.array_chip_id';
}


=head2 _final_clause

  Args       : None
  Example    : None
  Description: PROTECTED implementation of superclass abstract method.
               Returns an ORDER BY clause. Sorting by probe_feature_id would be
               enough to eliminate duplicates, but sorting by location might
               make fetching features on a slice faster.
  Returntype : String
  Exceptions : None
  Caller     : generic_fetch
  Status     : At Risk

=cut

sub _final_clause {
    return ' ORDER BY pf.seq_region_id, pf.seq_region_start, pf.probe_feature_id';
}


=head2 _objs_from_sth

  Arg [1]    : DBI statement handle object
  Arg [2]    : (optional) mapper - remapping is not yet implemented and throws
  Arg [3]    : (optional) destination Bio::EnsEMBL::Slice
  Example    : None
  Description: PROTECTED implementation of superclass abstract method.
               Creates ProbeFeature objects from an executed DBI statement
               handle. Consecutive rows with the same probe_feature_id are
               collapsed into one feature. Rows whose seq_region cannot be
               resolved to a slice are skipped with a warning.
  Returntype : Listref of Bio::EnsEMBL::Funcgen::ProbeFeature objects
  Exceptions : Throws if a mapper is passed and a row reaches the remapping step
  Caller     : Internal
  Status     : At Risk

=cut

sub _objs_from_sth {
    my ($self, $sth, $mapper, $dest_slice) = @_;

    #For EFG this has to use a dest_slice from core/dnaDB whether specified or not.
    #So if it not defined then we need to generate one derived from the species_name and schema_build of the feature we're retrieving.

    # This code is ugly because caching is used to improve speed

    my ($seq_region_id);
    my $sa = $self->db->get_SliceAdaptor();
    $sa = $dest_slice->adaptor->db->get_SliceAdaptor() if ($dest_slice);#don't really need this if we're using DNADBSliceAdaptor?

    #Some of this in now probably overkill as we'll always be using the DNADB as the slice DB
    #Hence it should always be on the same coord system, unless we're projecting
    my $aa = $self->db->get_AnalysisAdaptor();
    my @features;
    my (%analysis_hash, %slice_hash, %sr_name_hash, %sr_cs_hash);

    my (
        $probe_feature_id,  $efg_seq_region_id,
        $seq_region_start,  $seq_region_end,
        $seq_region_strand, $mismatches,
        $probe_id,          $analysis_id,
        $probe_name,        $cigar_line,
    );

    # Bind order follows the column order returned by _columns.
    $sth->bind_columns(
        \$probe_feature_id,  \$efg_seq_region_id,
        \$seq_region_start,  \$seq_region_end,
        \$seq_region_strand, \$probe_id,
        \$analysis_id,       \$mismatches,
        \$cigar_line,        \$probe_name
    );

    my ($asm_cs, $cmp_cs, $asm_cs_name, $asm_cs_vers, $cmp_cs_name, $cmp_cs_vers);

    if ($mapper) {
        $asm_cs      = $mapper->assembled_CoordSystem();
        $cmp_cs      = $mapper->component_CoordSystem();
        $asm_cs_name = $asm_cs->name();
        $asm_cs_vers = $asm_cs->version();
        $cmp_cs_name = $cmp_cs->name();
        $cmp_cs_vers = $cmp_cs->version();
    }

    my ($dest_slice_start, $dest_slice_end, $dest_slice_strand);
    my ($dest_slice_length, $dest_slice_sr_name);

    if ($dest_slice) {
        $dest_slice_start   = $dest_slice->start();
        $dest_slice_end     = $dest_slice->end();
        $dest_slice_strand  = $dest_slice->strand();
        $dest_slice_length  = $dest_slice->length();
        $dest_slice_sr_name = $dest_slice->seq_region_name();
    }

    #This has already been done by
    #build seq_region_cache based on slice
    #$self->build_seq_region_cache_by_Slice($slice);

    my $last_pfid;

  FEATURE: while ( $sth->fetch() ) {
        #Need to build a slice adaptor cache here?
        #Would only ever want to do this if we enable mapping between assemblies??
        #Or if we supported the mapping between cs systems for a given schema_build, which would have to be handled by the core api

        #This is only required due to multiple records being returned
        #when using fetch_all_by_Arrays type methods
        next if ($last_pfid && ($last_pfid == $probe_feature_id));
        $last_pfid = $probe_feature_id;

        #get core seq_region_id
        $seq_region_id = $self->get_core_seq_region_id($efg_seq_region_id);

        if (!$seq_region_id) {
            warn "Cannot get slice for eFG seq_region_id $efg_seq_region_id\n".
                "The region you are using is not present in the cuirrent dna DB";
            next;
        }

        # Get the analysis object
        my $analysis = $analysis_hash{$analysis_id} ||= $aa->fetch_by_dbID($analysis_id);

        # Get the slice object
        my $slice = $slice_hash{'ID:'.$seq_region_id};

        if (!$slice) {
            $slice = $sa->fetch_by_seq_region_id($seq_region_id);

            #need to check once more here as it may not be in the DB,
            #i.e. a supercontig(non-versioned) may have been deleted between releases
            if (!$slice) {
                warn "Cannot get slice for seq_region_id $seq_region_id\n";
                next FEATURE;
            }

            $slice_hash{'ID:'.$seq_region_id} = $slice;
            $sr_name_hash{$seq_region_id}     = $slice->seq_region_name();
            $sr_cs_hash{$seq_region_id}       = $slice->coord_system();
        }

        my $sr_name = $sr_name_hash{$seq_region_id};
        my $sr_cs   = $sr_cs_hash{$seq_region_id};

        # Remap the feature coordinates to another coord system if a mapper was provided
        if ($mapper) {

            # Remapping is deliberately disabled; the code below this throw is
            # unreachable and kept as a sketch of the intended implementation.
            throw("Not yet implmented mapper, check equals are Funcgen calls too!");

            ($sr_name, $seq_region_start, $seq_region_end, $seq_region_strand)
                = $mapper->fastmap($sr_name, $seq_region_start, $seq_region_end, $seq_region_strand, $sr_cs);

            # Skip features that map to gaps or coord system boundaries
            next FEATURE if !defined $sr_name;

            # Get a slice in the coord system we just mapped to
            if ( $asm_cs == $sr_cs || ( $cmp_cs != $sr_cs && $asm_cs->equals($sr_cs) ) ) {
                $slice = $slice_hash{"NAME:$sr_name:$cmp_cs_name:$cmp_cs_vers"}
                    ||= $sa->fetch_by_region($cmp_cs_name, $sr_name, undef, undef, undef, $cmp_cs_vers);
            }
            else {
                $slice = $slice_hash{"NAME:$sr_name:$asm_cs_name:$asm_cs_vers"}
                    ||= $sa->fetch_by_region($asm_cs_name, $sr_name, undef, undef, undef, $asm_cs_vers);
            }
        }

        # If a destination slice was provided convert the coords
        # If the destination slice starts at 1 and is forward strand, nothing needs doing
        if ($dest_slice) {
            unless ($dest_slice_start == 1 && $dest_slice_strand == 1) {
                if ($dest_slice_strand == 1) {
                    $seq_region_start = $seq_region_start - $dest_slice_start + 1;
                    $seq_region_end   = $seq_region_end - $dest_slice_start + 1;
                }
                else {
                    my $tmp_seq_region_start = $seq_region_start;
                    $seq_region_start  = $dest_slice_end - $seq_region_end + 1;
                    $seq_region_end    = $dest_slice_end - $tmp_seq_region_start + 1;
                    $seq_region_strand *= -1;
                }
            }

            # Throw away features off the end of the requested slice
            next FEATURE if $seq_region_end < 1
                || $seq_region_start > $dest_slice_length
                || ( $dest_slice_sr_name ne $sr_name );

            $slice = $dest_slice;
        }

        push @features, $self->_new_fast( {
            'start'         => $seq_region_start,
            'end'           => $seq_region_end,
            'strand'        => $seq_region_strand,
            'slice'         => $slice,
            'analysis'      => $analysis,#we should lazy load this
            'adaptor'       => $self,
            'dbID'          => $probe_feature_id,
            'mismatchcount' => $mismatches,
            'cigar_line'    => $cigar_line,
            'probe_id'      => $probe_id,
            #'probeset'     => $probeset,#???do we need this?
            '_probe_name'   => $probe_name
        } );
    }

    return \@features;
}


=head2 _new_fast

  Args       : Hashref to be passed to ProbeFeature->new_fast()
  Example    : None
  Description: Construct a ProbeFeature object using quick and dirty new_fast.
  Returntype : Bio::EnsEMBL::Funcgen::ProbeFeature
  Exceptions : None
  Caller     : _objs_from_sth
  Status     : Medium Risk

=cut

sub _new_fast {
    my $self     = shift;
    my $hash_ref = shift;

    return Bio::EnsEMBL::Funcgen::ProbeFeature->new_fast($hash_ref);
}


=head2 store

  Args       : List of Bio::EnsEMBL::Funcgen::ProbeFeature objects
  Example    : $ofa->store(@features);
  Description: Stores given ProbeFeature objects in the database. Should only
               be called once per feature because no checks are made for
               duplicates. Sets dbID and adaptor on the objects that it stores.
               Features already stored in this DB are skipped with a warning.
  Returntype : Listref of the ProbeFeature objects passed in
  Exceptions : Throws if a list of ProbeFeature objects is not provided or if
               an analysis is not attached to any of the objects
  Caller     : General
  Status     : At Risk

=cut

sub store {
    my ($self, @ofs) = @_;

    if (scalar(@ofs) == 0) {
        throw('Must call store with a list of ProbeFeature objects');
    }

    my $sth = $self->prepare("
		INSERT INTO probe_feature (
			seq_region_id,  seq_region_start,
			seq_region_end, seq_region_strand,
			probe_id,       analysis_id,
			mismatches,     cigar_line
		) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
	");

    my $db               = $self->db();
    my $analysis_adaptor = $db->get_AnalysisAdaptor();

  FEATURE: foreach my $of (@ofs) {

        if ( !ref $of || !$of->isa('Bio::EnsEMBL::Funcgen::ProbeFeature') ) {
            throw('Feature must be an ProbeFeature object');
        }

        if ( $of->is_stored($db) ) {
            warning('ProbeFeature [' . $of->dbID() . '] is already stored in the database');
            next FEATURE;
        }

        if ( !defined $of->analysis() ) {
            throw('An analysis must be attached to the ProbeFeature objects to be stored.');
        }

        # Store the analysis if it has not been stored yet
        if ( !$of->analysis->is_stored($db) ) {
            $analysis_adaptor->store( $of->analysis() );
        }

        my $seq_region_id;
        ($of, $seq_region_id) = $self->_pre_store($of);

        # NOTE(review): the SQL_* type constants are not imported in this file;
        # presumably they are exported by one of the base adaptor modules
        # loaded above - confirm.
        $sth->bind_param(1, $seq_region_id,        SQL_INTEGER);
        $sth->bind_param(2, $of->start(),          SQL_INTEGER);
        $sth->bind_param(3, $of->end(),            SQL_INTEGER);
        $sth->bind_param(4, $of->strand(),         SQL_TINYINT);
        $sth->bind_param(5, $of->probe_id(),       SQL_INTEGER);
        $sth->bind_param(6, $of->analysis->dbID(), SQL_INTEGER);
        $sth->bind_param(7, $of->mismatchcount(),  SQL_TINYINT);
        $sth->bind_param(8, $of->cigar_line(),     SQL_VARCHAR);

        $sth->execute();

        $of->dbID( $sth->{'mysql_insertid'} );
        $of->adaptor($self);
    }

    #No need to return this really as the dbID and adaptor has been
    #updated in the passed arrays of features via the object
    #reference
    return \@ofs
}


=head2 list_dbIDs

  Args       : None
  Example    : my @feature_ids = @{$ofa->list_dbIDs()};
  Description: Gets an array of internal IDs for all ProbeFeature objects in
               the current database.
  Returntype : Listref of ints
  Exceptions : None
  Caller     : ?
  Status     : Medium Risk

=cut

sub list_dbIDs {
    my $self = shift;

    return $self->_list_dbIDs('probe_feature');
}

#Probe cache methods?


=head2 reassign_feature_to_probe

  Arg[0]     : ARRAYREF - feature dbIDs to reassign
  Arg[1]     : int - probe dbID to reassign to
  Example    : $ofa->reassign_feature_to_probe(\@fids, $pid);
  Description: Update features to link to given probe dbID
  Returntype : None
  Exceptions : Throws if a non-empty ARRAYREF of feature ids and a probe id
               are not both passed
  Caller     : Importer
  Status     : At Risk

=cut

sub reassign_feature_to_probe {
    my ($self, $fids_ref, $pid) = @_;

    # Check the reference type first so that a missing or non-array argument
    # yields the intended exception rather than a dereference error.
    if ( !( ref($fids_ref) eq 'ARRAY' && @$fids_ref ) || !$pid ) {
        throw('Need to pass a ref to an array of feature ids and a probe id to reassign to');
    }

    my $cmd = 'UPDATE probe_feature SET probe_id='.$pid.' WHERE probe_feature_id IN ('.join(',', @$fids_ref).')';
    $self->db->dbc->do($cmd);

    #This will fail anyway?
    #if($?){
    #  throw("SQL Command failed:\t$sql\n$@");
    #}

    return;
}


=head2 delete_features

  Arg[0]     : ARRAYREF - feature dbIDs to delete
  Example    : $pfa->delete_features(\@fids);
  Description: Deletes features with the given probe_feature_ids
  Returntype : None
  Exceptions : Throws if a non-empty ARRAYREF of feature ids is not passed
  Caller     : Importer
  Status     : At Risk

=cut

sub delete_features {
    my ($self, $fids_ref) = @_;

    # Check the reference type first so that a missing or non-array argument
    # yields the intended exception rather than a dereference error.
    if ( !( ref($fids_ref) eq 'ARRAY' && @$fids_ref ) ) {
        throw('Need to pass a ref to an array of feature ids');
    }

    my $cmd = 'DELETE from probe_feature WHERE probe_feature_id IN ('.join(',', @$fids_ref).')';
    $self->db->dbc->do($cmd);

    #This will fail anyway?
    #if($?){
    #  throw("SQL Command failed:\t$sql\n$@");
    #}

    return;
}

1;