Raw content of Bio::EnsEMBL::Variation::DBSQL::VariationFeatureAdaptor # Ensembl module for Bio::EnsEMBL::Variation::DBSQL::VariationFeatureAdaptor # # Copyright (c) 2004 Ensembl # # You may distribute this module under the same terms as perl itself # # =head1 NAME Bio::EnsEMBL::Variation::DBSQL::VariationFeatureAdaptor =head1 SYNOPSIS $vdb = Bio::EnsEMBL::Variation::DBSQL::DBAdaptor->new(...); $db = Bio::EnsEMBL::DBSQL::DBAdaptor->new(...); # tell the variation database where core database information can be # be found $vdb->dnadb($db); $va = $vdb->get_VariationAdaptor(); $vfa = $vdb->get_VariationFeatureAdaptor(); $sa = $db->get_SliceAdaptor(); # Get a VariationFeature by its internal identifier $vf = $va->fetch_by_dbID(145); # get all VariationFeatures in a region $slice = $sa->fetch_by_region('chromosome', 'X', 1e6, 2e6); foreach $vf (@{$vfa->fetch_all_by_Slice($slice)}) { print $vf->start(), '-', $vf->end(), ' ', $vf->allele_string(), "\n"; } # fetch all genome hits for a particular variation $v = $va->fetch_by_name('rs56'); foreach $vf (@{$vfa->fetch_all_by_Variation($v)}) { print $vf->seq_region_name(), $vf->seq_region_start(), '-', $vf->seq_region_end(),"\n"; } =head1 DESCRIPTION This adaptor provides database connectivity for VariationFeature objects. Genomic locations of variations can be obtained from the database using this adaptor. See the base class BaseFeatureAdaptor for more information. =head1 AUTHOR - Graham McVicker =head1 CONTACT Post questions to the Ensembl development list ensembl-dev@ebi.ac.uk =head1 METHODS =cut use strict; use warnings; package Bio::EnsEMBL::Variation::DBSQL::VariationFeatureAdaptor; use Bio::EnsEMBL::Variation::VariationFeature; use Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor; use Bio::EnsEMBL::Utils::Exception qw(throw warning); our @ISA = ('Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor'); =head2 fetch_all_by_Variation Arg [1] : Bio::EnsEMBL:Variation::Variation $var Example : my @vfs = @{$vfa->fetch_all_by_Variation($var)}; Description: Retrieves all variation features for a given variation. Most variations should only hit the genome once and only a return a single variation feature. Returntype : reference to list Bio::EnsEMBL::Variation::VariationFeature Exceptions : throw on bad argument Caller : general Status : Stable =cut sub fetch_all_by_Variation { my $self = shift; my $var = shift; if(!ref($var) || !$var->isa('Bio::EnsEMBL::Variation::Variation')) { throw('Bio::EnsEMBL::Variation::Variation arg expected'); } if(!defined($var->dbID())) { throw("Variation arg must have defined dbID"); } return $self->generic_fetch("vf.variation_id = ".$var->dbID()); } =head2 fetch_all_genotyped_by_Slice Arg [1] : Bio::EnsEMBL:Variation::Slice $slice Example : my @vfs = @{$vfa->fetch_all_genotyped_by_Slice($slice)}; Description: Retrieves all variation features that have been gentoyped for a given slice. Most variations should only hit the genome once and only a return a single variation feature. Returntype : reference to list Bio::EnsEMBL::Variation::VariationFeature Exceptions : throw on bad argument Caller : general Status : Stable =cut sub fetch_all_genotyped_by_Slice{ my $self = shift; my $slice = shift; my $constraint = "vf.flags & 1"; #call the method fetch_all_by_Slice_constraint with the genotyped constraint return $self->fetch_all_by_Slice_constraint($slice,$constraint); } # method used by superclass to construct SQL sub _tables { return (['variation_feature', 'vf'], [ 'source', 's']); } sub _default_where_clause { my $self = shift; return 'vf.source_id = s.source_id'; } sub _columns { return qw( vf.variation_feature_id vf.seq_region_id vf.seq_region_start vf.seq_region_end vf.seq_region_strand vf.variation_id vf.allele_string vf.variation_name vf.map_weight s.name vf.validation_status vf.consequence_type); } sub _objs_from_sth { my ($self, $sth, $mapper, $dest_slice) = @_; # # This code is ugly because an attempt has been made to remove as many # function calls as possible for speed purposes. Thus many caches and # a fair bit of gymnastics is used. # my $sa = $self->db()->dnadb()->get_SliceAdaptor(); my @features; my %slice_hash; my %sr_name_hash; my %sr_cs_hash; my ($variation_feature_id, $seq_region_id, $seq_region_start, $seq_region_end, $seq_region_strand, $variation_id, $allele_string, $variation_name, $map_weight, $source_name, $validation_status, $consequence_type ); $sth->bind_columns(\$variation_feature_id, \$seq_region_id, \$seq_region_start, \$seq_region_end, \$seq_region_strand, \$variation_id, \$allele_string, \$variation_name, \$map_weight, \$source_name, \$validation_status, \$consequence_type); my $asm_cs; my $cmp_cs; my $asm_cs_vers; my $asm_cs_name; my $cmp_cs_vers; my $cmp_cs_name; if($mapper) { $asm_cs = $mapper->assembled_CoordSystem(); $cmp_cs = $mapper->component_CoordSystem(); $asm_cs_name = $asm_cs->name(); $asm_cs_vers = $asm_cs->version(); $cmp_cs_name = $cmp_cs->name(); $cmp_cs_vers = $cmp_cs->version(); } my $dest_slice_start; my $dest_slice_end; my $dest_slice_strand; my $dest_slice_length; if($dest_slice) { $dest_slice_start = $dest_slice->start(); $dest_slice_end = $dest_slice->end(); $dest_slice_strand = $dest_slice->strand(); $dest_slice_length = $dest_slice->length(); } FEATURE: while($sth->fetch()) { #get the slice object my $slice = $slice_hash{"ID:".$seq_region_id}; if(!$slice) { $slice = $sa->fetch_by_seq_region_id($seq_region_id); $slice_hash{"ID:".$seq_region_id} = $slice; $sr_name_hash{$seq_region_id} = $slice->seq_region_name(); $sr_cs_hash{$seq_region_id} = $slice->coord_system(); } # # remap the feature coordinates to another coord system # if a mapper was provided # if($mapper) { my $sr_name = $sr_name_hash{$seq_region_id}; my $sr_cs = $sr_cs_hash{$seq_region_id}; ($sr_name,$seq_region_start,$seq_region_end,$seq_region_strand) = $mapper->fastmap($sr_name, $seq_region_start, $seq_region_end, $seq_region_strand, $sr_cs); #skip features that map to gaps or coord system boundaries next FEATURE if(!defined($sr_name)); #get a slice in the coord system we just mapped to if($asm_cs == $sr_cs || ($cmp_cs != $sr_cs && $asm_cs->equals($sr_cs))) { $slice = $slice_hash{"NAME:$sr_name:$cmp_cs_name:$cmp_cs_vers"} ||= $sa->fetch_by_region($cmp_cs_name, $sr_name,undef, undef, undef, $cmp_cs_vers); } else { $slice = $slice_hash{"NAME:$sr_name:$asm_cs_name:$asm_cs_vers"} ||= $sa->fetch_by_region($asm_cs_name, $sr_name, undef, undef, undef, $asm_cs_vers); } } # # If a destination slice was provided convert the coords # If the dest_slice starts at 1 and is foward strand, nothing needs doing # if($dest_slice) { if($dest_slice_start != 1 || $dest_slice_strand != 1) { if($dest_slice_strand == 1) { $seq_region_start = $seq_region_start - $dest_slice_start + 1; $seq_region_end = $seq_region_end - $dest_slice_start + 1; } else { my $tmp_seq_region_start = $seq_region_start; $seq_region_start = $dest_slice_end - $seq_region_end + 1; $seq_region_end = $dest_slice_end - $tmp_seq_region_start + 1; $seq_region_strand *= -1; } #throw away features off the end of the requested slice if($seq_region_end < 1 || $seq_region_start > $dest_slice_length) { next FEATURE; } } $slice = $dest_slice; } $validation_status = 0 if (!defined $validation_status); my @states = split(',',$validation_status); my @types = split(',',$consequence_type); #get the different consequence types # consequence_type push @features, $self->_create_feature_fast('Bio::EnsEMBL::Variation::VariationFeature', #push @features, Bio::EnsEMBL::Variation::VariationFeature->new_fast( #if use new_fast, then do not need "-" infront of key, i.e 'start' => $seq_region_start, {'start' => $seq_region_start, 'end' => $seq_region_end, 'strand' => $seq_region_strand, 'slice' => $slice, 'allele_string' => $allele_string, 'variation_name' => $variation_name, 'adaptor' => $self, 'dbID' => $variation_feature_id, 'map_weight' => $map_weight, 'source' => $source_name, 'validation_code' => \@states, 'consequence_type' => \@types || 'INTERGENIC', '_variation_id' => $variation_id}); } return \@features; } =head2 list_dbIDs Arg [1] : none Example : @feature_ids = @{$simple_feature_adaptor->list_dbIDs()}; Description: Gets an array of internal ids for all simple features in the current db Returntype : list of ints Exceptions : none Caller : general Status : At Risk =cut sub list_dbIDs { my $self = shift; return $self->_list_dbIDs('variation_feature'); } =head2 get_all_synonym_sources Args[1] : Bio::EnsEMBL::Variation::VariationFeature vf Example : my @sources = @{$vf_adaptor->get_all_synonym_sources($vf)}; Description : returns a list of all the sources for synonyms of this VariationFeature ReturnType : reference to list of strings Exceptions : none Caller : general Status : At Risk : Variation database is under development. =cut sub get_all_synonym_sources{ my $self = shift; my $vf = shift; my %sources; my @sources; if(!ref($vf) || !$vf->isa('Bio::EnsEMBL::Variation::VariationFeature')) { throw("Bio::EnsEMBL::Variation::VariationFeature argument expected"); } if (!defined($vf->{'_variation_id'}) && !defined($vf->{'variation'})){ warning("Not possible to get synonym sources for the VariationFeature: you need to attach a Variation first"); return \@sources; } #get the variation_id my $variation_id; if (defined ($vf->{'_variation_id'})){ $variation_id = $vf->{'_variation_id'}; } else{ $variation_id = $vf->variation->dbID(); } #and go to the varyation_synonym table to get the extra sources my $source_name; my $sth = $self->prepare(qq{SELECT s.name FROM variation_synonym vs, source s WHERE s.source_id = vs.source_id AND vs.variation_id = ? }); $sth->bind_param(1,$variation_id,SQL_INTEGER); $sth->execute(); $sth->bind_columns(\$source_name); while ($sth->fetch){ $sources{$source_name}++; } @sources = keys(%sources); return \@sources; } 1;