Bio::DB::GFF::Adaptor
memory
Toolbar
Summary
Bio::DB::GFF::Adaptor::dbi::mysql -- Database adaptor for a specific mysql schema
Package variables
No package variables defined.
Included modules
File::Basename ' dirname '
Inherit
Synopsis
use Bio::DB::GFF;
my $db = Bio::DB::GFF->new(-adaptor=> 'memory',
-file => 'my_features.gff',
-fasta => 'my_dna.fa'
);
See
Bio::DB::GFF for other methods.
Description
This adaptor implements an in-memory version of Bio::DB::GFF. It can be used to
store and retrieve SHORT GFF files. It inherits from Bio::DB::GFF.
Methods
_convert_feature_hash_to_array | No description | Code |
_feature_by_attribute | No description | Code |
_feature_by_id | No description | Code |
_feature_by_name | Description | Code |
_get_features_by_search_options | No description | Code |
_match_all_attr_in_feature | No description | Code |
_matching_attributes | No description | Code |
_matching_typelist | No description | Code |
dna_db | No description | Code |
do_attributes | No description | Code |
finish_load | No description | Code |
get_abscoords | No description | Code |
get_dna | No description | Code |
get_feature_by_group_id | No description | Code |
get_features | No description | Code |
get_features_iterator | No description | Code |
get_types | No description | Code |
insert_sequence | No description | Code |
load_gff_line | No description | Code |
load_or_store_fasta | No description | Code |
new | No description | Code |
search_notes | No description | Code |
setup_load | No description | Code |
Methods description
Title : _feature_by_name Usage : $db->get_features_by_name($name,$class,$callback) Function: get a list of features by name and class Returns : count of number of features retrieved Args : name of feature, class of feature, and a callback Status : protected
This method is used internally. The callback arguments are those used by make_feature(). |
Methods code
_convert_feature_hash_to_array | description | prev | next | Top |
sub _convert_feature_hash_to_array
{ my @features_hash_array = @_;
use constant FREF => 0;
use constant FSTART => 1;
use constant FSTOP => 2;
use constant FSOURCE => 3;
use constant FMETHOD => 4;
use constant FSCORE => 5;
use constant FSTRAND => 6;
use constant FPHASE => 7;
use constant GCLASS => 8;
use constant GNAME => 9;
use constant TSTART => 10;
use constant TSTOP => 11;
use constant FID => 12;
use constant GID => 13;
my @features_array_array;
my $feature_count = 0;
for my $feature_hash (@{$features_hash_array[0]}){
my @feature_array;
$feature_array[FREF] = $feature_hash->{ref};
$feature_array[FSTART] = $feature_hash->{start};
$feature_array[FSTOP] = $feature_hash->{stop};
$feature_array[FSOURCE] = $feature_hash->{source};
$feature_array[FMETHOD] = $feature_hash->{method};
$feature_array[FSCORE] = $feature_hash->{score};
$feature_array[FSTRAND] = $feature_hash->{strand};
$feature_array[FPHASE ] = $feature_hash->{phase};
$feature_array[GCLASS] = $feature_hash->{gclass};
$feature_array[GNAME] = $feature_hash->{gname};
$feature_array[TSTART] = $feature_hash->{tstart};
$feature_array[TSTOP] = $feature_hash->{tstop};
$feature_array[FID] = $feature_hash->{feature_id};
$feature_array[GID] = $feature_hash->{group_id};
$features_array_array[$feature_count] =\@ feature_array;
$feature_count++;
}
return\@ features_array_array; } |
sub _feature_by_attribute
{ my $self = shift;
my ($attributes,$callback) = @_;
$callback || $self->throw('must provide a callback argument');
my $count = 0;
my $feature_id = -1;
my $feature_group_id = undef;
for my $feature (@{$self->{data}}) {
$feature_id++;
for my $attr (@{$feature->{attributes}}) {
my ($attr_name,$attr_value) = @$attr ;
foreach (keys %$attributes) {
if ($_ eq $attr_name && $attributes->{$_} eq $attr_value){
$callback->($feature->{ref},
$feature->{start},
$feature->{stop},
$feature->{source},
$feature->{method},
$feature->{score},
$feature->{strand},
$feature->{phase},
$feature->{gclass},
$feature->{gname},
$feature->{tstart},
$feature->{tstop},
$feature_id,
$feature_group_id);
$count++;
}
}
}
}
}
} |
sub _feature_by_id
{ my $self = shift;
my ($ids,$type,$callback) = @_;
$callback || $self->throw('must provide a callback argument');
my $feature_group_id = undef;
my $count = 0;
if ($type eq 'feature'){
for my $feature_id (@$ids){
my $feature = ${$self->{data}}[$feature_id];
$callback->($feature->{ref},
$feature->{start},
$feature->{stop},
$feature->{source},
$feature->{method},
$feature->{score},
$feature->{strand},
$feature->{phase},
$feature->{gclass},
$feature->{gname},
$feature->{tstart},
$feature->{tstop},
$feature_id,
$feature_group_id);
$count++;
}
}
}
} |
sub _feature_by_name
{ my $self = shift;
my ($class,$name,$location,$callback) = @_;
$callback || $self->throw('must provide a callback argument');
my $count = 0;
my $id = -1;
my $regexp;
if ($name =~ /[*?]/) { $name =~ quotemeta($name);
$name =~ s/\\\*/.*/g;
$name =~ s/\\\?/.?/g;
$regexp++;
}
for my $feature (@{$self->{data}}) {
$id++;
next unless ($regexp && $feature->{gname} =~ /$name/i) || $feature->{gname} eq $name;
next unless $feature->{gclass} eq $class;
if ($location) {
next if $location->[0] ne $feature->{ref};
next if $location->[1] && $location->[1] > $feature->{stop};
next if $location->[2] && $location->[2] < $feature->{start};
}
$count++;
$callback->(@{$feature}{qw(
ref
start
stop
source
method
score
strand
phase
gclass
gname
tstart
tstop
)},$id,0
);
}
return $count;
}
} |
sub _get_features_by_search_options
{ my $count = 0;
my ($data,$search,$options) = @_;
my ($rangetype,$refseq,$class,$start,$stop,$types,$sparse,$order_by_group,$attributes) =
(@{$search}{qw(rangetype refseq refclass start stop types)},
@{$options}{qw(sparse sort_by_group ATTRIBUTES)}) ;
my @found_features;
my $feature_id = -1 ;
my $feature_group_id = undef;
for my $feature (@{$data}) {
$feature_id++;
my $feature_start = $feature->{start};
my $feature_stop = $feature->{stop};
my $feature_ref = $feature->{ref};
if (defined $refseq){
next unless $feature_ref eq $refseq;
}
if (defined $start or defined $stop) {
$start = 0 unless defined($start);
$stop = MAX_SEGMENT unless defined($stop);
if ($rangetype eq 'overlaps') {
next unless $feature_stop >= $start && $feature_start <= $stop;
} elsif ($rangetype eq 'contains') {
next unless $feature_start >= $start && $feature_stop <= $stop;
} elsif ($rangetype eq 'contained_in') {
next unless $feature_start <= $start && $feature_stop >= $stop;
} else {
next unless $feature_start == $start && $feature_stop == $stop;
}
}
my $feature_source = $feature->{source};
my $feature_method = $feature->{method};
if (defined $types && @$types){
next unless _matching_typelist($feature_method,$feature_source,$types);
}
my $feature_attributes = $feature->{attributes};
if (defined $attributes){
next unless _matching_attributes($feature_attributes,$attributes);
}
my $found_feature = $feature ;
$found_feature->{feature_id} = $feature_id;
$found_feature->{group_id} = $feature_group_id;
push @found_features,$found_feature;
}
return\@ found_features;
}
} |
sub _match_all_attr_in_feature
{ my ($attr_name,$attr_value,$feature_attributes) = @_;
for my $attr (@$feature_attributes) {
my ($feature_attr_name,$feature_attr_value) = @$attr ;
next if ($attr_name ne $feature_attr_name || $attr_value ne $feature_attr_value);
return 1;
}
return 0; } |
sub _matching_attributes
{ my ($feature_attributes,$attributes) = @_ ;
foreach (keys %$attributes) {
return 0 if !_match_all_attr_in_feature($_,$attributes->{$_},$feature_attributes)
}
return 1; } |
sub _matching_typelist
{ my ($feature_method,$feature_source,$typelist) = @_;
foreach (@$typelist) {
my ($search_method,$search_source) = @$_;
next if $search_method ne $feature_method;
next if defined($search_source) && $search_source ne $feature_source;
return 1;
}
return 0; } |
sub dna_db
{ my $self = shift;
my $d = $self->{dna_db};
$self->{dna_db} = shift if @_;
$d; } |
sub do_attributes
{ my $self = shift;
my ($feature_id,$tag) = @_;
my $attr ;
my $feature = ${$self->{data}}[$feature_id];
my @result;
for my $attr (@{$feature->{attributes}}) {
my ($attr_name,$attr_value) = @$attr ;
if (defined($tag) && $attr_name eq $tag){push @result,$attr_value;}
elsif (!defined($tag)) {push @result,($attr_name,$attr_value);}
}
return @result;
}
} |
sub get_abscoords
{ my $self = shift;
my ($name,$class,$refseq) = @_;
my %refs;
my $regexp;
if ($name =~ /[*?]/) { $name =~ quotemeta($name);
$name =~ s/\\\*/.*/g;
$name =~ s/\\\?/.?/g;
$regexp++;
}
for my $feature (@{$self->{data}}) {
my $no_match_class_name;
my $empty_class_name;
if (defined $feature->{gname} and defined $feature->{gclass}){
my $matches = $feature->{gclass} eq $class
&& ($regexp ? $feature->{gname} =~ /$name/i : $feature->{gname} eq $name);
$no_match_class_name = !$matches; }
else{
$empty_class_name = 1;
}
if ($no_match_class_name || $empty_class_name){
my $feature_attributes = $feature->{attributes};
my $attributes = {Alias => $name};
if (!_matching_attributes($feature_attributes,$attributes)){
next;
}
}
push @{$refs{$feature->{ref}}},$feature;
}
if (! %refs) {
$self->error("$name not found in database");
return;
} elsif (keys %refs > 1) {
$self->error("$name has more than one reference sequence in database");
return;
}
my ($ref) = keys %refs;
my @found = @{$refs{$ref}};
my ($strand,$start,$stop);
foreach (@found) {
$strand ||= $_->{strand};
$strand = '+' if $strand && $strand eq '.';
$start = $_->{start} if !defined($start) || $start > $_->{start};
$stop = $_->{stop} if !defined($stop) || $stop < $_->{stop};
my @found_segments;
foreach my $ref (keys %refs) {
next if defined($refseq) and $ref ne $refseq;
my @found = @{$refs{$ref}};
my ($strand,$start,$stop);
foreach (@found) {
$strand ||= $_->{strand};
$strand = '+' if $strand && $strand eq '.';
$start = $_->{start} if !defined($start) || $start > $_->{start};
$stop = $_->{stop} if !defined($stop) || $stop < $_->{stop};
}
push @found_segments,[$ref,$class,$start,$stop,$strand];
}
return\@ found_segments; } |
sub get_dna
{ my $self = shift;
my ($id,$start,$stop,$class) = @_;
if (my $dna_db = $self->dna_db) {
return $dna_db->seq($id,$start=>$stop);
}
return $self->{dna}{$id} if !defined $start || !defined $stop;
$start = 1 if !defined $start;
my $reversed = 0;
if ($start > $stop) {
$reversed++;
($start,$stop) = ($stop,$start);
}
my $dna = substr($self->{dna}{$id},$start-1,$stop-$start+1);
if ($reversed) {
$dna =~ tr/gatcGATC/ctagCTAG/;
$dna = reverse $dna;
}
$dna;
}
} |
sub get_feature_by_group_id
{1; } |
sub get_features
{ my $self = shift;
my $count = 0;
my ($search,$options,$callback) = @_;
my $data =\@ {$self->{data}};
my $found_features;
$found_features = _get_features_by_search_options($data,$search,$options);
@{$found_features} = sort {"$a->{gclass}:$a->{gname}" cmp "$b->{gclass}:$b->{gname}"}
@{$found_features} if $options->{sort_by_group} ;
for my $feature (@{$found_features}) { $count++;
$callback->(
@{$feature}{qw(ref start stop source method score strand phase gclass gname tstart tstop feature_id feature_group_id)}
);
}
return $count;
}
} |
sub get_features_iterator
{ my $self = shift;
my ($search,$options,$callback) = @_;
$callback || $self->throw('must provide a callback argument');
my $data =\@ {$self->{data}};
my $results = _get_features_by_search_options($data,$search,$options);
my $results_array = _convert_feature_hash_to_array($results);
return Bio::DB::GFF::Adaptor::memory_iterator->new($results_array,$callback);
}
} |
sub get_types
{ my $self = shift;
my ($srcseq,$class,$start,$stop,$want_count,$typelist) = @_;
my(%result,%obj);
for my $feature (@{$self->{data}}) {
my $feature_start = $feature->{start};
my $feature_stop = $feature->{stop};
my $feature_ref = $feature->{ref};
my $feature_class = $feature->{class};
my $feature_method = $feature->{method};
my $feature_source = $feature->{source};
if (defined $srcseq){
next unless $feature_ref eq $srcseq ;
}
if (defined $class){
next unless $feature_class eq $class ;
}
if (defined $start or defined $stop) {
$start = 1 unless defined $start;
$stop = MAX_SEGMENT unless defined $stop;
next unless $feature_stop >= $start && $feature_start <= $stop;
}
if (defined $typelist && @$typelist){
next unless _matching_typelist($feature_method,$feature_source,$typelist);
}
my $type = Bio::DB::GFF::Typename->new($feature_method,$feature_source);
$result{$type}++;
$obj{$type} = $type;
}
return $want_count ? %result : values %obj;
}
} |
sub insert_sequence
{ my $self = shift;
my($id,$offset,$seq) = @_;
$self->{dna}{$id} .= $seq;
}
} |
sub load_gff_line
{ my $self = shift;
my $feature_hash = shift;
$feature_hash->{strand} = '' if $feature_hash->{strand} && $feature_hash->{strand} eq '.';
$feature_hash->{phase} = '' if $feature_hash->{phase} && $feature_hash->{phase} eq '.';
push @{$self->{data}},$feature_hash;
}
} |
sub load_or_store_fasta
{ my $self = shift;
my $fasta = shift;
if ((-f $fasta && -w dirname($fasta))
or
(-d $fasta && -w $fasta)) {
require Bio::DB::Fasta;
my $dna_db = Bio::DB::Fasta->new($fasta)
or $self->throw("Couldn't create a new Bio::DB::Fasta index from $fasta");
$self->dna_db($dna_db);
} else {
$self->load_fasta($fasta);
} } |
sub new
{ my $class = shift ;
my ($file,$fasta,$dbdir) = rearrange([
[qw(GFF FILE DIRECTORY)],
'FASTA',
[qw(DSN DB DIR)],
],@_);
my $self = bless{ data => [] },$class;
$file ||= $dbdir;
$fasta ||= $dbdir;
$self->load_gff($file) if $file;
$self->load_or_store_fasta($fasta) if $fasta;
return $self; } |
sub search_notes
{ my $self = shift;
my ($search_string,$limit) = @_;
my @results;
my @words = map {quotemeta($_)} $search_string =~ /(\w+)/g;
for my $feature (@{$self->{data}}) {
next unless defined $feature->{gclass} && defined $feature->{gname}; next unless $feature->{attributes};
my @attributes = @{$feature->{attributes}};
my @values = map {$_->[1]} @attributes;
my $value = "@values";
my $matches = 0;
my $note;
for my $w (@words) {
my @hits = $value =~ /($w)/g;
$note ||= $value if @hits;
$matches += @hits;
}
next unless $matches;
my $relevance = 10 * $matches;
my $featname = Bio::DB::GFF::Featname->new($feature->{gclass}=>$feature->{gname});
push @results,[$featname,$note,$relevance];
last if @results >= $limit;
}
@results;
}
} |
General documentation
Use Bio::DB::GFF->new() to construct new instances of this class.
Three named arguments are recommended:
Argument Description
-adaptor Set to "memory" to create an instance of this class.
-gff Read the indicated file or directory of .gff file.
-fasta Read the indicated file or directory of fasta files.
-dsn Indicates a directory containing .gff and .fa files
If you use the -dsn option and the indicated directory is writable by
the current process, then this library will create a FASTA file index
that greatly diminishes the memory usage of this module.
none ;-)
Shuly Avraham <avraham@cshl.org>.
Copyright (c) 2002 Cold Spring Harbor Laboratory.
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.