# Raw content of Bio::EnsEMBL::Analysis::RunnableDB::Finished
### Bio::EnsEMBL::Analysis::RunnableDB::Finished

package Bio::EnsEMBL::Analysis::RunnableDB::Finished;

use strict;
use warnings;

use Scalar::Util qw(blessed);

use Bio::EnsEMBL::Utils::Exception qw(verbose throw warning);
use Bio::EnsEMBL::Pipeline::SeqFetcher::Finished_Dfetch;
use base 'Bio::EnsEMBL::Analysis::RunnableDB';

# Two-level lookup used by get_extdb_id():
#   first level  : regex matched against the analysis logic_name
#                  ('default' is the fallback entry, not a pattern)
#   second level : regex matched against the hit name => external_db id
my %ana2extdbid = (
    Uniprot_raw => { "." => 2250 },
    Uniprot_SW  => { "-" => 2202, "^\\w+\\.\\d+" => 2200 },
    Uniprot_TR  => { "." => 2000 },
    default     => { "." => 700 },
    refseq      => {
        "M_"                  => 1800,
        "P_"                  => 1810,
        "R_"                  => 1820,
        "^(AC|N[CGTWSZ])_"    => 1830
    }
);

# get_extdb_id($logic_name, $hit_name)
#
# Returns the external_db id registered in %ana2extdbid for the given
# analysis logic name and hit name. Throws if no hit-name pattern matches.
#
# Patterns are tried in sorted key order and the first match wins, so the
# result no longer depends on Perl's randomised hash ordering when more
# than one pattern could match.
sub get_extdb_id {
    my ($self, $logic_name, $hit_name) = @_;

    # get the right analysis hash
    my $hash;
    foreach my $analysis_pattern (sort keys %ana2extdbid) {
        # 'default' is the fallback entry, not a logic_name pattern
        next if $analysis_pattern eq 'default';
        if ($logic_name =~ /$analysis_pattern/) {
            $hash = $ana2extdbid{$analysis_pattern};
            last;
        }
    }
    $hash = $ana2extdbid{default} unless $hash;

    # check the hit_name
    foreach my $hit_pattern (sort keys %$hash) {
        return $hash->{$hit_pattern} if $hit_name =~ /$hit_pattern/;
    }

    throw("Cannot get external_db id for hit $hit_name and analysis $logic_name\n");
}

# write_output()
#
# Stores every feature set returned by output() in the database inside a
# single transaction. AlignFeatures already stored are filtered out first
# (see remove_stored_AlignFeatures); for any other feature type all stored
# features of this analysis on the query slice are removed before storing.
# On any failure the transaction is rolled back and an exception is thrown.
sub write_output {
    my ($self) = @_;

    # Explicitly call this package's output(), which always returns an
    # array of arrays.
    my $outputs = $self->Bio::EnsEMBL::Analysis::RunnableDB::Finished::output;

    ### code db_version_searched method may be duplicated in several modules
    ### How does it get written into the input_id_analysis table?
    foreach my $runnable ( @{ $self->runnable } ) {
        # Declared separately from the conditional assignment:
        # "my $x = ... if COND" has undefined behaviour in Perl.
        my $db_version;
        $db_version = $runnable->get_db_version if $runnable->can('get_db_version');
        $self->db_version_searched($db_version);    # make sure we set this here
    }

    my $dbh = $self->db->dbc->db_handle;
    $dbh->begin_work;

    my $written = eval {
        # We expect an array of arrays from output()
        foreach my $output (@$outputs) {
            next unless @$output;    # No feature output

            my $feat     = $output->[0];
            my $is_align = ref($feat) =~ /AlignFeature$/ ? 1 : 0;

            # The type of adaptor used to store the data depends
            # upon the type of the first element of @$output
            my $adaptor = $self->get_adaptor($feat);

            # Remove the AlignFeatures already in db from the output and
            # get rid of the old ones in the db (for depthfilter features only)
            my $all = 0;
            if ($is_align) {
                $all = 1 if ( $self->analysis->module =~ /DepthFilter/ );
                $self->remove_stored_AlignFeatures($adaptor, $output, $all);
                $self->write_descriptions($output)
                  unless ( $all || $self->analysis->logic_name =~ /refseq/ );
            }
            else {
                # Remove all SimpleFeatures
                $self->remove_all_features($adaptor);
            }

            my $analysis = $self->analysis;
            my $logic    = $analysis->logic_name;
            my $slice    = $self->query;
            my $ff       = $self->feature_factory;

            foreach my $feature (@$output) {
                if ($is_align) {
                    my $hit_name = $feature->hseqname;
                    $feature->external_db_id( $self->get_extdb_id($logic, $hit_name) );
                }
                $feature->analysis($analysis);
                $feature->slice($slice) if ( !$feature->slice );
                $ff->validate($feature);
            }

            if ( $adaptor->can('db_version') ) {
                $adaptor->db_version( $self->db_version_searched );
            }

            # Store features in the database
            print STDOUT "Finished: Writing " . scalar(@$output) . " new " . ref($feat) . " in the database\n";
            $adaptor->store(@$output) if @$output;
        }
        $dbh->commit;
        1;
    };

    if ( !$written ) {
        # Capture the error before rollback gets a chance to clobber $@
        my $error = $@;
        eval { $dbh->rollback; 1 }
          or warning("Rollback failed: $@");
        throw("UNABLE TO WRITE FEATURES IN DATABASE\n[$error]\n");
    }
}

# output(@output)
#
# Appends @output (if any) to the stored output list and returns the list
# as an array of arrays: when the first stored element is not an array
# reference, the whole list is wrapped in an outer array reference.
sub output {
    my ($self, @output) = @_;

    $self->{'output'} = [] if !$self->{'output'};
    push @{ $self->{'output'} }, @output if scalar(@output);

    if ( ref( $self->{'output'}->[0] ) ne 'ARRAY' ) {
        return [ $self->{'output'} ];
    }
    return $self->{'output'};
}

# replace_output(@output)
#
# Replaces the stored output list with @output when @output is non-empty
# (an empty call leaves the list untouched) and returns the stored list.
sub replace_output {
    my ($self, @output) = @_;

    if ( scalar(@output) ) {
        $self->{'output'} = [@output];
    }
    return $self->{'output'};
}

# remove_stored_AlignFeatures($adaptor, $output, $all)
#
# Filters @$output in place so that it only holds features not already
# stored for this analysis on the query slice.
#   - For the 'Uniprot_raw' analysis, stored features whose display_id lacks
#     a sequence version are removed from the db when a versioned
#     equivalent is present in the output.
#   - When $all is true, stored features that are absent from the output
#     are removed from the db as well.
sub remove_stored_AlignFeatures {
    my ($self, $adaptor, $output, $all) = @_;

    ## create a hashtable of the contig hits stored in the database
    my $db_features =
      $adaptor->fetch_all_by_Slice( $self->query, $self->analysis->logic_name );
    print STDOUT "Finished: Found ", scalar(@$db_features), " features already stored in the database\n";
    print STDOUT "Finished: Found ", scalar(@$output), " features in the output\n";

    my %db_feat        = map { $self->get_feature_key($_), $_ } @$db_features;
    my %db_feat_to_del = %db_feat;

    ## remove old Uniprot features with missing sequence version.
    if ( $self->analysis->logic_name eq 'Uniprot_raw' ) {
        foreach my $new_f (@$output) {
            $new_f->slice( $self->query ) if ( !$new_f->slice );

            # get the string key without the sequence version
            my $new_f_key = $self->get_feature_key( $new_f, 1 );
            my $stored_f  = $db_feat{$new_f_key};
            if ( $stored_f && $stored_f->display_id !~ /\.\d+/ ) {
                print STDOUT "Finished: Remove old Uniprot feature "
                  . $stored_f->display_id
                  . ", replaced by "
                  . $new_f->display_id . "\n";
                $adaptor->remove($stored_f);
                delete $db_feat{$new_f_key};
                delete $db_feat_to_del{$new_f_key};
            }
        }
    }

    ## remove duplicated features from output
    # Single pass instead of repeated splice(); @$output is rewritten in
    # place so the caller's array reference sees the filtered list.
    my @new_features;
    foreach my $feature (@$output) {
        $feature->slice( $self->query ) if ( !$feature->slice );
        my $f_key = $self->get_feature_key($feature);
        if ( $db_feat{$f_key} ) {
            delete $db_feat_to_del{$f_key};
        }
        else {
            push @new_features, $feature;
        }
    }
    @$output = @new_features;

    ## remove the old features present in the db and not in the output
    if ($all) {
        foreach my $f ( values(%db_feat_to_del) ) {
            $adaptor->remove($f);
        }
        print STDOUT "Finished: Removed ", scalar( keys(%db_feat_to_del) ), " old features from db\n";
    }
}

# remove_all_features($adaptor)
#
# Removes, through $adaptor, every feature of this analysis stored on the
# query slice.
sub remove_all_features {
    my ($self, $adaptor) = @_;

    my $db_features =
      $adaptor->fetch_all_by_Slice( $self->query, $self->analysis->logic_name );
    foreach my $f (@$db_features) {
        $adaptor->remove($f);
    }
    print STDOUT "Finished: Removed ", scalar(@$db_features), " features through " . ref($adaptor) . " \n";
}

# write_descriptions($output)
#
# Passes the unique hit names found in @$output to a Finished_Dfetch
# sequence fetcher's write_descriptions(). The fetcher type is 'protein'
# when the analysis logic_name matches /uniprot/i, 'dna' otherwise.
sub write_descriptions {
    my ($self, $output) = @_;

    my $dbobj = $self->db;
    my $type  = $self->analysis->logic_name =~ /uniprot/i ? 'protein' : 'dna';
    my $seqfetcher = Bio::EnsEMBL::Pipeline::SeqFetcher::Finished_Dfetch->new(
        -type     => $type,
        -analysis => $self->analysis
    );

    my %single_ids = map { $_->hseqname => 1 } @$output;
    my @ids        = keys(%single_ids);
    $seqfetcher->write_descriptions( $dbobj, \@ids );
}

# get_feature_key($feat, $uni)
#
# Returns a string identifying an align feature: display_id, seq_region
# name, cigar string, start, end, hstart and hend joined with ':'.
# When $uni is true the ".<version>" suffix is stripped from the
# display_id, so the key matches a stored feature lacking the version.
# Throws unless $feat is a Bio::EnsEMBL::BaseAlignFeature.
sub get_feature_key {
    my ($self, $feat, $uni) = @_;

    # blessed() guard so undef / unblessed arguments reach the throw below
    # instead of dying on the ->isa call itself.
    throw(  "Must pass Bio::EnsEMBL::Analysis::RunnableDB::Finished::get_feature_key a Bio::EnsEMBL::BaseAlignFeature"
          . " not a "
          . ( defined $feat ? $feat : 'undef' ) )
      unless ( blessed($feat) && $feat->isa('Bio::EnsEMBL::BaseAlignFeature') );

    my $name = $feat->display_id;

    # Strip the whole version number, not just its first digit
    # (e.g. "P12345.12" => "P12345").
    $name =~ s/\.\d+// if ($uni);

    return join( ':',
        $name,        $feat->seq_region_name, $feat->cigar_string,
        $feat->start, $feat->end,             $feat->hstart,
        $feat->hend );
}

# get_adaptor($feat)
#
# Returns the feature adaptor matching the class of $feat; throws for an
# unsupported feature type.
sub get_adaptor {
    my ($self, $feat) = @_;

    return $self->db->get_ProteinAlignFeatureAdaptor
      if $feat->isa('Bio::EnsEMBL::DnaPepAlignFeature');
    return $self->db->get_DnaAlignFeatureAdaptor
      if $feat->isa('Bio::EnsEMBL::DnaDnaAlignFeature');
    return $self->db->get_SimpleFeatureAdaptor
      if $feat->isa('Bio::EnsEMBL::SimpleFeature');

    throw( 'unexpected feature type: ' . ref($feat) );
}

1;

=head1 NAME - Bio::EnsEMBL::Analysis::RunnableDB::Finished

=head1 AUTHOR

Mustapha Larbaoui B<email> ml6@sanger.ac.uk

=cut