Bio::EnsEMBL::Analysis::RunnableDB::Finished
HalfwiseHMM
Toolbar
Summary
Bio::EnsEMBL::Analysis::RunnableDB::Finished::HalfwiseHMM
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Analysis::Config::GeneBuild::Similarity qw ( GB_SIMILARITY_DATABASES )
BlastableVersion
Data::Dumper
Inherit
Synopsis
my $obj = Bio::EnsEMBL::Analysis::RunnableDB::Finished::HalfwiseHMM->new(
-db => $db,
-input_id => $id
);
$obj->fetch_input
$obj->run
my @newfeatures = $obj->output;
Description
runs HalfwiseHMM runnable and converts it output into genes which can be stored in an ensembl database
Methods
Methods description
Arg : none Function : takes the features from the halfwise runnable and runs _make_genes to convert them into Bio::EnsEMBL::Genes with appropriately attached exons and supporting evidence Exception: thows if there are no analysis types Caller : Example : |
Title : make_genes Usage : $self->make_genes Function: makes Bio::EnsEMBL::Genes out of the output from runnables Returns : array of Bio::EnsEMBL::Gene Args : $genetype: string $analysis_obj: Bio::EnsEMBL::Analysis $results: reference to an array of FeaturePairs |
Title : make_transcript Usage : $self->make_transcript($gene, $slice, $genetype) Function: makes a Bio::EnsEMBL::Transcript from a SeqFeature representing a gene, with sub_SeqFeatures representing exons. Example : Returns : Bio::EnsEMBL::Transcript with Bio::EnsEMBL:Exons(with supporting feature data), and a Bio::EnsEMBL::translation Args : $gene: Bio::EnsEMBL::SeqFeatureI, $contig: Bio::EnsEMBL::RawContig, $genetype: string, $analysis_obj: Bio::EnsEMBL::Analysis |
Arg : none Function : fetches the repeatmasked sequence and the uniprot features for the slice being run on and creates the HalfwiseHMM Runnable Exception: throws if no input_id has been provided Caller : Example : |
Title : get_db_version [ distinguished from RunnableDB::*::db_version_searched() ] Useage : $self->get_db_version('/data/base/path') $obj->get_db_version() Function: Set a blast database version from the supplied path Get a blast database version from previously supplied path Uses tjrc''s BlastableVersion module. Returns : String Args : String (should be a full database path) Caller : $self::fetch_databases() RunnableDB::Finished_EST::db_version_searched() |
Arg : all those inherited from RunnableDB Function : Make a new HalfwiseHMM object defining the above variables Exception: thrown if no input id is provided, this is found in RunnableDB Caller : Example : $runnable = Bio::EnsEMBL::Analysis::RunnableDB::HalfwiseHMM new->(-db => $db -INPUT_ID => $id -ANALYSIS => $analysis); |
Title : output Usage : Function: get/set for output array Example : Returns : array of Bio::EnsEMBL::Gene Args : |
Arg : a Bio::EnsEMBL::Analysis::Runnable Function : Gets/sets the runnable Exception: throws if argument passed isn't a runnable Caller : Example :' |
Arg : none Function : writes the converted output to the database as genes Exception: none Caller : Example : |
Methods code
_check_that_external_db_table_populated | description | prev | next | Top |
sub _check_that_external_db_table_populated
{ my ( $self, $release, $name, $status ) = @_;
$status ||= 'XREF';
my $db = $self->db();
my $find_tuple_sql = qq(SELECT count(*) AS tuple_exists
FROM external_db
WHERE db_name = ?
&& db_release = ?);
my $sth = $db->prepare($find_tuple_sql);
$sth->execute( $name, $release );
my $tuple = $sth->fetchrow_hashref() || {};
$sth->finish();
return if $tuple->{'tuple_exists'};
$sth = $db->prepare("LOCK TABLES external_db WRITE");
$sth->execute();
$sth->finish();
my $max_db_id_sql =
q``SELECT MAX(external_db_id) + 1 AS next_db_id from external_db`;
$sth = $db->prepare($max_db_id_sql);
$sth->execute();
my $row = $sth->fetchrow_hashref || {};
my $max_db_id = $row->{'next_db_id'} || warning "Error";
my $insert_sql =
q``INSERT INTO external_db (external_db_id, db_name, db_release, status) VALUES(?, ?, ?, ?)`;
$sth = $db->prepare($insert_sql);
$sth->execute( $max_db_id, $name, $release, $status );
$sth->finish();
$sth = $db->prepare("UNLOCK TABLES");
$sth->execute();
return $max_db_id; } |
sub _convert_output
{
my ($self) = @_;
my @genes;
my $analysis = $self->analysis();
my $genetype = 'Halfwise';
my @out;
foreach my $runnable ( $self->runnable ) {
push( @out, $runnable->output );
$self->pfam_lookup( $runnable->pfam_lookup )
if $runnable->can('pfam_lookup');
}
my @g = $self->_make_genes( $genetype, $analysis,\@ out );
push( @genes, @g );
$self->output(@genes);
}
} |
sub _make_genes
{
my ( $self, $genetype, $analysis_obj, $results ) = @_;
my $sliceid = $self->input_id;
my $sa = $self->db->get_SliceAdaptor();
my $slice = $sa->fetch_by_name($sliceid);
my @genes;
my $info_type = 'MISC';
my $info_text = 'Relationship generated from genewisedb search';
my $pfam_lookup = $self->pfam_lookup();
my $pfam_release = $self->get_db_version();
$self->_check_that_external_db_table_populated( $pfam_release, 'PFAM',
'XREF' );
foreach my $tmp_gene (@$results) {
my $pfam_id = $tmp_gene->seqname();
my $acc_ver = $pfam_lookup->{$pfam_id}->[0];
my @pfamacc_ver = split /\./, $acc_ver;
my $dbentry = Bio::EnsEMBL::DBEntry->new(
-primary_id => $pfamacc_ver[0],
-display_id => $pfam_id,
-version => $pfamacc_ver[1],
-release => $pfam_release,
-dbname => "PFAM",
-description => $pfam_lookup->{$pfam_id}->[1],
-info_type => $info_type,
-info_text => $info_text
);
$dbentry->status('XREF');
my $gene = Bio::EnsEMBL::Gene->new();
my $transcript =
$self->_make_transcript( $tmp_gene, $slice, $genetype,
$analysis_obj );
$gene->biotype($genetype);
$gene->analysis($analysis_obj);
$gene->add_Transcript($transcript);
$gene->add_DBEntry($dbentry);
$gene->display_xref();
push( @genes, $gene );
}
return @genes; } |
sub _make_transcript
{
my ( $self, $gene, $slice, $genetype, $analysis_obj ) = @_;
$genetype = 'unspecified' unless defined($genetype);
unless ( $gene->isa("Bio::EnsEMBL::SeqFeatureI") ) {
print "$gene must be Bio::EnsEMBL::SeqFeatureI\n";
}
my $transcript = Bio::EnsEMBL::Transcript->new();
my $translation = Bio::EnsEMBL::Translation->new();
$transcript->translation($translation);
$transcript->analysis($analysis_obj);
my $excount = 1;
my @exons;
foreach my $exon_pred ( $gene->sub_SeqFeature ) {
my $exon = Bio::EnsEMBL::Exon->new();
my $sa = $slice->adaptor();
$exon->display_id( $sa->get_seq_region_id($slice) );
$exon->start( $exon_pred->start );
$exon->end( $exon_pred->end );
$exon->strand( $exon_pred->strand );
$exon->phase( $exon_pred->phase || 0 );
$exon->end_phase(0);
$exon->slice($slice);
foreach my $subf ( $exon_pred->sub_SeqFeature ) {
$subf->feature1->seqname( $slice->get_seq_region_id );
$subf->feature1->score(100);
$subf->feature1->analysis($analysis_obj);
$subf->feature2->score(100);
$subf->feature2->analysis($analysis_obj);
$exon->add_Supporting_Feature($subf);
}
push( @exons, $exon );
$excount++;
}
if ( @exons < 0 ) {
}
else {
if ( $exons[0]->strand == -1 ) {
@exons = sort { $b->start <=> $a->start } @exons;
}
else {
@exons = sort { $a->start <=> $b->start } @exons;
}
foreach my $exon (@exons) {
$transcript->add_Exon($exon);
}
$translation->start_Exon( $exons[0] );
$translation->end_Exon( $exons[$#exons] );
if ( $exons[0]->phase == 0 ) {
$translation->start(1);
}
elsif ( $exons[0]->phase == 1 ) {
$translation->start(3);
}
elsif ( $exons[0]->phase == 2 ) {
$translation->start(2);
}
$translation->end( $exons[$#exons]->end - $exons[$#exons]->start + 1 );
}
return $transcript; } |
sub db_version_searched
{ my ( $self, $arg ) = @_;
$self->{'_db_version_searched'} = $arg if $arg;
return $self->{'_db_version_searched'}; } |
sub fetch_input
{
my ($self) = @_;
my %ests;
my @estseqs;
throw("No input id") unless defined( $self->input_id );
my $sliceid = $self->input_id;
my $sa = $self->db->get_SliceAdaptor();
my $slice = $sa->fetch_by_name($sliceid);
$slice->{'seq'} = $slice->seq();
$self->query($slice);
my $maskedslice =
$slice->get_repeatmasked_seq( $ANALYSIS_REPEAT_MASKING, $SOFT_MASKING )
or throw("Unable to fetch contig");
my $alignAdaptor = $self->db->get_ProteinAlignFeatureAdaptor();
my $stateinfocontainer = $self->db->get_StateInfoContainer;
my $analysis_adaptor = $self->db->get_AnalysisAdaptor();
foreach my $database ( @{$GB_SIMILARITY_DATABASES} ) {
my $analysis_ln = $database->{'type'};
my $threshold = $database->{'threshold'};
my $fps = [];
my $features =
$alignAdaptor->fetch_all_by_Slice_and_pid( $slice,
$threshold,
$analysis_ln );
print STDERR
"Number of features matching threshold $database->{'threshold'} = "
. scalar(@$features) . "\n";
foreach my $f (@$features) {
if ( UNIVERSAL::isa( $f, "Bio::EnsEMBL::FeaturePair" )
&& defined( $f->hseqname ) )
{
push( @$fps, $f );
}
}
my $runnable =
Bio::EnsEMBL::Analysis::Runnable::Finished::HalfwiseHMM->new(
'-query' => $maskedslice,
'-features' => $fps,
'-pfamdb' => $self->getPfamDB(),
'-options' => $self->analysis->parameters(),
'-program' => $self->analysis->program(),
'-analysis' => $self->analysis
);
$self->runnable($runnable);
my $ana = $analysis_adaptor->fetch_by_logic_name($analysis_ln);
my $uniprot_db_version = $stateinfocontainer->fetch_db_version($self->input_id,$ana);
my $pfam_db_version = $self->get_db_version();
$self->db_version_searched(join('_',$uniprot_db_version,$pfam_db_version));
} } |
sub getPfamDB
{
my ($self) = @_;
unless ( $self->{'_pfam_db'} ) {
my $pfam_meta = $self->db->get_MetaContainer();
my $value = $pfam_meta->list_value_by_key('pfam_db')
|| throw("please enter pfam_db key - value into meta table\n");
my $pfam_db_conn = $self->make_hash_from_meta_value( $value->[0] );
my $db_file_path = $self->analysis->db_file;
my $db_file_version = $self->get_db_version($db_file_path);
if ( $db_file_version =~ /^(\d+).(\d+)$/ ) {
$pfam_db_conn->{'-dbname'} = "pfam_$1_$2";
}
elsif ( $db_file_version =~ /^(\d+)$/ ) {
$pfam_db_conn->{'-dbname'} = "pfam_$1_0";
}
$self->{'_pfam_db'} =
Bio::EnsEMBL::DBSQL::DBConnection->new(%$pfam_db_conn);
}
return $self->{'_pfam_db'}; } |
sub get_db_version
{ my ( $self, $db ) = @_;
my $debug_this = 1; my $force_dbi = 1; unless ( $self->{'_pfam_db_version'} ) {
if ($db) {
$BlastableVersion::debug = $debug_this;
warning
"BlastableVersion is cvs revision $BlastableVersion::revision\n "
if $debug_this;
my $ver = eval {
my $blast_ver = BlastableVersion->new();
$blast_ver->force_dbi($force_dbi); $blast_ver->get_version($db);
$blast_ver;
};
throw("I failed to get a BlastableVersion for $db [$@]") if $@;
my $dbv = $ver->version();
my $sgv = $ver->sanger_version();
my $name = $ver->name();
my $date = $ver->date();
unless ($dbv) {
throw( "I know nothing about $db I tried to find out:\n"
. " - name <"
. $name . ">\n"
. " - date <"
. $date . ">\n"
. " - version <"
. $dbv . ">\n"
. " - sanger_version <"
. $sgv
. ">\n" );
}
$self->{'_pfam_db_version'} = $dbv;
}
else {
throw("Must provide a db file name as argument");
}
}
return $self->{'_pfam_db_version'}; } |
sub make_hash_from_meta_value
{
my ( $self, $string ) = @_;
if ($string) {
my $hash = { eval $string };
if($@) {
die "error evaluating $string [$@]";
} else {
return $hash || {};
}
}
return {}; } |
sub new
{
my ( $new, @args ) = @_;
my $self = $new->SUPER::new(@args);
my ( $type, $threshold ) = rearrange( [qw(TYPE THRESHOLD)], @args );
$self->{' '} = []; return $self; } |
sub output
{ my ( $self, @genes ) = @_;
if ( !defined( $self->{'_output'} ) ) {
$self->{'_output'} = [];
}
if ( scalar(@genes) ) {
push( @{ $self->{'_output'} }, @genes );
}
return @{ $self->{'_output'} };
}
1; } |
sub pfam_lookup
{ my ( $self, $hash_ref ) = @_;
if ( ref($hash_ref) eq 'HASH' ) {
$self->{'_pfam_lookup'} ||= {};
$self->{'_pfam_lookup'} =
{ %{ $self->{'_pfam_lookup'} }, %{$hash_ref} };
}
return $self->{'_pfam_lookup'}; } |
sub run
{ my ($self) = @_;
foreach my $runnable ( $self->runnable ) {
$runnable || throw("Can't run - no runnable object");
print STDERR "using " . $runnable . "\n";
eval { $runnable->run; };
if ( my $err = $@ ) {
chomp $err;
$self->failing_job_status($1)
if $err =~ /^\"([A-Z_]{1,40})\"$/i; throw("$@");
}
}
$self->_convert_output(); } |
sub runnable
{ my ( $self, $arg ) = @_;
if ( !defined( $self->{'_seqfetchers'} ) ) {
$self->{'_seqfetchers'} = [];
}
if ( defined($arg) ) {
throw("[$arg] is not a Bio::EnsEMBL::Analysis::Runnable")
unless $arg->isa("Bio::EnsEMBL::Analysis::Runnable");
push( @{ $self->{_runnable} }, $arg );
}
return @{ $self->{_runnable} }; } |
sub threshold
{
my ( $self, $threshold ) = @_;
if ( defined($threshold) ) {
$self->{_threshold} = $threshold;
}
return $self->{_threshold}; } |
sub type
{
my ( $self, $type ) = @_;
if ( defined($type) ) {
$self->{_type} = $type;
}
return $self->{_type}; } |
sub write_output
{
my ($self) = @_;
my @times = times;
print STDERR "started writing @times\n ";
my @genes = $self->output();
my $db = $self->db();
my $gene_adaptor = $db->get_GeneAdaptor;
my $dbh = $db->dbc->db_handle;
$dbh->begin_work;
eval {
GENE: foreach my $gene (@genes)
{
$gene_adaptor->store($gene,0);
}
$dbh->commit;
};
if ($@) {
$dbh->rollback;
throw("UNABLE TO WRITE GENES IN DATABASE\n[$@]\n");
}
@times = times;
print STDERR "finished writing @times\n ";
return 1; } |
General documentation
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _