Bio::EnsEMBL::Analysis::RunnableDB::Finished HalfwiseHMM
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
Bio::EnsEMBL::Analysis::RunnableDB::Finished::HalfwiseHMM
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Analysis::Config::GeneBuild::Similarity qw ( GB_SIMILARITY_DATABASES )
Bio::EnsEMBL::Analysis::Config::General
Bio::EnsEMBL::Analysis::Runnable::Finished::HalfwiseHMM
Bio::EnsEMBL::Analysis::RunnableDB::Finished
Bio::EnsEMBL::DBEntry
Bio::EnsEMBL::DBSQL::DBAdaptor
Bio::EnsEMBL::DBSQL::DBConnection
Bio::EnsEMBL::Exon
Bio::EnsEMBL::Gene
Bio::EnsEMBL::Transcript
Bio::EnsEMBL::Translation
Bio::EnsEMBL::Utils::Argument qw ( rearrange )
Bio::EnsEMBL::Utils::Exception qw ( verbose throw warning )
BlastableVersion
Data::Dumper
Inherit
Bio::EnsEMBL::Analysis::RunnableDB::Finished
Synopsis
    my $obj = Bio::EnsEMBL::Analysis::RunnableDB::Finished::HalfwiseHMM->new(
-db => $db,
-input_id => $id
);
$obj->fetch_input
$obj->run
my @newfeatures = $obj->output;
Description
runs HalfwiseHMM runnable and converts it output into genes which can be stored in an ensembl database
Methods
_check_that_external_db_table_populated
No description
Code
_convert_outputDescriptionCode
_make_genesDescriptionCode
_make_transcriptDescriptionCode
db_version_searched
No description
Code
fetch_inputDescriptionCode
getPfamDB
No description
Code
get_db_versionDescriptionCode
make_hash_from_meta_value
No description
Code
newDescriptionCode
outputDescriptionCode
pfam_lookup
No description
Code
run
No description
Code
runnableDescriptionCode
threshold
No description
Code
type
No description
Code
write_outputDescriptionCode
Methods description
_convert_outputcode    nextTop
    Arg      : none
Function : takes the features from the halfwise runnable and runs _make_genes to convert them into Bio::EnsEMBL::Genes with appropriately attached exons and supporting evidence
Exception: thows if there are no analysis types
Caller :
Example :
_make_genes(2)codeprevnextTop
  Title   :   make_genes
Usage : $self->make_genes
Function: makes Bio::EnsEMBL::Genes out of the output from runnables
Returns : array of Bio::EnsEMBL::Gene
Args : $genetype: string
$analysis_obj: Bio::EnsEMBL::Analysis
$results: reference to an array of FeaturePairs
_make_transcriptcodeprevnextTop
 Title   : make_transcript
Usage : $self->make_transcript($gene, $slice, $genetype)
Function: makes a Bio::EnsEMBL::Transcript from a SeqFeature representing a gene,
with sub_SeqFeatures representing exons.
Example :
Returns : Bio::EnsEMBL::Transcript with Bio::EnsEMBL:Exons(with supporting feature
data), and a Bio::EnsEMBL::translation
Args : $gene: Bio::EnsEMBL::SeqFeatureI, $contig: Bio::EnsEMBL::RawContig,
$genetype: string, $analysis_obj: Bio::EnsEMBL::Analysis
fetch_inputcodeprevnextTop
    Arg      : none
Function : fetches the repeatmasked sequence and the uniprot features for the slice being run on and creates the HalfwiseHMM Runnable
Exception: throws if no input_id has been provided
Caller :
Example :
get_db_versioncodeprevnextTop
    Title   :  get_db_version
[ distinguished from RunnableDB::*::db_version_searched() ]
Useage : $self->get_db_version('/data/base/path')
$obj->get_db_version()
Function: Set a blast database version from the supplied path
Get a blast database version from previously supplied path
Uses tjrc''s BlastableVersion module.
Returns : String
Args : String (should be a full database path)
Caller : $self::fetch_databases()
RunnableDB::Finished_EST::db_version_searched()
newcodeprevnextTop
    Arg      : all those inherited from RunnableDB
Function : Make a new HalfwiseHMM object defining the above variables
Exception: thrown if no input id is provided, this is found in RunnableDB
Caller :
Example : $runnable = Bio::EnsEMBL::Analysis::RunnableDB::HalfwiseHMM new->(-db => $db
-INPUT_ID => $id
-ANALYSIS => $analysis);
outputcodeprevnextTop
 Title   : output
Usage :
Function: get/set for output array
Example :
Returns : array of Bio::EnsEMBL::Gene
Args :
runnablecodeprevnextTop
    Arg      : a Bio::EnsEMBL::Analysis::Runnable
Function : Gets/sets the runnable
Exception: throws if argument passed isn't a runnable
Caller :
Example :'
write_outputcodeprevnextTop
    Arg      : none
Function : writes the converted output to the database as genes
Exception: none
Caller :
Example :
Methods code
_check_that_external_db_table_populateddescriptionprevnextTop
sub _check_that_external_db_table_populated {
	my ( $self, $release, $name, $status ) = @_;
	$status ||= 'XREF';
	my $db             = $self->db();
	my $find_tuple_sql = qq(SELECT count(*) AS tuple_exists
			  FROM external_db
			  WHERE db_name = ?
			  && db_release = ?);
	my $sth = $db->prepare($find_tuple_sql);
	$sth->execute( $name, $release );
	my $tuple = $sth->fetchrow_hashref() || {};
	$sth->finish();

	# if there is one return do nothing and the job can get on as normal
return if $tuple->{'tuple_exists'}; # else lock table external_db write
$sth = $db->prepare("LOCK TABLES external_db WRITE"); $sth->execute(); $sth->finish(); # get the next external_db_id
my $max_db_id_sql = q``SELECT MAX(external_db_id) + 1 AS next_db_id from external_db`; $sth = $db->prepare($max_db_id_sql); $sth->execute(); my $row = $sth->fetchrow_hashref || {}; my $max_db_id = $row->{'next_db_id'} || warning "Error"; # insert the row
my $insert_sql = q``INSERT INTO external_db (external_db_id, db_name, db_release, status) VALUES(?, ?, ?, ?)`; $sth = $db->prepare($insert_sql); $sth->execute( $max_db_id, $name, $release, $status ); $sth->finish(); # unlock tables;
$sth = $db->prepare("UNLOCK TABLES"); $sth->execute(); return $max_db_id;
}
_convert_outputdescriptionprevnextTop
sub _convert_output {
	my ($self) = @_;
	my @genes;
	my $analysis = $self->analysis();
	my $genetype = 'Halfwise';

	# make an array of genes for each runnable
my @out; foreach my $runnable ( $self->runnable ) { push( @out, $runnable->output ); $self->pfam_lookup( $runnable->pfam_lookup ) if $runnable->can('pfam_lookup'); } my @g = $self->_make_genes( $genetype, $analysis,\@ out ); push( @genes, @g ); $self->output(@genes); } # get/set for lookup multi-valued hash { pfam_id => [pfam_acc, pfam_desc], ... }
# can append multiple to the lookup (see { %{$self->{'_pfam_lookup'}}, %{$hash_ref} })
}
_make_genesdescriptionprevnextTop
sub _make_genes {
	my ( $self, $genetype, $analysis_obj, $results ) = @_;
	my $sliceid = $self->input_id;
	my $sa      = $self->db->get_SliceAdaptor();
	my $slice   = $sa->fetch_by_name($sliceid);
	my @genes;
	my $info_type	= 'MISC';
	my $info_text	= 'Relationship generated from genewisedb search';

	# fetch lookup multi-valued hash { pfam_id => [pfam_acc, pfam_desc], ... }
my $pfam_lookup = $self->pfam_lookup(); my $pfam_release = $self->get_db_version(); $self->_check_that_external_db_table_populated( $pfam_release, 'PFAM', 'XREF' ); foreach my $tmp_gene (@$results) { my $pfam_id = $tmp_gene->seqname(); my $acc_ver = $pfam_lookup->{$pfam_id}->[0]; my @pfamacc_ver = split /\./, $acc_ver; my $dbentry = Bio::EnsEMBL::DBEntry->new( -primary_id => $pfamacc_ver[0], -display_id => $pfam_id, -version => $pfamacc_ver[1], -release => $pfam_release, -dbname => "PFAM", -description => $pfam_lookup->{$pfam_id}->[1], -info_type => $info_type, -info_text => $info_text ); $dbentry->status('XREF'); my $gene = Bio::EnsEMBL::Gene->new(); my $transcript = $self->_make_transcript( $tmp_gene, $slice, $genetype, $analysis_obj ); $gene->biotype($genetype); $gene->analysis($analysis_obj); $gene->add_Transcript($transcript); $gene->add_DBEntry($dbentry); $gene->display_xref(); push( @genes, $gene ); } return @genes;
}
_make_transcriptdescriptionprevnextTop
sub _make_transcript {
	my ( $self, $gene, $slice, $genetype, $analysis_obj ) = @_;
	$genetype = 'unspecified' unless defined($genetype);

	unless ( $gene->isa("Bio::EnsEMBL::SeqFeatureI") ) {
		print "$gene must be Bio::EnsEMBL::SeqFeatureI\n";
	}

	my $transcript  = Bio::EnsEMBL::Transcript->new();
	my $translation = Bio::EnsEMBL::Translation->new();

	$transcript->translation($translation);
	$transcript->analysis($analysis_obj);

	my $excount = 1;
	my @exons;

	foreach my $exon_pred ( $gene->sub_SeqFeature ) {

		# make an exon
my $exon = Bio::EnsEMBL::Exon->new(); my $sa = $slice->adaptor(); $exon->display_id( $sa->get_seq_region_id($slice) ); $exon->start( $exon_pred->start ); $exon->end( $exon_pred->end ); $exon->strand( $exon_pred->strand ); $exon->phase( $exon_pred->phase || 0 ); $exon->end_phase(0); $exon->slice($slice); # sort out supporting evidence for this exon prediction
foreach my $subf ( $exon_pred->sub_SeqFeature ) { $subf->feature1->seqname( $slice->get_seq_region_id ); $subf->feature1->score(100); $subf->feature1->analysis($analysis_obj); $subf->feature2->score(100); $subf->feature2->analysis($analysis_obj); $exon->add_Supporting_Feature($subf); } push( @exons, $exon ); $excount++; } if ( @exons < 0 ) { # printSTDERR "Odd. No exons found\n";
} else { if ( $exons[0]->strand == -1 ) { @exons = sort { $b->start <=> $a->start } @exons; } else { @exons = sort { $a->start <=> $b->start } @exons; } foreach my $exon (@exons) { $transcript->add_Exon($exon); } $translation->start_Exon( $exons[0] ); $translation->end_Exon( $exons[$#exons] ); if ( $exons[0]->phase == 0 ) { $translation->start(1); } elsif ( $exons[0]->phase == 1 ) { $translation->start(3); } elsif ( $exons[0]->phase == 2 ) { $translation->start(2); } $translation->end( $exons[$#exons]->end - $exons[$#exons]->start + 1 ); } return $transcript;
}
db_version_searcheddescriptionprevnextTop
sub db_version_searched {
	my ( $self, $arg ) = @_;
	$self->{'_db_version_searched'} = $arg if $arg;
	return $self->{'_db_version_searched'};
}
fetch_inputdescriptionprevnextTop
sub fetch_input {
	my ($self) = @_;
	my %ests;
	my @estseqs;
	throw("No input id") unless defined( $self->input_id );
	my $sliceid = $self->input_id;
	my $sa      = $self->db->get_SliceAdaptor();
	my $slice   = $sa->fetch_by_name($sliceid);
	$slice->{'seq'} = $slice->seq();
	$self->query($slice);
	my $maskedslice =
	  $slice->get_repeatmasked_seq( $ANALYSIS_REPEAT_MASKING, $SOFT_MASKING )
	  or throw("Unable to fetch contig");
	my $alignAdaptor = $self->db->get_ProteinAlignFeatureAdaptor();
	my $stateinfocontainer = $self->db->get_StateInfoContainer;
	my $analysis_adaptor = $self->db->get_AnalysisAdaptor();

	foreach my $database ( @{$GB_SIMILARITY_DATABASES} ) {
		my $analysis_ln = $database->{'type'};
		my $threshold   = $database->{'threshold'};
		my $fps      = [];
		my $features =
		  $alignAdaptor->fetch_all_by_Slice_and_pid( $slice,
			$threshold,
			$analysis_ln );
		print STDERR
		  "Number of features matching threshold $database->{'threshold'} = "
		  . scalar(@$features) . "\n";
		foreach my $f (@$features) {
			if ( UNIVERSAL::isa( $f, "Bio::EnsEMBL::FeaturePair" )
				&& defined( $f->hseqname ) )
			{
				push( @$fps, $f );
			}
		}

		my $runnable =
		  Bio::EnsEMBL::Analysis::Runnable::Finished::HalfwiseHMM->new(
			'-query'    => $maskedslice,
			'-features' => $fps,
			'-pfamdb'   => $self->getPfamDB(),
			'-options'  => $self->analysis->parameters(),
			'-program'  => $self->analysis->program(),
			'-analysis' => $self->analysis
		  );
		$self->runnable($runnable);

		# set the db version searched which is a concatenation of Uniprot and Pfam db versions
my $ana = $analysis_adaptor->fetch_by_logic_name($analysis_ln); my $uniprot_db_version = $stateinfocontainer->fetch_db_version($self->input_id,$ana); my $pfam_db_version = $self->get_db_version(); $self->db_version_searched(join('_',$uniprot_db_version,$pfam_db_version)); }
}
getPfamDBdescriptionprevnextTop
sub getPfamDB {
	my ($self) = @_;
	unless ( $self->{'_pfam_db'} ) {
		my $pfam_meta = $self->db->get_MetaContainer();
		my $value     = $pfam_meta->list_value_by_key('pfam_db')
		  || throw("please enter pfam_db key - value into meta table\n");
		my $pfam_db_conn = $self->make_hash_from_meta_value( $value->[0] );

		# Use the Blast tracking system to set the correct Pfam DB name
my $db_file_path = $self->analysis->db_file; my $db_file_version = $self->get_db_version($db_file_path); if ( $db_file_version =~ /^(\d+).(\d+)$/ ) { $pfam_db_conn->{'-dbname'} = "pfam_$1_$2"; } elsif ( $db_file_version =~ /^(\d+)$/ ) { $pfam_db_conn->{'-dbname'} = "pfam_$1_0"; } $self->{'_pfam_db'} = Bio::EnsEMBL::DBSQL::DBConnection->new(%$pfam_db_conn); } return $self->{'_pfam_db'};
}
get_db_versiondescriptionprevnextTop
sub get_db_version {
	my ( $self, $db ) = @_;
	my $debug_this = 1;    # this just shows debug info.
my $force_dbi = 1; # this will force a dbi call SLOW!!!!!!
unless ( $self->{'_pfam_db_version'} ) { if ($db) { $BlastableVersion::debug = $debug_this; warning "BlastableVersion is cvs revision $BlastableVersion::revision\n " if $debug_this; my $ver = eval { my $blast_ver = BlastableVersion->new(); $blast_ver->force_dbi($force_dbi); # if set will be SLOW.
$blast_ver->get_version($db); $blast_ver; }; throw("I failed to get a BlastableVersion for $db [$@]") if $@; my $dbv = $ver->version(); my $sgv = $ver->sanger_version(); my $name = $ver->name(); my $date = $ver->date(); unless ($dbv) { throw( "I know nothing about $db I tried to find out:\n" . " - name <" . $name . ">\n" . " - date <" . $date . ">\n" . " - version <" . $dbv . ">\n" . " - sanger_version <" . $sgv . ">\n" ); } $self->{'_pfam_db_version'} = $dbv; } else { throw("Must provide a db file name as argument"); } } return $self->{'_pfam_db_version'};
}
make_hash_from_meta_valuedescriptionprevnextTop
sub make_hash_from_meta_value {
	my ( $self, $string ) = @_;
	if ($string) {
		my $hash = { eval $string };
		if($@) {
			die "error evaluating $string [$@]";
		} else {
			return $hash || {};
		}
	}

	return {};
}
newdescriptionprevnextTop
sub new {
	my ( $new, @args ) = @_;
	my $self = $new->SUPER::new(@args);

	# db, input_id, seqfetcher, and analysis objects are all set in
# in superclass constructor (RunnableDB.pm)
my ( $type, $threshold ) = rearrange( [qw(TYPE THRESHOLD)], @args ); $self->{' '} = []; #create key to an array of feature pairs
return $self;
}
outputdescriptionprevnextTop
sub output {
	my ( $self, @genes ) = @_;
	if ( !defined( $self->{'_output'} ) ) {
		$self->{'_output'} = [];
	}
	if ( scalar(@genes) ) {
		push( @{ $self->{'_output'} }, @genes );
	}
	return @{ $self->{'_output'} };
}

1;
}
pfam_lookupdescriptionprevnextTop
sub pfam_lookup {
	my ( $self, $hash_ref ) = @_;
	if ( ref($hash_ref) eq 'HASH' ) {
		$self->{'_pfam_lookup'} ||= {};
		$self->{'_pfam_lookup'} =
		  { %{ $self->{'_pfam_lookup'} }, %{$hash_ref} };
	}
	return $self->{'_pfam_lookup'};
}
rundescriptionprevnextTop
sub run {
	my ($self) = @_;
	foreach my $runnable ( $self->runnable ) {
		$runnable || throw("Can't run - no runnable object");
		print STDERR "using " . $runnable . "\n";
		eval { $runnable->run; };
		if ( my $err = $@ ) {
			chomp $err;
			$self->failing_job_status($1)
			  if $err =~ /^\"([A-Z_]{1,40})\"$/i; # only match '"ABC_DEFGH"'
throw("$@"); } } $self->_convert_output();
}
runnabledescriptionprevnextTop
sub runnable {
	my ( $self, $arg ) = @_;
	if ( !defined( $self->{'_seqfetchers'} ) ) {
		$self->{'_seqfetchers'} = [];
	}
	if ( defined($arg) ) {
		throw("[$arg] is not a Bio::EnsEMBL::Analysis::Runnable")
		  unless $arg->isa("Bio::EnsEMBL::Analysis::Runnable");

		push( @{ $self->{_runnable} }, $arg );
	}

	return @{ $self->{_runnable} };
}
thresholddescriptionprevnextTop
sub threshold {
	my ( $self, $threshold ) = @_;
	if ( defined($threshold) ) {
		$self->{_threshold} = $threshold;
	}
	return $self->{_threshold};
}
typedescriptionprevnextTop
sub type {
	my ( $self, $type ) = @_;
	if ( defined($type) ) {
		$self->{_type} = $type;
	}
	return $self->{_type};
}
write_outputdescriptionprevnextTop
sub write_output {
	my ($self) = @_;
	my @times = times;
	print STDERR "started writing @times\n ";
	my @genes        = $self->output();
	my $db           = $self->db();
	my $gene_adaptor = $db->get_GeneAdaptor;
	my $dbh          = $db->dbc->db_handle;
	$dbh->begin_work;
	eval {
		GENE: foreach my $gene (@genes)
		{
			$gene_adaptor->store($gene,0);
		}
		$dbh->commit;
	};
	if ($@) {
		$dbh->rollback;
		throw("UNABLE TO WRITE GENES IN DATABASE\n[$@]\n");
	}
	@times = times;
	print STDERR "finished writing @times\n ";
	return 1;
}
General documentation
CONTACTTop
lec@sanger.ac.uk
refactored by Sindhu K. Pillai sp1@sanger.ac.uk
APPENDIXTop
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _