Bio::EnsEMBL::Analysis::Runnable::Finished HalfwiseHMM
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
Bio::EnsEMBL::Analysis::Runnable::Finished::HalfwiseHMM
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Analysis
Bio::EnsEMBL::Analysis::Runnable
Bio::EnsEMBL::Analysis::Runnable::Blast
Bio::EnsEMBL::Analysis::Runnable::Finished::GenewiseHmm
Bio::EnsEMBL::Analysis::Tools::FeatureFilter
Bio::EnsEMBL::FeaturePair
Bio::EnsEMBL::Root
Bio::EnsEMBL::SeqFeature
Bio::EnsEMBL::Utils::Argument qw ( rearrange )
Bio::EnsEMBL::Utils::Exception qw ( throw warning )
Bio::PrimarySeq
Bio::Seq
Bio::SeqIO
Bio::Tools::BPlite
BlastableVersion
Fcntl
Inherit
Bio::EnsEMBL::Analysis::Runnable
Synopsis
    my $obj = Bio::EnsEMBL::Analysis::Runnable::Finished::HalfwiseHMM->new(
-genomic => $seq,
-features => \@features,
);
$obj->run
my @newfeatures = $obj->output;
or something similar????
Description
Finds which pfam domains a particular swissprot hit matches, finds the related hmms and runs genewiseHmm
Methods
add_input_featuresDescriptionCode
add_output_featuresDescriptionCode
all_input_featuresDescriptionCode
analysis
No description
Code
create_genewisehmm_complete
No description
Code
create_genewisehmm_individuallyDescriptionCode
dbm_fileDescriptionCode
display_genes
No description
Code
get_GenewiseHMMDescriptionCode
get_hmmDescriptionCode
get_hmmdb
No description
Code
get_pfam_hmm
No description
Code
get_pfam_idsDescriptionCode
get_swissprot_idsDescriptionCode
hmmdbDescriptionCode
hmmfetchDescriptionCode
hmmfilenameDescriptionCode
memoryDescriptionCode
newDescriptionCode
outputDescriptionCode
pfamDB
No description
Code
pfam_db_version
No description
Code
pfam_lookup
No description
Code
pfam_ls_version
No description
Code
program
No description
Code
queryDescriptionCode
runDescriptionCode
run_genewisehmmDescriptionCode
Methods description
add_input_featurescode    nextTop
    Arg      : reference to array of features
Function : get/set method for input features
Exception: throws an exception if not passed an array ref
Caller :
add_output_featurescodeprevnextTop
    Arg      : array ref to array of output features
Function : adds the array of features to the output feature array
Exception: throws an exception if not passed an array ref
Caller :
all_input_featurescodeprevnextTop
    Arg      : none
Function : returns all input features
Exception: none
Caller :
create_genewisehmm_individually(2)codeprevnextTop
    Arg      : reference to pfam_id hash and directory if it is to be set to anything other than temp
Function : creates a hmm databases of pfam ids, then creates and runs the GenewiseHmm runnable
Exception:
Caller :
dbm_filecodeprevnextTop
    Arg      : dbm filename
Function : gets/sets dbm filename
Exception: none
Caller :
get_GenewiseHMMcodeprevnextTop
    Arg      : the strand of the hit and the memory to be used by genewisehmm
Function : runs the new method of GenewiseHmm and returns the runnable
Exception: none
Caller :
get_hmmcodeprevnextTop
    Arg      : id of hmm to be fetched normally a pfam id
Function : runs hmmfetch on given id
Exception: thows if no hmmfile is created
Caller :
get_pfam_idscodeprevnextTop
    Arg      : reference to a hash of swissprotids and stands
Function : gets all pfam ids for a particular swissprot id and puts them in a hash along with the strand the swissprot id is found on and returns its
Exception: warns if swissprot id has no pfam domains
Caller :
get_swissprot_idscodeprevnextTop
    Arg      : none
Function : gets swissprot ids and strands from input features and returns a hash of these
Exception: none
Caller :
hmmdbcodeprevnextTop
    Arg      : location of hmmdb
Function : gets/sets location of hmmdb
Exception: none
Caller :
hmmfetchcodeprevnextTop
    Arg      : location of hmmfetch program
Function : gets/sets location of hmmfetch program
Exception: none
Caller :
hmmfilenamecodeprevnextTop
    Arg      : hmm filename
Function : gets/sets hmmfilename
Exception: none
Caller :
memorycodeprevnextTop
    Arg      : value memory to be set to, this is an option of how much memory genewise can use when it is run
Function : gets/sets memory
Exception: none
Caller :
newcodeprevnextTop
    Arg      : Bio:Seq obj, reference to array of uniprot features, location of hmmfetch program, location of hmmdb, location of dbm index
Function : Make a new HalfwiseHMM object defining the above variables
Exception: Will throw an exception if no genomic sequence is defined or no features are passed
Caller :
Example : $halfwise = Bio::EnsEMBL::Analysis::Runnable::HalfwiseHMM->new(genomic => $seq
features => \@features);
outputcodeprevnextTop
    Arg      : none
Function : returns the array of output features
Exception: none
Caller :
querycodeprevnextTop
  Arg      : Bio:Seq object
Function : get/set method for query
Exception: throws an exception if not passed a Bio:PrimarySeq
Caller :
runcodeprevnextTop
    Arg      : directory if to be set to anything other than tmp
Function : runs functions which run genewisehmm
Exception: none
Caller :
run_genewisehmmcodeprevnextTop
    Arg       :
Function :
Exception :
Caller : $self->create_genewisehmm_complete, $self->create_genewisehmm_individually
Methods code
add_input_featuresdescriptionprevnextTop
sub add_input_features {
    my ($self, $features) = @_;
    if (ref($features) eq "ARRAY") {
      push(@{$self->{'_input_features'}},@$features);
    } else {
      throw("[$features] is not an array ref.");
    }
}
add_output_featuresdescriptionprevnextTop
sub add_output_features {
    my ($self, $features) = @_;
    #print "adding ".scalar(@$features)." to output\n";
if (ref($features) eq "ARRAY") { push(@{$self->{'_output_features'}},@$features); } else { throw("[$features] is not an array ref."); } #print "total feature no = ".scalar(@{$self->{'_output_features'}})."\n";
} ################
#OUTPUT METHODS#
################
}
all_input_featuresdescriptionprevnextTop
sub all_input_features {
  my ($self) = @_;

  return @{$self->{'_input_features'}};

}


#methods that probably won't be needed
}
analysisdescriptionprevnextTop
sub analysis {
    my ( $self, $analysis ) = @_;
    if ($analysis) {
        $self->{'_analysis'} = $analysis;
    }
    return $self->{'_analysis'};
}
create_genewisehmm_completedescriptionprevnextTop
sub create_genewisehmm_complete {
    my ($self, $pfam_ids) = @_;
    print STDERR "getting hmms\n"; ##########
print STDERR "\nthere are ".scalar(keys(%$pfam_ids))." pfam ids in database\n"; ##########
return unless scalar(keys(%$pfam_ids)); print STDERR "doing the hmm for ids: ". join(" ", keys(%$pfam_ids)) . "\n"; ##########
$self->run_genewisehmm(0); print STDERR "removing hmmfile: ".$self->hmmfilename()."\n"; ##########
unlink $self->hmmfilename(); return undef;
}
create_genewisehmm_individuallydescriptionprevnextTop
sub create_genewisehmm_individually {
    my ($self, $pfam_ids, $dir) = @_;
    print STDERR "getting hmms\n"; ##########
$self->workdir('/tmp') unless ($self->workdir($dir)); $self->checkdir(); my $memory = $self->memory; print STDERR "there are ".scalar(keys(%$pfam_ids))." pfam ids to run\n"; ##########
while(my ($pfam_id, $strand) = each(%$pfam_ids)){ print STDERR "doing the hmm for id: $pfam_id\n"; ##########
$self->get_hmm($pfam_id); if (-z $self->hmmfilename){ warning("hmm file not created :$!"); next; } my @strands = (); if($strand == 1){ push(@strands, $strand); }elsif($strand == -1){ push(@strands, $strand); }elsif($strand == 0){ push(@strands, -1); push(@strands, 1); }else{ throw("strand = ".$strand." something funnies going on : $!\n"); } foreach my $s (@strands){ $self->run_genewisehmm($s); #print STDERR "running on strand: $s\n"; ##########
#my $genewisehmm = $self->get_GenewiseHMM($s, $memory);
#$genewisehmm->run();
#my @features = $genewisehmm->output();
#$self->display_genes(\@features); ##########
#print STDERR "adding ".scalar(@features)." to output\n"; ##########
#$self->add_output_features(\@features);
} print STDERR "removing hmmfile: ".$self->hmmfilename()."\n"; ##########
unlink $self->hmmfilename(); } throw("finished this bit"); return undef;
}
dbm_filedescriptionprevnextTop
sub dbm_file {
    my ($self, $args) = @_;
    if (defined ($args)){
	$self->{'_dbm_file'} = $args;
    }

    return $self->{'_dbm_file'};
}
display_genesdescriptionprevnextTop
sub display_genes {
  my ($self, $result) = @_;
  #Display output
my @results = @$result; foreach my $obj (@results) { print STDERR ("gene: ".$obj->gffstring. "\n"); if ($obj->sub_SeqFeature) { foreach my $exon ($obj->sub_SeqFeature) { print STDERR "Exon: ".$exon->gffstring."\n"; if ($exon->sub_SeqFeature) { foreach my $sub ($exon->sub_SeqFeature){ print STDERR "supporting features: ".$sub->gffstring."\n"; } } } } } } 1;
}
get_GenewiseHMMdescriptionprevnextTop
sub get_GenewiseHMM {
  my ($self, $strand, $memory) = @_;
  my $reverse = ($strand == -1 ? 1 : undef);
  print STDERR "creating genewisehmm strand $strand reverse $reverse\n"; ##########
print STDERR "OPTIONS To Genewise: ".$self->options()."\n"; ##########
# $genewisehmm->set_environment("/usr/local/ensembl/data/wisecfg/");
$ENV{WISECONFIGDIR} = "/usr/local/ensembl/data/wisecfg/"; my $genewisehmm = Bio::EnsEMBL::Analysis::Runnable::Finished::GenewiseHmm->new('-query' => $self->query(), '-memory' => $memory, '-hmmfile' => $self->hmmfilename(), '-reverse' => $reverse, '-genewise' => $self->program(), '-options' => $self->options(), '-analysis' => $self->analysis(), ); return $genewisehmm;
}
get_hmmdescriptionprevnextTop
sub get_hmm {
  my ($self, $id) = @_;
  #print "getting hmms\n";
$self->hmmfilename($id.".".$$.".hmm"); #print "getting hmm for id ".$id."\n";
my $command = $self->hmmfetch." ".$self->hmmdb." ".$id." > ".$self->hmmfilename; #print "command = ".$command."\n";
#system ('pwd');
eval{ system($command); }; if($@){ warning("hmmfetch threw error : $@\n"); } if (-z $self->hmmfilename){ warning("hmm file not created :$!"); }elsif(-e $self->hmmfilename){ open(ERROR, $self->hmmfilename) or die "couldn't open error file : $!"; while(<ERROR>){ if(/no such hmm/i){ print STDERR "error message : ".$_."\n"; } } close(ERROR) or die "couldn't close error file : $!"; }
}
get_hmmdbdescriptionprevnextTop
sub get_hmmdb {
    my ($self, $pfam_ids) = @_;
    my @pfamIds = keys(%$pfam_ids);
    print STDERR "getting the hmms for ids: ". join(" ", @pfamIds) . "\n"; ##########
my $filename = substr(join("_", @pfamIds),0,20); $self->hmmfilename("$filename.$$.hmmdb"); foreach my $id(@pfamIds){ print "getting hmm for id ".$id."\n"; my $command = $self->hmmfetch." ".$self->hmmdb." ".$id." >> ".$self->hmmfilename; eval{ system($command); }; if($@){ warning("hmmfetch threw error : $@\n"); } if (-z $self->hmmfilename){ warning("hmm file not created :$!"); }elsif(-e $self->hmmfilename){ open(ERROR, $self->hmmfilename) or die "couldn't open error file : $!"; while(<ERROR>){ if(/no such hmm/i){ print STDERR "error message : ".$_."\n"; } } close(ERROR) or die "couldn't close error file : $!"; } }
}
get_pfam_hmmdescriptionprevnextTop
sub get_pfam_hmm {
	my ($self, $uniprot_ids, $dir) = @_;
    my $pfam_accs;
    my $pfam_lookup;
    my $db = $self->pfamDB();

    $self->workdir('/tmp') unless($self->workdir($dir));
    $self->checkdir();

	my $sql = "SELECT
	    			DISTINCT(CONCAT(a.pfamA_acc,'.',a.version)),
	    			a.pfamA_id,
	    			a.description,
	    			ls.hmm_ls
				FROM
				    pfamA_reg_full_significant f,
				    pfamseq p,
				    pfamA a,
				    pfamA_HMM_ls ls
				WHERE
				    f.auto_pfamseq = p.auto_pfamseq AND
				    f.in_full = 1 AND
				    a.auto_pfamA = f.auto_pfamA AND
				    ls.auto_pfamA = f.auto_pfamA AND
				    p.pfamseq_acc IN ";

    my @accessions = keys(%$uniprot_ids);
    map ( s/(\w*(-\d+)?)(\.\d+)?/'$1'/,@accessions);

    # deal with empty @accessions
next unless defined @accessions; $sql .= "(".join(',',@accessions).")"; #print STDOUT $sql."\n";
# create the hmm file
$self->hmmfilename("halfwise.$$.hmmdb"); open (HMMFILE, ">".$self->hmmfilename) or die "couldn't create hmm file ".$self->hmmfilename." $!"; #print STDOUT "HMM file is ".$self->hmmfilename."\n";
my $sth = $db->prepare($sql); $sth->execute(); my ($pfam_acc, $pfam_id, $description, $hmm_ls); $sth->bind_columns(\($pfam_acc, $pfam_id, $description, $hmm_ls)); while(my $row = $sth->fetchrow_arrayref()){ $pfam_lookup->{$pfam_id} = [$pfam_acc, $description]; $pfam_accs->{$pfam_acc} = 1; print HMMFILE $hmm_ls; } $sth->finish(); close HMMFILE; $self->pfam_lookup($pfam_lookup); return $pfam_accs;
}
get_pfam_idsdescriptionprevnextTop
sub get_pfam_ids {
    my ($self, $swissprot_ids) = @_;
    my $pfam_accs;
    my $pfam_lookup;
    # CREATE TEMPORARY TABLE
my $tbl_name = "pipeline_tmp_$$"; my $create_table = qq{CREATE TEMPORARY TABLE $tbl_name( pfamseq_id varchar(12) NOT NULL PRIMARY KEY, strand enum('1','0','-1') DEFAULT '0' )TYPE = HEAP }; # should this be HEAP?
# There's never gonna be that many matches
# to exceed tmp_table_size = 10048576 ???
my $db = $self->pfamDB(); my $sth = $db->prepare($create_table); $sth->execute(); $sth->finish(); # INSERT
my (@binds, @values); my $sql = qq{INSERT IGNORE INTO $tbl_name (pfamseq_id, strand) VALUES }; while (my ($swiss_id, $strand) = each(%$swissprot_ids)){ #print STDERR "$swiss_id : $strand\n";
# strip off sequence version from uniprot ID (introduced in Uniprot release 7.0)
$swiss_id =~ s/\.\d+//; push(@binds, $swiss_id, $strand); push(@values, qq{ (?, ?)}); } if(scalar(@values)){ $sql .= join(", ", @values); #warning $sql;
$sth = $db->prepare($sql); $sth->execute(@binds); $sth->finish(); } # SELECT
# my $select = qq{SELECT a.pfamA_acc, t.strand, t.pfamseq_id, a.pfamA_id, a.description
# FROM pfamA a, pfamA_reg_full f, pfamseq p, $tbl_name t
# WHERE p.auto_pfamseq = f.auto_pfamseq
# && a.auto_pfamA = f.auto_pfamA
# && p.pfamseq_id = t.pfamseq_id
# && f.in_full = 1};
my $select = qq{SELECT CONCAT(a.pfamA_acc,".",a.version), t.strand, t.pfamseq_id, a.pfamA_id, a.description FROM pfamA_reg_full f, pfamseq p, $tbl_name t, pfamA a WHERE f.auto_pfamseq = p.auto_pfamseq && p.pfamseq_acc = t.pfamseq_id && f.in_full = 1 && a.auto_pfamA = f.auto_pfamA;}; $sth = $db->prepare($select); $sth->execute(); my ($pfam_acc, $strand, $swall, $pfam_id, $description); $sth->bind_columns(\($pfam_acc, $strand, $swall, $pfam_id, $description)); while(my $row = $sth->fetchrow_arrayref()){ print STDERR "$swall : $strand : $pfam_acc : $pfam_id\n"; # making pfam_lookup for later ...
$pfam_lookup->{$pfam_id} = [$pfam_acc, $description]; if(defined($pfam_accs->{$pfam_acc})){ $pfam_accs->{$pfam_acc} = ($strand eq $pfam_accs->{$pfam_acc} ? $strand : 0); }else{ $pfam_accs->{$pfam_acc} = $strand; } print "\n pfam_acc: $pfam_acc, strand: $strand, uniprot: $swall,pfamid: $pfam_id,description: $description"; } $self->pfam_lookup($pfam_lookup); # store the lookup somewhere useful.
return $pfam_accs; } # get/set for lookup multi-valued hash { pfam_id => [pfam_acc, pfam_desc], ... }
# can append multiple to the lookup (see { %{$self->{'_pfam_lookup'}}, %{$hash_ref} })
}
get_swissprot_idsdescriptionprevnextTop
sub get_swissprot_ids {
    my ($self) = @_;
    my @features = $self->all_input_features();
    my $swissprot_ids; #{}
foreach my $f(@features){ my $hname = $f->hseqname(); my $strand = $f->strand(); #print "swissprot id = ".$hname."\n";
if(!defined($swissprot_ids->{$hname})){ $swissprot_ids->{$hname} = $strand; }else{ $swissprot_ids->{$hname} = ($strand eq $swissprot_ids->{$hname} ? $strand : 0); } } return $swissprot_ids;
}
hmmdbdescriptionprevnextTop
sub hmmdb {
    my ($self, $args) =@_;

    if (defined($args)){
	$self->{'_hmmdb'} = $args;
    }
    return $self->{'_hmmdb'};
}
hmmfetchdescriptionprevnextTop
sub hmmfetch {
    my ($self, $args) =@_;

    if (defined($args)){
	$self->{'_hmmfetch'} = $args;
    }
    return $self->{'_hmmfetch'};
}
hmmfilenamedescriptionprevnextTop
sub hmmfilename {
    my ($self, $args) = @_;
    if (defined ($args)){
	$self->{'_hmmfilename'} = $args;
    }

    return $self->{'_hmmfilename'};
}
memorydescriptionprevnextTop
sub memory {
    my ($self,$arg) = @_;

    if (defined($arg)) {
	$self->{'_memory'} = $arg;
    }

    return $self->{'_memory'} || 400000;
}

##########
#ANALYSIS#
##########
}
newdescriptionprevnextTop
sub new {
    my ($class,@args) = @_;

    my $self = $class->SUPER::new(@args);
    $self->{'_query'} = undef;
    $self->{'_input_features'} = [];
    $self->{'_output_features'} = [];
    $self->{'_hmmfetch'} = undef;
    $self->{'_hmmdb'} = undef;
    $self->{'_hmmfilename'} = undef; #file to store protein profile hmms
$self->{'_errorfile'} = undef; #file to store any errors hmmfetch throw
$self->{'_dbm_file'} = undef; #print STDERR "args = @args\n";
my ($genomic, $features, $hmmfetch, $hmmdb, $pfamdb, $memory, $options, $analysis,$program) = rearrange([qw(QUERY FEATURES HMMFETCH HMMDB PFAMDB MEMORY OPTIONS ANALYSIS PROGRAM)], @args); throw("No genomic sequence input") unless defined($genomic); $self->query($genomic); throw("No features input") unless defined($features); $self->add_input_features($features); throw("No pfam database") unless $pfamdb; $self->pfamDB($pfamdb); $self->options($options); $self->program($program); throw("No genomic sequence input") unless defined($analysis); $self->analysis($analysis); if ($hmmfetch){ $self->hmmfetch($hmmfetch); } else { $self->hmmfetch('/usr/local/ensembl/bin/hmmfetch'); } if($hmmdb){ $self->hmmdb($hmmdb); } else { $self->hmmdb('/data/blastdb/Finished/Pfam_ls'); } $self->memory ($memory) if (defined($memory)); return $self; # success - we hope!
} #################
#GET/SET METHODS#
#################
}
outputdescriptionprevnextTop
sub output {
  my ($self) = @_;
  #print "outputing data\n";
#print "returning ".scalar(@{$self->{'_output_features'}})." to output\n";
return @{$self->{'_output_features'}};
}
pfamDBdescriptionprevnextTop
sub pfamDB {
    my ($self, $dbobj) = @_;
    $self->{'_pfamDB'} = $dbobj if $dbobj;
    throw("Not a Bio::EnsEMBL::DBSQL::DBAdaptor")
        unless $self->{'_pfamDB'}->isa("Bio::EnsEMBL::DBSQL::DBConnection");
    return $self->{'_pfamDB'};
}
pfam_db_versiondescriptionprevnextTop
sub pfam_db_version {
    my ($self) = @_;
    unless($self->{'_pfam_db_version'}){
        my $db = $self->pfamDB();
        $self->{'_pfam_db_version'} = $db->get_meta_value_by_key('version');
    }
    return $self->{'_pfam_db_version'};
}
pfam_lookupdescriptionprevnextTop
sub pfam_lookup {
    my ($self, $hash_ref) = @_;
    if(ref($hash_ref) eq 'HASH'){
        $self->{'_pfam_lookup'} ||= {};
        $self->{'_pfam_lookup'}   = { %{$self->{'_pfam_lookup'}}, %{$hash_ref} };
    }
    return $self->{'_pfam_lookup'};
}
pfam_ls_versiondescriptionprevnextTop
sub pfam_ls_version {
    my ($self) = @_;
    unless($self->{'_pfam_ls_version'}){
	my $ver = BlastableVersion->new($self->hmmdb);
#	$self->{'_pfam_ls_version'} = $ver->version();
$self->{'_pfam_ls_version'} = $ver->get_version($self->hmmdb); } return $self->{'_pfam_ls_version'};
}
programdescriptionprevnextTop
sub program {
    my ($self, $program) = @_;
    $self->{'_program'} = $program if $program;
    return $self->{'_program'};
}
querydescriptionprevnextTop
sub query {
    my( $self, $value ) = @_;
    if ($value) {
        #need to check if passed sequence is Bio::Seq object
$value->isa("Bio::PrimarySeqI") || throw("Input isn't a Bio::PrimarySeqI"); $self->{'_query'} = $value; } return $self->{'_query'};
}
rundescriptionprevnextTop
sub run {
    my ($self, $dir) = @_;

    my $swissprot_ids = $self->get_swissprot_ids();
    my $pfam_ids = $self->get_pfam_hmm($swissprot_ids,$dir);

    #$self->create_genewisehmm($pfam_ids, $dir);
#$self->create_genewisehmm_individually($pfam_ids, $dir);
$self->create_genewisehmm_complete($pfam_ids);
}
run_genewisehmmdescriptionprevnextTop
sub run_genewisehmm {
    my ($self, $strand) = @_;
    my $memory = $self->memory;
    print STDERR "running on strand: $strand\n"; ##########
my $genewisehmm = $self->get_GenewiseHMM($strand, $memory); $genewisehmm->run(); my @features = $genewisehmm->output(); $self->display_genes(\@features); ##########
print STDERR "adding ".scalar(@features)." to output\n"; ##########
$self->add_output_features(\@features);
}
General documentation
CONTACTTop
rds@sanger.ac.uk
refactored by Sindhu K. Pillai sp1@sanger.ac.uk
APPENDIXTop
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _