Bio::EnsEMBL::Analysis::Runnable PSILC
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
Bio::EnsEMBL::Analysis::Runnable::PSILC;
Package variables
No package variables defined.
Included modules
Bio::Align::Utilities qw ( aa_to_dna_aln )
Bio::AlignIO
Bio::EnsEMBL::Analysis::Config::Pseudogene
Bio::EnsEMBL::Analysis::Runnable
Bio::SimpleAlign
Bio::Tools::Run::Alignment::Clustalw
Inherit
Bio::EnsEMBL::Analysis::Runnable
Synopsis
  my $PSILC = Bio::EnsEMBL::Analysis::Runnable::PSILC->new 
(
'-trans' => $transcript,
'-homologs' => $homologs,
'-analysis' => $analysis,
'-domain' => $domains,
'-input_id' => $self->input_id,
);
$runnabledb->fetch_input();
$runnabledb->run();
my @array = @{$runnabledb->output};
$runnabledb->write_output();
Description
Runnable for PSILC
Methods
domainsDescriptionCode
files_to_be_deletedDescriptionCode
homologsDescriptionCode
id
No description
Code
make_codon_based_alignmentDescriptionCode
newDescriptionCode
outputDescriptionCode
output_dirDescriptionCode
parse_resultsDescriptionCode
runDescriptionCode
run_analysisDescriptionCode
transDescriptionCode
write_pfam_idsDescriptionCode
Methods description
domainscode    nextTop
Arg [1] : array ref
Description: get/set pfam domain identifiers
Returntype : string of pfam identifiers
Exceptions : none
Caller : general
files_to_be_deletedcodeprevnextTop
  Arg [1]    : scalar - filename 
Description: marks the PSILC output files for deletion
Returntype : none
Exceptions : none
Caller : general
homologscodeprevnextTop
Arg [1] : array ref
Description: get/set gene set to run over
Returntype : array ref to Bio::EnsEMBL::Gene objects
Exceptions : throws if not a Bio::EnsEMBL::Gene
Caller : general
make_codon_based_alignmentcodeprevnextTop
  Arg [1]    : none
Description: Makes a dna alignment based on a protein alignment using
Bio::Align::Utilities DNA alignment is for the translateable part of the
transcript only
Returntype : none
Exceptions : none
Caller : general
newcodeprevnextTop
  Arg [1]    : none
Description: Creates runnable
Returntype : none
Exceptions : none
Caller : general
outputcodeprevnextTop
  Arg [1]    : hash_ref
Description: overrides output array
Returntype : hash_ref
Exceptions : none
Caller : general
output_dircodeprevnextTop
  Arg [1]    : scalar - input id of the anlysis
Description: Creates the output directory for the PSILC files
Returntype : none
Exceptions : none
Caller : general
parse_resultscodeprevnextTop
  Arg [1]    : scalar - filename 
Description: parses PSILC results
Returntype : none
Exceptions : warns if the results are unreadable
Caller : general
runcodeprevnextTop
  Arg [1]    : none
Description: Makes alignment out of transcripts and runs PSILC
Returntype : none
Exceptions : none
Caller : general
run_analysiscodeprevnextTop
  Arg [1]    : scalar - program name 
Description: runs the PSILC executable
Returntype : none
Exceptions : throws if program is not executable
Caller : general
transcodeprevnextTop
Arg [1] : array ref
Description: get/set trans set to run over
Returntype : array ref to Bio::EnsEMBL::Transcript objects
Exceptions : throws if not a Bio::EnsEMBL::Transcript
Caller : general
write_pfam_idscodeprevnextTop
  Arg [1]    : none
Description: Writes identifiers for PFAM domains that PSILC will search for
in the protein alignment
Returntype : none
Exceptions : throws if it cannot open the file to write to
Caller : general
Methods code
domainsdescriptionprevnextTop
sub domains {
  my ($self, $domains) = @_;
  if ($domains) {
    $self->{'_domains'} = $$domains;
  }
  return $self->{'_domains'};
}
files_to_be_deleteddescriptionprevnextTop
sub files_to_be_deleted {
  my ($self,$filename) = @_;
  my $dir =  $self->workdir;
  my $domains = $self->domains;
  $domains =~ s/PF.+\t//;
  chomp $domains;
  # Break filename into its component parts
my @fnc = split /\./,$filename; # $self->files_to_delete($filename.".aln");
$self->files_to_delete($filename.".out"); $self->files_to_delete($fnc[0].".nhx"); $self->files_to_delete("$dir/pfamA"); $self->files_to_delete("$dir/PSILC_WAG_HKY/domdom"); $self->files_to_delete("$dir/PSILC_WAG_HKY/posteriorN"); $self->files_to_delete("$dir/PSILC_WAG_HKY/posteriorP"); $self->files_to_delete("$dir/PSILC_WAG_HKY/psilcN"); $self->files_to_delete("$dir/PSILC_WAG_HKY/psilcP"); # $self->files_to_delete("$dir/PSILC_WAG_HKY/summary");
$self->files_to_delete("$dir/PSILC_WAG_HKY/$domains"); return 1;
}
homologsdescriptionprevnextTop
sub homologs {
  my ($self, $homologs) = @_;
  if ($homologs) {
  foreach my $hom (@{$homologs}){
    unless  ($hom->isa("Bio::EnsEMBL::Transcript")){
      $self->throw("Input isn't a Bio::EnsEMBL::Transcript, it is a $hom\n$@");
      }
    }
    $self->{'_homologs'} = $homologs;
  }
  return $self->{'_homologs'};
}
iddescriptionprevnextTop
sub id {
  my ($self) = @_;
  if ($self->trans->stable_id){
    return $self->trans->stable_id;
  }
  return $self->trans->dbID;
}
make_codon_based_alignmentdescriptionprevnextTop
sub make_codon_based_alignment {
  my ($self) = @_;
  my @aaseqs;
  my $aaseq;
  my %dnaseqs;
  # make alignment file for output
my $filename = $self->queryfile.".aln"; my $alignIO = Bio::AlignIO->new ( -file => ">$filename", -format => "clustalw", ); my $pep_alignIO = Bio::AlignIO->new ( -file => ">$filename.pep", -format => "clustalw", ); # run clustal
my $clustalw = Bio::Tools::Run::Alignment::Clustalw->new(); # prepare sequences, aaseqs has the proteins used for the alignment
# dnaseqs is a hash of the tranlateable nucleotide sequence
# Both sets need to use transcript ids which as also used as the hash keys
$aaseq = $self->trans->translate; # change id of translation to match transcript
$aaseq->display_id($self->id); push @aaseqs,$aaseq; $dnaseqs{$self->id} = Bio::Seq->new ( -display_id => $self->id, -seq => $self->trans->translateable_seq, ); foreach my $homolog(@{$self->homologs}){ next if ($homolog->stable_id eq $self->id); $aaseq = $homolog->translate; # change id of translation to match transcript
$aaseq->display_id($homolog->stable_id); push @aaseqs,$aaseq; $dnaseqs{$homolog->stable_id} = Bio::Seq->new ( -display_id => $homolog->stable_id, -seq => $homolog->translateable_seq, ); } foreach my $seq (@aaseqs){ print $seq->display_id."\n"; } my $alignment = $clustalw->align(\@aaseqs); my $dna_aln = aa_to_dna_aln($alignment,\%dnaseqs); foreach my $seq ($dna_aln->each_seq){ print $seq->display_id."\n"; } $pep_alignIO->write_aln($alignment); $alignIO->write_aln($dna_aln); return 1; } #######################################
# Containers
}
newdescriptionprevnextTop
sub new {
  my ($class,@args) = @_;
  my $self = $class->SUPER::new(@args);

  $self->{'_trans'} = {};	# transcript to test;
$self->{'_homologs'} = (); # array of holmologs to cluster with transcript;
$self->{'_domains'} = (); # array ref of protein feature ids;
my($trans,$domains,$homologs,$input_id) = $self->_rearrange([qw( TRANS DOMAIN HOMOLOGS INPUT_ID )], @args); if ($trans) { $self->trans($trans); } if ($homologs) { $self->homologs($homologs); } if ($domains) { $self->domains($domains); } $self->output_dir($input_id); $self->program('/nfs/acari/sw4/Pseudogenes/PSILC/psilc1.21/psilc.jar'); return $self;
}
outputdescriptionprevnextTop
sub output {
  my ($self, $hash_ref) = @_;
  if ($hash_ref) {
    $self->{'_output'} = $hash_ref;
  }
  return $self->{'_output'};
}

1;
}
output_dirdescriptionprevnextTop
sub output_dir {
  my ($self,$input_id) = @_;
  my $output_dir;
  $output_dir = $self->id;
  if ($PSILC_WORK_DIR){
    system("mkdir $PSILC_WORK_DIR/$$input_id/$output_dir");
    $self->workdir("$PSILC_WORK_DIR/$$input_id/$output_dir");
  }
  else{
    $self->throw("Cannot make output directory\n");
  }
  return 1;
}
parse_resultsdescriptionprevnextTop
sub parse_results {
  my ($self, $filename) =@_;
  my ($nuc_dom,$prot_dom,$postPMax,$postNmax) = 0;
  my $dir =  $self->workdir;
  my $trans = $self->trans;
  my $id    = $self->id;
  open (PSILC,"$dir/PSILC_WAG_HKY/summary") or $self->warn("Cannot open results file $dir/PSILC_WAG_HKY/summary\n$@\n");
  while(<PSILC>){
    chomp;
    next unless $_ =~ /^$id\/\S+\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/;
    $nuc_dom =  $1;
    $prot_dom = $2;
    $postPMax = $3;
    $postNmax = $4;
    last;
  }
  close PSILC;
  unless  ($nuc_dom){
    $self->warn("ERROR unable to parse results file $dir/PSILC_WAG_HKY/summary\n");
    $self->warn("Failed for $id"); 
    return 0;
  }
  my %results = (
		 'id'       => $id,
		 'prot_dom' => $prot_dom,
		 'nuc_dom'  => $nuc_dom,
		 'postPMax' => $postPMax,
		 'postNMax' => $postNmax,
		);
  $self->output(\%results);
  return 1;
}
rundescriptionprevnextTop
sub run {
  my ($self) = @_;
  $self->checkdir();
  $self->write_pfam_ids;
  my $filename = $self->create_filename('PSILC','fasta');
  $self->make_codon_based_alignment;
  $self->run_analysis();
  $self->files_to_be_deleted($filename);
  $self->parse_results;
#  $self->delete_files;
}
run_analysisdescriptionprevnextTop
sub run_analysis {
  my ($self, $program) = @_;
  if(!$program){
    $program = $self->program;
  }
  throw($program." is not executable Runnable::run_analysis ") 
    unless($program && -x $program);
  my $file = $self->queryfile;
  my $dir =  $self->workdir;
  $file =~ s/^$dir\///;
  my $command = "java -Xmx400m -jar $program";
  $command .= " --align $file.aln";
  $command .= " --seq ".$self->id;
  $command .= " --restrict 1";
 # $command .= " --alignMethod 1";
$command .= " --max_nodes 10:6"; $command .= " --repository /ecs2/work2/sw4/PFAM"; $command .= " > ".$self->resultsfile; print "Running analysis ".$command."\n"; system($command) == 0 or $self->throw("FAILED to run ".$command);
}
transdescriptionprevnextTop
sub trans {
  my ($self, $trans) = @_;
  if ($trans) {
    unless  ($trans->isa("Bio::EnsEMBL::Transcript")){
      $self->throw("Input isn't a Bio::EnsEMBL::Transcript, it is a $trans\n$@");
    }
    $self->{'_trans'} = $trans;
  }
  return $self->{'_trans'};
}
write_pfam_idsdescriptionprevnextTop
sub write_pfam_ids {
  my ($self)=@_;
  my $dir =  $self->workdir;
  my $domains = $self->domains;
  print STDERR "writing pfam ids to $dir/pfamA\n";
  open (IDS,">$dir/pfamA") or $self->throw("Cannot open file for writing pfam ids $dir/pfamA\n");
  print IDS "$domains";
  close IDS;
  return 1;
}
General documentation
CONTACTTop
Post questions to the Ensembl development list: ensembl-dev@ebi.ac.uk