PSILC documentation.

Arg [1] : array ref
Description: get/set pfam domain identifiers
Returntype : string of pfam identifiers
Exceptions : none
Caller : general

files_to_be_deleted

  Arg [1]    : scalar - filename 
  Description: marks the PSILC output files for deletion
  Returntype : none
  Exceptions : none
  Caller     : general

homologs

Arg [1] : array ref
Description: get/set gene set to run over
Returntype : array ref to Bio::EnsEMBL::Gene objects
Exceptions : throws if not a Bio::EnsEMBL::Gene
Caller : general

make_codon_based_alignment

  Arg [1]    : none
  Description: Makes a dna alignment based on a protein alignment using 
Bio::Align::Utilities DNA alignment is for the translateable part of the 
transcript only
  Returntype : none
  Exceptions : none
  Caller     : general

new

  Arg [1]    : none
  Description: Creates runnable
  Returntype : none
  Exceptions : none
  Caller     : general

output

  Arg [1]    : hash_ref
  Description: overrides output array
  Returntype : hash_ref
  Exceptions : none
  Caller     : general

output_dir

  Arg [1]    : scalar - input id of the anlysis
  Description: Creates the output directory for the PSILC files
  Returntype : none
  Exceptions : none
  Caller     : general

parse_results

  Arg [1]    : scalar - filename 
  Description: parses PSILC results
  Returntype : none
  Exceptions : warns if the results are unreadable
  Caller     : general

run

  Arg [1]    : none
  Description: Makes alignment out of transcripts and runs PSILC
  Returntype : none
  Exceptions : none
  Caller     : general

run_analysis

  Arg [1]    : scalar - program name 
  Description: runs the PSILC executable
  Returntype : none
  Exceptions : throws if program is not executable
  Caller     : general

trans

Arg [1] : array ref
Description: get/set trans set to run over
Returntype : array ref to Bio::EnsEMBL::Transcript objects
Exceptions : throws if not a Bio::EnsEMBL::Transcript
Caller : general

write_pfam_ids

  Arg [1]    : none
  Description: Writes identifiers for PFAM domains that PSILC will search for
in the protein alignment
  Returntype : none
  Exceptions : throws if it cannot open the file to write to
  Caller     : general

Methods code

domains

sub domains {

  my ($self, $domains) = @_;
  if ($domains) {
    $self->{'_domains'} = $$domains;
  }
  return $self->{'_domains'};

}

files_to_be_deleted

sub files_to_be_deleted {

  my ($self,$filename) = @_;
  my $dir =  $self->workdir;
  my $domains = $self->domains;
  $domains =~ s/PF.+\t//;
  chomp $domains;
  # Break filename into its component parts
  my @fnc = split /\./,$filename;
#  $self->files_to_delete($filename.".aln");
  $self->files_to_delete($filename.".out");
  $self->files_to_delete($fnc[0].".nhx");
  $self->files_to_delete("$dir/pfamA");
  $self->files_to_delete("$dir/PSILC_WAG_HKY/domdom");
  $self->files_to_delete("$dir/PSILC_WAG_HKY/posteriorN");
  $self->files_to_delete("$dir/PSILC_WAG_HKY/posteriorP");
  $self->files_to_delete("$dir/PSILC_WAG_HKY/psilcN");
  $self->files_to_delete("$dir/PSILC_WAG_HKY/psilcP");
#  $self->files_to_delete("$dir/PSILC_WAG_HKY/summary");  
  $self->files_to_delete("$dir/PSILC_WAG_HKY/$domains");
  return 1;

}

homologs

sub homologs {

  my ($self, $homologs) = @_;
  if ($homologs) {
  foreach my $hom (@{$homologs}){
    unless  ($hom->isa("Bio::EnsEMBL::Transcript")){
      $self->throw("Input isn't a Bio::EnsEMBL::Transcript, it is a $hom\n$@");
      }
    }
    $self->{'_homologs'} = $homologs;
  }
  return $self->{'_homologs'};

}

description

sub id {

  my ($self) = @_;
  if ($self->trans->stable_id){
    return $self->trans->stable_id;
  }
  return $self->trans->dbID;

}

make_codon_based_alignment

sub make_codon_based_alignment {

  my ($self) = @_;
  my @aaseqs;
  my $aaseq;
  my %dnaseqs;
  # make alignment file for output
  my $filename = $self->queryfile.".aln";
  my $alignIO = Bio::AlignIO->new
    (
     -file => ">$filename",
     -format => "clustalw",
    );
  my $pep_alignIO = Bio::AlignIO->new
    (
     -file => ">$filename.pep",
     -format => "clustalw",
    );
  # run clustal
  my $clustalw = Bio::Tools::Run::Alignment::Clustalw->new(); 
  # prepare sequences, aaseqs has the proteins used for the alignment
  # dnaseqs is a hash of the tranlateable nucleotide sequence
  # Both sets need to use transcript ids which as also used as the hash keys
  $aaseq = $self->trans->translate;
  # change id of translation to match transcript
  $aaseq->display_id($self->id);
  push @aaseqs,$aaseq;
  $dnaseqs{$self->id} = Bio::Seq->new
    (
     -display_id => $self->id,
     -seq        => $self->trans->translateable_seq,
    );
  foreach my $homolog(@{$self->homologs}){
    next if ($homolog->stable_id eq $self->id);
    $aaseq = $homolog->translate;  
    # change id of translation to match transcript
    $aaseq->display_id($homolog->stable_id);
    push @aaseqs,$aaseq;
    $dnaseqs{$homolog->stable_id} = Bio::Seq->new
      (
       -display_id => $homolog->stable_id,
       -seq        => $homolog->translateable_seq,
      );
  }
  foreach my $seq (@aaseqs){
    print $seq->display_id."\n";
  }

  my $alignment = $clustalw->align(\@aaseqs);
  my $dna_aln = aa_to_dna_aln($alignment,\%dnaseqs);
  foreach my $seq ($dna_aln->each_seq){
    print $seq->display_id."\n";
  }  
  $pep_alignIO->write_aln($alignment);
  $alignIO->write_aln($dna_aln);
  return 1;
}

#######################################
# Containers

}

new

sub new {

  my ($class,@args) = @_;
  my $self = $class->SUPER::new(@args);

  $self->{'_trans'} = {};	# transcript to test;
  $self->{'_homologs'} = ();	# array of holmologs to cluster with transcript;
  $self->{'_domains'} = ();	# array ref of protein feature ids;

  my($trans,$domains,$homologs,$input_id) = $self->_rearrange([qw(
							TRANS
							DOMAIN
							HOMOLOGS
							INPUT_ID
						       )], @args);
  if ($trans) {
    $self->trans($trans);
  }
  if ($homologs) {
    $self->homologs($homologs);
  }
  if ($domains) {
    $self->domains($domains);
  }
  $self->output_dir($input_id);
  $self->program('/nfs/acari/sw4/Pseudogenes/PSILC/psilc1.21/psilc.jar');
  return $self;

}

output

sub output {

  my ($self, $hash_ref) = @_;
  if ($hash_ref) {
    $self->{'_output'} = $hash_ref;
  }
  return $self->{'_output'};
}

1;

}

output_dir

sub output_dir {

  my ($self,$input_id) = @_;
  my $output_dir;
  $output_dir = $self->id;
  if ($PSILC_WORK_DIR){
    system("mkdir $PSILC_WORK_DIR/$$input_id/$output_dir");
    $self->workdir("$PSILC_WORK_DIR/$$input_id/$output_dir");
  }
  else{
    $self->throw("Cannot make output directory\n");
  }
  return 1;

}

parse_results

sub parse_results {

  my ($self, $filename) =@_;
  my ($nuc_dom,$prot_dom,$postPMax,$postNmax) = 0;
  my $dir =  $self->workdir;
  my $trans = $self->trans;
  my $id    = $self->id;
  open (PSILC,"$dir/PSILC_WAG_HKY/summary") or $self->warn("Cannot open results file $dir/PSILC_WAG_HKY/summary\n$@\n");
  while(<PSILC>){
    chomp;
    next unless $_ =~ /^$id\/\S+\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/;
    $nuc_dom =  $1;
    $prot_dom = $2;
    $postPMax = $3;
    $postNmax = $4;
    last;
  }
  close PSILC;
  unless  ($nuc_dom){
    $self->warn("ERROR unable to parse results file $dir/PSILC_WAG_HKY/summary\n");
    $self->warn("Failed for $id"); 
    return 0;
  }
  my %results = (
		 'id'       => $id,
		 'prot_dom' => $prot_dom,
		 'nuc_dom'  => $nuc_dom,
		 'postPMax' => $postPMax,
		 'postNMax' => $postNmax,
		);
  $self->output(\%results);
  return 1;

}

run

sub run {

  my ($self) = @_;
  $self->checkdir();
  $self->write_pfam_ids;
  my $filename = $self->create_filename('PSILC','fasta');
  $self->make_codon_based_alignment;
  $self->run_analysis();
  $self->files_to_be_deleted($filename);
  $self->parse_results;
#  $self->delete_files;

}

run_analysis

sub run_analysis {

  my ($self, $program) = @_;
  if(!$program){
    $program = $self->program;
  }
  throw($program." is not executable Runnable::run_analysis ") 
    unless($program && -x $program);
  my $file = $self->queryfile;
  my $dir =  $self->workdir;
  $file =~ s/^$dir\///;
  my $command = "java -Xmx400m -jar $program";
  $command .= " --align $file.aln";
  $command .= " --seq ".$self->id;
  $command .= " --restrict 1";
 # $command .= " --alignMethod 1";
  $command .= " --max_nodes 10:6";
  $command .= " --repository /ecs2/work2/sw4/PFAM";
  $command .= " > ".$self->resultsfile;
  print "Running analysis ".$command."\n";
  system($command) == 0 or  $self->throw("FAILED to run ".$command);

}

trans

sub trans {

  my ($self, $trans) = @_;
  if ($trans) {
    unless  ($trans->isa("Bio::EnsEMBL::Transcript")){
      $self->throw("Input isn't a Bio::EnsEMBL::Transcript, it is a $trans\n$@");
    }
    $self->{'_trans'} = $trans;
  }
  return $self->{'_trans'};

}

write_pfam_ids

sub write_pfam_ids {

  my ($self)=@_;
  my $dir =  $self->workdir;
  my $domains = $self->domains;
  print STDERR "writing pfam ids to $dir/pfamA\n";
  open (IDS,">$dir/pfamA") or $self->throw("Cannot open file for writing pfam ids $dir/pfamA\n");
  print IDS "$domains";
  close IDS;
  return 1;

}

General documentation

CONTACT