Bio::EnsEMBL::Analysis::RunnableDB
ExonerateTags
Toolbar
Summary
Bio::EnsEMBL::Analysis::RunnableDB::ExonerateTags
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Analysis::Config::ExonerateTags
Inherit
Synopsis
my $analysis_obj = $db->get_AnalysisAdaptor->fetch_by_logic_name("Exonerate_Tags");
my $runnabledb =
Bio::EnsEMBL::Analysis::RunnableDB::ExonerateTags->new( -db => $db,
-analysis => $analysis_obj};
$runnabledb->fetch_input();
$runnabledb->run();
$runnabledb->write_output();
Description
RunnnableDB module for the Ditag analysis. Fetches Ditag sequences from database,
tries to align them to the genome with the Runnable and stores DitagFeatures
after filtering the hits.
Methods
Methods description
Arg [1] : Bio::EnsEMBL::Map::Ditag Function : Remove undesired sequences, eg. repetative seqs. Returntype: Bio::EnsEMBL::Map::Ditag or undef Caller : private |
Arg [1] : sequence string of tag Arg [2] : sequence string of bases to remove Function : Remove undesired sequences at the start of a tag, eg. G from GIS tags if this specified for the tag lib. Returntype: sequence string Caller : private |
Arg [1] : sequence string of tag Arg [2] : sequence string of bases to remove Function : Remove undesired sequences from the end of a tag, eg. AA from GIS tags. Returntype: sequence string Caller : private |
Arg [1] : Bio::EnsEMBL::Map::Ditag Function : split a ditag into two parts to save to file FOR NOW THESE ARE JUST 2 HALVES Returntype: string with sequence header (_L & _R) and sequence for the parts (FASTA format) Caller : private |
Arg [1] : array ref of Bio::EnsEMBL::Map::DitagFeatures Function : re-set analysis, attach slice Returntype: array ref of Bio::EnsEMBL::Map::DitagFeatures Caller : general |
Args : none Function : get DBAdaptor for output db Returntype: Bio::EnsEMBL::DBSQL::DBAdaptor Caller : general |
Args : none Function : fetch data from the database and create runnable Returntype: none Caller : general |
Arg : various Function : create a runnableDB for the Ditag analysis Returntype: Bio::EnsEMBL::Analysis::RunnableDB::ExonerateTags Caller : general |
Args : none Function : run the Ditag runnable Returntype: none Caller : general |
Args : none Function : GETTER functions for path to Runnable Returntype: string Caller : general |
Arg [1] : array ref of Bio::EnsEMBL::Map::DitagFeatures Function : get ditag feature adaptor with new features, clean and store them. Returntype: none Caller : general |
Methods code
sub BATCHSIZE
{ my ( $self, $value ) = @_;
if ($value) {
$self->{'_BATCHSIZE'} = $value;
}
return $self->{'_BATCHSIZE'}; } |
sub CHOPFIRST
{ my ( $self, $value ) = @_;
if ($value) {
$self->{'_CHOPFIRST'} = $value;
}
return $self->{'_CHOPFIRST'}; } |
sub CHOPLAST
{ my ( $self, $value ) = @_;
if ($value) {
$self->{'_CHOPLAST'} = $value;
}
return $self->{'_CHOPLAST'}; } |
sub DNADB
{ my ( $self, $value ) = @_;
if ( defined $value ) {
$self->{'_DNADB'} = $value;
}
return $self->{'_DNADB'}; } |
sub GENOMICSEQS
{ my ( $self, $value ) = @_;
if ( defined $value ) {
$self->{'_GENOMICSEQS'} = $value;
}
return $self->{'_GENOMICSEQS'}; } |
sub IIDREGEXP
{ my ( $self, $value ) = @_;
if ( defined $value ) {
$self->{'_IIDREGEXP'} = $value;
}
return $self->{'_IIDREGEXP'}; } |
sub KEEPORDER
{ my ( $self, $value ) = @_;
if ($value) {
$self->{'_KEEPORDER'} = $value;
}
return $self->{'_KEEPORDER'}; } |
sub MAXDISTANCE
{ my ( $self, $value ) = @_;
if ($value) {
$self->{'_MAXDISTANCE'} = $value;
}
return $self->{'_MAXDISTANCE'}; } |
sub MAXMISMATCH
{ my $self = shift;
$self->{'_MAXMISMATCH'} = shift if (@_);
return $self->{'_MAXMISMATCH'}; } |
sub MAXSEQLENGTH
{ my ( $self, $value ) = @_;
if ($value) {
$self->{'_MAXSEQLENGTH'} = $value;
}
return $self->{'_MAXSEQLENGTH'}; } |
sub MINSEQLENGTH
{ my ( $self, $value ) = @_;
if ($value) {
$self->{'_MINSEQLENGTH'} = $value;
}
return $self->{'_MINSEQLENGTH'}; } |
sub OPTIONS
{ my ( $self, $value ) = @_;
if ( defined $value ) {
$self->{'_OPTIONS'} = $value;
}
return $self->{'_OPTIONS'}; } |
sub OUTDB
{ my ( $self, $value ) = @_;
if ( defined $value ) {
$self->{'_OUTDB'} = $value;
}
return $self->{'_OUTDB'}; } |
sub PROGRAM
{ my ( $self, $value ) = @_;
if ( defined $value ) {
$self->{'_PROGRAM'} = $value;
}
return $self->{'_PROGRAM'}; } |
sub QUERYFILES
{ my ( $self, $value ) = @_;
if ($value) {
$self->{'_QUERYFILE'} = $value;
}
return $self->{'_QUERYFILE'}; } |
sub QUERYSEQS
{ my ( $self, $value ) = @_;
if ( defined $value ) {
$self->{'_QUERYSEQS'} = $value;
}
return $self->{'_QUERYSEQS'}; } |
sub REPEATNUMBER
{ my ( $self, $value ) = @_;
if ($value) {
$self->{'_REPEATNUMBER'} = $value;
}
return $self->{'_REPEATNUMBER'}; } |
sub SPECOPTIONS
{ my $self = shift;
$self->{'_SPECOPTIONS'} = shift if (@_);
return $self->{'_SPECOPTIONS'}; } |
sub SPLITSEQS
{ my ( $self, $value ) = @_;
if ($value) {
$self->{'_SPLITSEQS'} = $value;
}
return $self->{'_SPLITSEQS'}; } |
sub TAGCOUNT
{ my ( $self, $value ) = @_;
if ($value) {
$self->{'_TAGCOUNT'} = $value;
}
return $self->{'_TAGCOUNT'};
}
1; } |
sub TAGTYPE
{ my ( $self, $value ) = @_;
if ( defined $value ) {
$self->{'_TAGTYPE'} = $value;
}
return $self->{'_TAGTYPE'}; } |
sub TMPDIR
{ my ( $self, $value ) = @_;
if ( defined $value ) {
$self->{'_TMPDIR'} = $value;
}
return $self->{'_TMPDIR'}; } |
sub _check_seq
{ my ($self, $ditag) = @_;
my $repeat_number = $self->REPEATNUMBER();
if( ($ditag->sequence =~ /A{$repeat_number}/) or ($ditag->sequence =~ /T{$repeat_number}/) or
($ditag->sequence =~ /C{$repeat_number}/) or ($ditag->sequence =~ /G{$repeat_number}/) ){
print "removing repetitive ditag ".($ditag->dbID)." ".($ditag->tag_count)."\n";
$ditag = undef;
}
elsif(defined($self->MINSEQLENGTH) and (length($ditag->sequence) < $self->MINSEQLENGTH)){
print "removing short ditag ".($ditag->dbID)." ".($ditag->tag_count)."\n";
$ditag = undef;
}
elsif(defined($self->MAXSEQLENGTH) and (length($ditag->sequence) > $self->MAXSEQLENGTH)){
print "removing long ditag ".($ditag->dbID)." ".($ditag->tag_count)."\n";
$ditag = undef;
}
elsif($ditag->sequence =~ /N/){
print "removing ditag with 'N' ".($ditag->dbID)." ".($ditag->tag_count)."\n";
$ditag = undef;
}
return $ditag; } |
sub _chop_first
{ my ($self, $ditagseq, $chop_first) = @_;
if($ditagseq =~ /^$chop_first.+$/){
$ditagseq = substr($ditagseq, length($chop_first), length($ditagseq)-length($chop_first));
}
return $ditagseq; } |
sub _chop_last
{ my ($self, $ditagseq, $chop_last) = @_;
if($ditagseq =~ /^$chop_last.+$/){
$ditagseq = substr($ditagseq, 0, length($ditagseq)-length($chop_last));
}
return $ditagseq; } |
sub _split_seq
{ my ($self, $ditag, $chop_first, $chop_last) = @_;
my $addone = 0;
my $motherseq = $ditag->sequence;
my $id = $ditag->dbID;
if($chop_first){
$motherseq = $self->_chop_first($motherseq, $chop_first);
}
if($chop_last){
$motherseq = $self->_chop_last($motherseq, $chop_last);
}
my $cutoffpoint = (length($motherseq))/2; if($cutoffpoint =~ /\./){
$addone = 1;
}
$cutoffpoint = int($cutoffpoint);
my $start_A = 0;
my $end_A = $cutoffpoint;
if($addone){ $end_A++; }
my $seqpart_A = ">".$id."_L_".$start_A."\n";
$seqpart_A .= (substr($motherseq, 0, $end_A))."\n";
my $start_B = $cutoffpoint;
my $seqpart_B = ">".$id."_R_".($start_B)."\n";
$seqpart_B .= (substr($motherseq, $start_B))."\n";
return($seqpart_A.$seqpart_B); } |
sub clean_features
{ my ( $self, $ditagfeatures ) = @_;
my %genome_slices;
my @cleanfeatures;
my $sa = $self->db->get_SliceAdaptor;
foreach my $ditagfeature (@$ditagfeatures) {
$ditagfeature->analysis($self->analysis);
my $slice_id = $ditagfeature->slice;
if ( not exists $genome_slices{$slice_id} ) {
print "\nTRYING TO FETCH $slice_id.";
$genome_slices{$slice_id} = $sa->fetch_by_name($slice_id);
}
my $slice = $genome_slices{$slice_id};
$ditagfeature->slice($slice);
push(@cleanfeatures, $ditagfeature);
}
return\@ cleanfeatures; } |
sub create_output_db
{ my ($self) = @_;
my $outdb;
my $dnadb;
print "\nCreating OUTDB.";
if ( $self->OUTDB && $self->DNADB) {
$dnadb = new Bio::EnsEMBL::DBSQL::DBAdaptor(%{ $self->OUTDB });
$outdb = new Bio::EnsEMBL::DBSQL::DBAdaptor(
%{ $self->OUTDB },
-dnadb => $dnadb
);
} elsif( $self->OUTDB) {
$outdb = new Bio::EnsEMBL::DBSQL::DBAdaptor(%{ $self->OUTDB });
} else {
$outdb = $self->db;
}
return $outdb; } |
sub fetch_input
{ my ($self) = @_;
my $target = $self->GENOMICSEQS;
if ( -e $target ){
if(!(-d $target)and !(-s $target)) {
throw("'$target' isn't a file or a directory?");
}
} else {
throw("'$target' could not be found");
}
my @fasta_entries = ();
my $tmp_dir = $self->TMPDIR;
$self->input_id =~ /ditag\.(\S+)\.([0-9]+)/;
my $tagtype = $1;
my $input_index = $2;
throw("Couldn t get index number from input-id ".$self->input_id)
unless($input_index);
throw("Couldn t get tag-type from input-id ".$self->input_id)
unless($tagtype);
my $batch_size = $self->BATCHSIZE();
my $ditags = $self->db->get_ditagAdaptor->fetch_all_with_limit(
$tagtype, $batch_size, ($batch_size * ($input_index - 1)) );
my $chop_first = $self->CHOPFIRST();
my $chop_last = $self->CHOPLAST();
foreach my $ditag (@$ditags) {
$ditag = $self->_check_seq($ditag);
if($ditag) {
my $ditag_fasta_entry;
if($self->SPLITSEQS()) {
$ditag_fasta_entry = $self->_split_seq($ditag, $chop_first, $chop_last);
} else {
my $tagseq = $ditag->sequence;
if($chop_first){
$tagseq = $self->_chop_first($tagseq, $chop_first);
}
if($chop_last){
$tagseq = $self->_chop_last($tagseq, $chop_last);
}
$ditag_fasta_entry = ">".$ditag->dbID."_F"."\n".$tagseq."\n";
}
push(@fasta_entries, $ditag_fasta_entry);
}
}
my $tmp_file = $tmp_dir."/".$input_index.".tmp";
open(OUTFILE, ">$tmp_file") or throw "couldnt create fasta chunk file $tmp_file";
foreach my $ditag_fasta_entry (@fasta_entries) {
print OUTFILE $ditag_fasta_entry;
}
close(OUTFILE) or throw "couldnt close fasta chunk file $tmp_file";
my %parameters = %{ $self->parameters_hash };
if (
not exists( $parameters{-options} )
and defined $self->OPTIONS
){
$parameters{-options} = $self->OPTIONS;
}
if($self->analysis->db_file){
$parameters{'-STS_FILE'} = $self->analysis->db_file
unless($parameters{'-STS_FILE'});
}
my $program = ($self->PROGRAM()) ? $self->PROGRAM() : $self->analysis->program_file;
if(!$program){ throw "Cant decide what program to use for analysis!\n" }
my $runnable = Bio::EnsEMBL::Analysis::Runnable::ExonerateTags->new
(
-program => $program,
-analysis => $self->analysis,
-target_file => $target,
-query_file => $tmp_file,
-deletequeryfile => 0,
-maxmismatch => $self->MAXMISMATCH(),
-specoptions => $self->SPECOPTIONS(),
-splitseqs => $self->SPLITSEQS(),
-maxdistance => $self->MAXDISTANCE(),
-keeporder => $self->KEEPORDER(),
%parameters
);
$self->runnable($runnable); } |
sub new
{ my ( $class, @args ) = @_;
my $self = $class->SUPER::new(@args);
$self->read_and_check_config($DITAG_CONFIG);
return $self; } |
sub run
{ my ($self) = @_;
my @out_features;
throw("Can't run - no runnable objects") unless ( $self->runnable );
my $runnable = @{ $self->runnable }[0];
$runnable->run;
@out_features = @{$runnable->output};
$self->output(\@out_features); } |
sub runnable_path
{ my ($self);
return "Bio::EnsEMBL::Analysis::Runnable::DitagAlign"; } |
sub write_output
{ my ( $self, $ditagfeatures ) = @_;
my $ditagfeature_adaptor;
if($self->OUTDB) {
my $outdb = $self->create_output_db;
$ditagfeature_adaptor = $outdb->get_DitagFeatureAdaptor;
} else {
$ditagfeature_adaptor = $self->db->get_DitagFeatureAdaptor;
}
if(!$ditagfeatures or !(scalar @$ditagfeatures)){
$ditagfeatures = $self->output();
}
print "\nhave ".(scalar @$ditagfeatures)." features to store.\n";
if(!$ditagfeatures or !(scalar @$ditagfeatures)){
warn "no features to store.";
}
else {
$ditagfeatures = $self->clean_features($ditagfeatures);
eval { $ditagfeature_adaptor->batch_store($ditagfeatures) };
if ($@) {
$self->throw("Unable to store ditagFeatures!\n $@");
}
} } |
General documentation
Arg [1] : (optional) new value
Function : GETTER/SETTER functions for Config values
Returntype: string or undef
Caller : private