Importer documentation.

Bio::EnsEMBL::Funcgen::Importer

No package variables defined.

my $imp = Bio::EnsEMBL::Funcgen::Importer->new(%params);
$imp->register_experiment();

This program is the main class coordinating import of Arrays and experimental data.
It utilises several underlying definitions classes specific to array vendor, array class and
experimental group.

sub R_norm {

  my ($self, @logic_names) = @_;
  #This currently normalises a single two colour chip at a time
  #rather than normalising across a set of chip
  #also does in sets of analyses
  #good for keeping data separate, but not efficient in terms of querying
  #convert to use one script which only queries for each echip once, then does each anal


  my $aa = $self->db->get_AnalysisAdaptor();
  my $rset_adaptor = $self->db->get_ResultSetAdaptor();
  my $ra_id = $aa->fetch_by_logic_name("RawValue")->dbID();
  my %r_config = (
				  "VSN_GLOG"      => {( libs         => ['vsn'],
										#norm_method => 'vsn',
									  )},
				  
				  "T.Biweight" => {(
									libs => ['affy'],
									#norm_method => 'tukey.biweight',
								   )},
				 );
  

  foreach my $logic_name (@logic_names) {
    #throw("Not yet implemented TukeyBiweight") if $logic_name eq "Tukey_Biweight";

	#this has already been chcecked and set as the norm_analysis
	#need to resolve this multi method approach
    my $norm_anal = $aa->fetch_by_logic_name($logic_name);


	
	#This only creates an RSet for the IMPORT set
	#So if we re-run with a different analysis
	#tab2mage will have already been validated
	#So RSet generation will be skipped
	#We need to recreate the each non-import RSet for this norm analysis
	
	#This also means the RSets are being created before the data has been imported
	#This avoids having to parse tab2mage each time but means we have an uncertain status of these Rsets

    my $rset = $self->get_import_ResultSet($norm_anal, 'experimental_chip');
	
	my @chips = ();
  
    if (! $rset) {
      $self->log("All ExperimentalChips already have status:\t${logic_name}");
    } else {					#Got data to normalise and import
      my @dbids;
      my $R_file     = $self->get_dir("norm")."/${logic_name}.R";
      my $job_name   = $self->experiment->name()."_${logic_name}";
      my $resultfile = $self->get_dir("norm")."/result.${logic_name}.txt";
      my $outfile    = $self->get_dir("norm")."/${logic_name}.out";

	  #How do we get farm job output i.e. run time memusage
	  #from interactive job?
	  #This assumes R_PATH 
	  my $errfile    = $self->get_dir("norm")."/${logic_name}.err";

	  #Let's build this better so we capture the farm output aswell as the job output.
      my $cmdline = "$ENV{'R_PATH'} --no-save < $R_file";# >$errfile 2>&1";
	  #-K option waits for job to complete
      my $bsub = "bsub -K -J $job_name ".$ENV{'R_BSUB_OPTIONS'}.
		" -e $errfile -o $outfile $ENV{'R_FARM_PATH'} CMD BATCH $R_file";
	  
	  #Can we separate the out and err for commandline?
      my $r_cmd = (! $self->farm()) ? "$cmdline >$outfile 2>&1" : $bsub;

      $self->backup_file($resultfile);	#Need to do this as we're appending in the loop
  
      #setup qurey
      #warn "Need to add host and port here";
      #Set up DB, defaults and libs for each logic name
      my $query = "options(scipen=20);library(RMySQL);library(Ringo);"; 
	  #scipen is to prevent probe_ids being converted to exponents
      #Ringo is for default QC

      #foreach my $ln(@logic_names){
	
      foreach my $lib (@{$r_config{$logic_name}{'libs'}}) {
		$query .= "library($lib);";
      }
      #}
      
      $query .= "con<-dbConnect(dbDriver(\"MySQL\"), host=\"".$self->db->dbc->host()."\", port=\"".$self->db->dbc->port()."\", dbname=\"".$self->db->dbc->dbname()."\", user=\"".$self->db->dbc->username()."\"";

	  #should use read only pass here as we are printing this to file
      $query .= ", pass=\"".$self->db->dbc->password."\")\n";
      
      
      #Build queries for each chip
      foreach my $echip (@{$self->experiment->get_ExperimentalChips()}) {
    

		#should implement logic name here?
		#can't as we need seperate ResultSet for each

  
		if ($echip->has_status($logic_name)) {
		  $self->log("ExperimentalChip ".$echip->unique_id()." already has status:\t$logic_name");
		} else {
	  
		  #warn "Need to roll back here if recovery, as norm import may fail halfway through";
		  
		  push @chips, $echip;
		  my $cc_id = $rset->get_chip_channel_id($echip->dbID());

		  #if ($self->recovery()){
		#	$self->log('Rolling back results for ExperimentalChip('.$echip->dbID().") $logic_name");
		#	$self->db->rollback_results($cc_id) if $self->recovery();							  
		#  }
		  
		  $self->log("Building $logic_name R cmd for ".$echip->unique_id());
		  @dbids = ();
	  
		  foreach my $chan (@{$echip->get_Channels()}) {

			if ($chan->type() eq "EXPERIMENTAL") {
			  push @dbids, $chan->dbID();
			} else {
			  unshift @dbids, $chan->dbID();
			}
		  }
	  
		  throw("vsn does not accomodate more than 2 channels") if (scalar(@dbids > 2) && $logic_name eq "VSN_GLOG");
	  
		  #should do some of this with maps?
		  #HARDCODED metric ID for raw data as one
	
		  #Need to get total and experimental here and set db_id accordingly
		  #can probably do this directly into one df
	  
		  $query .= "c1<-dbGetQuery(con, 'select r.probe_id as PROBE_ID, r.score as CONTROL_score, r.X, r.Y from result r, chip_channel c, result_set rs where c.table_name=\"channel\" and c.table_id=${dbids[0]} and c.result_set_id=rs.result_set_id and rs.analysis_id=${ra_id} and c.chip_channel_id=r.chip_channel_id')\n";

		  $query .= "c2<-dbGetQuery(con, 'select r.probe_id as PROBE_ID, r.score as EXPERIMENTAL_score, r.X, r.Y from result r, chip_channel c, result_set rs where c.table_name=\"channel\" and c.table_id=${dbids[1]} and c.result_set_id=rs.result_set_id and rs.analysis_id=${ra_id} and c.chip_channel_id=r.chip_channel_id')\n";
	  
		  #Can we define some of the basic structures here and reuse in the QC and each norm method?


		  #Is this going to eat up memory?
		  #can we strip out and separate the data from c1 and c2 into RGList and
		  #individual vector for probe_ids, then rm c1 and c2 to free up memory

		  #create RGList object
		  $query .= "R<-as.matrix(c1['CONTROL_score'])\nG<-as.matrix(c2['EXPERIMENTAL_score'])\n";
		  $query .= "genes<-cbind(c1['PROBE_ID'], c1['X'], c1['Y'])\n";
		  $query .= "testRG<-new('RGList', list(R=R, G=G, genes=genes))\n";
		 

		  #QC plots here before doing norm

		  #open pdf device
		  $query .= "pdf('".$self->get_dir('norm').'/'.$echip->unique_id."_QC.pdf', paper='a4', height = 15, width = 9)\n";
		  #set format
		  $query .= "par(mfrow = c(2,2), font.lab = 2)\n";

		  #Channel densisties
		  #These need limma or Ringo
		  $query .= "plotDensities(testRG)\n";
		  
		  #MvA Plot

		  $query .= 'meanLogA<-((log(testRG$R, base=exp(2)) + log(testRG$G, base=exp(2)))/2)'."\n";
		  $query .= 'logIntRatioM<-(log(testRG$R, base=exp(2)) - log(testRG$G, base=exp(2)))'."\n";
		  $query .= "yMin<-min(logIntRatioM)\n";
		  $query .= "yMax<-max(logIntRatioM)\n";



		  #Need to validate yMax here
		  #If is is Inf then we need to sort the vector and track back until we find the high real number
		  #count number of Infs and note on MvA plot
		  $query .= "infCount<-0\n";
		  $query .= "if( yMax == Inf){; sortedM<-sort(logIntRatioM); lengthM<-length(logIntRatioM); indexM<-lengthM\n"
			."while (yMax == Inf){; indexM<-(indexM-1); yMax<-sortedM[indexM];}; infCount<-(lengthM-indexM);}\n";

		  #
		  $query .= "if(infCount == 0){\n"; 
		  $query .= 'plot(meanLogA, logIntRatioM, xlab="A - Average Log Ratio",ylab="M - Log Ratio",pch=".",ylim=c(yMin,yMax), main="'.$echip->unique_id.'")'."\n";
		  $query .= "} else {\n";
		  $query .= 'plot(meanLogA, logIntRatioM, xlab="A - Average Log Ratio",ylab="M - Log Ratio",pch=".",ylim=c(yMin,yMax), main="'.$echip->unique_id.'", sub=paste(infCount, " Inf values not plotted"));'."}\n";


		  #$query .= 'plot(log(testRG$R*testRG$G, base=exp(2))/2, log(testRG$R/testRG$G, base=exp(2)),xlab="A",ylab="M",pch=".",ylim=c(-3,3), main="'.$echip->unique_id."\")\n";

		  #Plate plots
		  $query .= 'image(testRG, 1, channel = "green", mycols = c("black", "green4", "springgreen"))'."\n";
		  $query .= 'image(testRG, 1, channel = "red", mycols = c("black", "green4", "springgreen"))'."\n";

		  $query .= "dev.off()\n";
		  #Finished QC pdf printing



		  #The simple preprocess step of Ringo is actually vsn, so we can nest these in line


		  ### Build Analyses cmds ###
		  
		  if($logic_name eq 'T.Biweight'){

			#log2 ratios
			$query .= 'lr_df<-cbind((log(c2["EXPERIMENTAL_score"], base=exp(2)) - log(c1["CONTROL_score"], base=exp(2))))'."\n";
			
			#Adjust using tukey.biweight weighted average
			#inherits first col name
			$query .= 'norm_df<-(lr_df["EXPERIMENTAL_score"]-tukey.biweight(as.matrix(lr_df)))'."\n";
			$query .= 'formatted_df<-cbind(rep.int(0, length(c1["PROBE_ID"])), c1["PROBE_ID"], sprintf("%.3f", norm_df[,1]), rep.int('.$cc_id.', length(c1["PROBE_ID"])), c1["X"], c1["Y"])'."\n";
		  
		}
		elsif($logic_name eq 'VSN_GLOG'){
		  #could do this directly
		  $query .= "raw_df<-cbind(c1[\"CONTROL_score\"], c2[\"EXPERIMENTAL_score\"])\n";
		  #variance stabilise
		  $query .= "norm_df<-vsn(raw_df)\n";
    
	  
		  #do some more calcs here and print report?
		  #fold change exponentiate? See VSN docs
		  #should do someplot's of raw and glog and save here?
		  #set log func and params
		  #$query .= "par(mfrow = c(1, 2)); log.na = function(x) log(ifelse(x > 0, x, NA));";
		  #plot
		  #$query .= "plot(exprs(glog_df), main = \"vsn\", pch = \".\");". 
		  #  "plot(log.na(exprs(raw_df)), main = \"raw\", pch = \".\");"; 
		  #FAILS ON RAW PLOT!!
		  #par(mfrow = c(1, 2)) 
		  #> meanSdPlot(nkid, ranks = TRUE) 
		  #> meanSdPlot(nkid, ranks = FALSE) 
		
	
		  #Now create table structure with glog values(diffs)
		  #3 sig dec places on scores(doesn't work?!)
		  $query .= 'formatted_df<-cbind(rep.int(0, length(c1["PROBE_ID"])), c1["PROBE_ID"], sprintf("%.3f", (exprs(norm_df[,2]) - exprs(norm_df[,1]))), rep.int('.$cc_id.', length(c1["PROBE_ID"])), c1["X"], c1["Y"])'."\n";
		  
		}
		#load back into DB
		#c3results<-cbind(rep("", length(c3["probe_id"])), c3["probe_id"], c3["c3_score"], rep(1, length(c3["probe_id"])), rep(1, length(c3["probe_id"])))
		#may want to use safe.write here
		#dbWriteTable(con, "result", c3results, append=TRUE)
		#dbWriteTable returns true but does not load any data into table!!!
		
		$query .= "write.table(formatted_df, file=\"${resultfile}\", sep=\"\\t\", col.names=FALSE, row.names=FALSE, quote=FALSE, append=TRUE)\n";
		
		#tidy up here?? 
		}
      }
  
      $query .= "q();";
  
      open(RFILE, ">$R_file") || throw("Cannot open $R_file for writing");
      print RFILE $query;
      close(RFILE);
 
	  my $submit_text = "Submitting $logic_name job";
	  $submit_text .= ' to farm' if $self->farm;
	  $self->log("${submit_text}:\t".localtime());
	  run_system_cmd($r_cmd);
	  $self->log("Finished $logic_name job:\t".localtime());
	  $self->log('See '.$self->get_dir('norm').' for ExperimentalChip QC files');

	  #Now load file and update status
	  #Import directly here to avoid having to reparse all results if we crash!!!!
	  $self->log("Importing:\t$resultfile");
	  $self->db->load_table_data("result",  $resultfile);
	  $self->log("Finishing importing:\t$resultfile");
	  

	  foreach my $echip(@chips){
		$echip->adaptor->store_status($logic_name, $echip);
	  }

	  #Recreate all non-import RSets for analysis if not already present
	  #

	  my $rset_a = $self->db->get_ResultSetAdaptor();
	  my %seen_rsets;
	 
	  foreach my $anal_rset(@{$rset_a->fetch_all_by_Experiment($self->experiment)}){
		next if($anal_rset->name =~ /_IMPORT$/o);
		next if(exists $seen_rsets{$anal_rset->name});
		next if $anal_rset->analysis->logic_name eq $norm_anal->logic_name;
		$seen_rsets{$rset->name} = 1;
		$anal_rset->analysis($norm_anal);
		$anal_rset->{'dbID'} = undef;
		$anal_rset->{'adaptor'} = undef;
		
		#add the chip_channel_ids from the new anal IMPORT set
		foreach my $table_id(@{$anal_rset->table_ids}){
		  $anal_rset->{'table_id_hash'}{$table_id} = $rset->get_chip_channel_id($table_id);
		}

		$self->log('Adding new ResultSet '.$anal_rset->name.' with analysis '.$norm_anal->logic_name);
		$rset_a->store($anal_rset);
	  }



    }
  }

  return;
}

#can we sub this? args: table_name, logic_name
#also use result_set_name
#would also clean all data for result set if recovery
#return would be result_set

}

sub add_Array {

  my $self = shift;

  #do we need to check if stored?
  if (! $_[0]->isa('Bio::EnsEMBL::Funcgen::Array')) {
    throw("Must supply a Bio::EnsEMBL::Funcgen::Array");
  } elsif (@_) {
    push @{$self->{'arrays'}}, @_;
  }
   
  throw("Does not yet support multiple array imports") if(scalar (@{$self->{'arrays'}}) > 1);
  #need to alter read_probe data at the very least
  
  return;

}

sub array_file {

  my ($self) = shift;
  $self->{'array_file'} = shift if(@_);
  return $self->{'array_file'};

}

sub array_name {

  my ($self) = shift;
  $self->{'array_name'} = shift if(@_);
  return $self->{'array_name'};

}

sub array_set {

  my ($self) = shift;
  $self->{'array_set'} = shift if(@_);
  return $self->{'array_set'};

}

sub arrays {

  my $self = shift;

  if(! defined $self->{'arrays'}){
	$self->{'arrays'} = $self->db->get_ArrayAdaptor->fetch_all_by_Experiment($self->experiment());
  }

  return $self->{'arrays'};

}

sub cache_probe_info {

  my ($self, $pname, $pid, $x, $y) = @_;

  throw('Deprecated, too memory expensive, now resolving DB duplicates and using Tied File cache');
  throw("Must provide a probe name and id") if (! defined $pname || ! defined $pid);


  #do we need to loop through the file here?
  #if we make sure we're testing for a previous dbID before storing probes then we don't need to do this
  #we can catch the error when we get the probe id as we can check for >1 id for the same probe name
  #if (defined $self->{'_probe_cache'}->{$pname} && ($self->{'_probe_cache'}->{$pname}->[0] != $pid)) {
  #  throw("Found two differing dbIDs for $pname, need to sort out redundant probe entries");
  #}
  
  $self->{'_probe_cache'}->{$pname} = (defined $x && defined $y) ? [$pid, $x, $y] : [$pid];
  
  return;

}

sub cache_slice {

  my ($self, $region_name, $cs_name) = @_;

  throw("Need to define a region_name to cache a slice from") if ! $region_name;

  $cs_name ||= 'chromosome';
  $self->{'slice_cache'} ||= {};
  $region_name =~ s/chr//;
  $region_name = "MT" if $region_name eq "M";
  
  #can we handle UN/random chromosomes here?
  
  
  if (! exists $self->{'slice_cache'}->{$region_name}) {
    $self->{'slice_cache'}->{$region_name} = $self->slice_adaptor->fetch_by_region($cs_name, $region_name);
    warn("-- Could not generate a slice for ${cs_name}:$region_name\n") if ! defined $self->{'slice_cache'}->{$region_name};
  }
  
  return $self->{'slice_cache'}->{$region_name};

}

sub cell_type {

  my ($self) = shift;

  if (@_) {
    my $ctype = shift;
    
    #do we need this as we're checking in new?
    if (! ($ctype->isa('Bio::EnsEMBL::Funcgen::CellType') && $ctype->dbID())) {
      throw("Must pass a valid stored Bio::EnsEMBL::Funcgen::CellType");
    }
  
    $self->{'cell_type'} = $ctype;
  }

  return $self->{'cell_type'};

}

sub contact {

  my ($self) = shift;
  $self->{'contact'} = shift if(@_);
  return $self->{'contact'};

}

sub create_output_dirs {

  my ($self, @dirnames) = @_;
	

  #unshift @dirnames, "";#create base output dir
  #now done in control script due to log being generated first


  foreach my $name (@dirnames) {

	if($name eq 'caches'){
	  $self->{"${name}_dir"} = $ENV{'EFG_DATA'}."/${name}/".$self->db->dbc->dbname() if(! defined $self->{"${name}_dir"});
	}
	elsif($name eq 'fastas'){
	  $self->{"${name}_dir"} = $ENV{'EFG_DATA'}."/${name}/" if(! defined $self->{"${name}_dir"});
	}
	else{
	  $self->{"${name}_dir"} = $self->get_dir('output')."/${name}" if(! defined $self->{"${name}_dir"});
	}

	if(! (-d $self->get_dir($name) || (-l $self->get_dir($name)))){
	  $self->log("Creating directory:\t".$self->get_dir($name));
	  #This did not throw with mkdir!!
	  mkpath $self->get_dir($name) || throw('Failed to create directory:    '. $self->get_dir($name));
	  chmod 0744, $self->get_dir($name);
	}
  }
  
  return;
}



#move this to SolexaDefs/ExperimentalSetDefs?

}

sub db {

  my $self = shift;

  if (defined $_[0] && $_[0]->isa("Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor")) {
    $self->{'db'} = shift;
  } elsif (defined $_[0]) {
    throw("Need to pass a valid Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor");
  }
  
  return $self->{'db'};

}

sub dbname {

  my ($self) = shift;	
  

  deprecate('Use $imp->db->dbname');

  return $self->db->dbc->dbname;

  #$self->{'dbname'} = shift if @_;


 # if(! defined  $self->{'dbname'}){
#	warn 'Need to guess dbname here?';
#  }
    
#  return $self->{'dbname'};

}

sub description {

  my $self = shift;

  if (@_) {
	$self->{'description'} = shift;
  }

  return $self->{'description'};

}

sub design_type {

  my $self = shift;
  return $self->{'design_type'};

}

sub dump_fasta {

  my $self = shift;
  $self->{'_dump_fasta'} = shift if @_;
  return $self->{'_dump_fasta'};

}

sub experiment {

  my ($self) = shift;	

  if (@_) {
	
    if (! $_[0]->isa('Bio::EnsEMBL::Funcgen::Experiment')) {
      throw("Must pass a Bio::EnsEMBL::Funcgen::Experiment object");
    }

    $self->{'experiment'} = shift;
  }

  return $self->{'experiment'};

}

sub experiment_date {

  my ($self) = shift;	

  if (@_) {
    my $date = shift;

    if ($date !~ /[0-9]{4}-[0-9]{2}[0-9]{2}/o) {
      throw('Parameter -experiment_date needs to fe in the format: YYYY-MM-DD');
    }

    $self->{'experiment_date'} = $date;
  } elsif ($self->vendor() eq "nimblegen" && ! defined $self->{'experiment_date'}) {
    $self->{'experiment_date'} = &get_date("date", $self->get_config("chip_file")),
  }

  return $self->{'experiment_date'};

}

sub experimental_set_name {

  my $self = shift;

  $self->{'experimental_set_name'} = shift if @_;

  return $self->{'experimental_set_name'};

}

sub farm {

  my ($self, $farm) = @_;

  $self->{'farm'} ||= undef;#define farm

  if (defined $farm) {
    throw("Argument to farm must be a boolean 1 or 0")  if(! ($farm == 1 || $farm == 0));
    $self->{'farm'} = $farm;
  }

  return $self->{'farm'};

}

sub feature_analysis {

  my ($self) = shift;

  if (@_) {
    my $fanal = shift;
    
    #do we need this as we're checking in new?
    if (! (ref ($fanal) && $fanal->isa('Bio::EnsEMBL::Analysis') && $fanal->dbID())) {
      throw("Must pass a valid stored Bio::EnsEMBL::Analysis");
    }
  
    $self->{'feature_analysis'} = $fanal;
  }

  return $self->{'feature_analysis'};

}

sub feature_set_description {

  my $self = shift;

  $self->{'feature_set_description'} = shift if @_;
  
  return $self->{'feature_set_description'};

}

sub feature_type {

  my ($self) = shift;

  if (@_) {
    my $ftype = shift;
    
    #do we need this as we're checking in new?
    if (! ($ftype->isa('Bio::EnsEMBL::Funcgen::FeatureType') && $ftype->dbID())) {
      throw("Must pass a valid stored Bio::EnsEMBL::Funcgen::FeatureType");
    }
  
    $self->{'feature_type'} = $ftype;
  }

  return $self->{'feature_type'};

}

sub format {

  my ($self) = shift;	
  $self->{'format'} = shift if(@_);
  return $self->{'format'};

}

sub get_chr_seq_region_id {

  my ($self, $chr, $start, $end) = @_;
  #what about strand info?

  #do we need the start and stop?

  #use start and stop to prevent problems with scaffodl assemblies, i.e. >1 seq_region_id
  #my $slice = $self->slice_adaptor->fetch_by_region("chromosome", $chr, $start, $end);
  #we could pass the slice back to the slice adaptor for this, to avoid dbid problems betwen DBs
  
  ###would need to implement other cs's here

  return $self->slice_adaptor->fetch_by_region("chromosome", $chr, $start, $end)->get_seq_region_id();
}

#convinience method

}

sub get_config {

  my ($self, $data_name) = @_;
  return $self->get_data('config', $data_name); #will this cause undefs?

}

sub get_dir {

  my ($self, $dirname) = @_;
  return $self->get_data("${dirname}_dir");

}

sub get_import_ResultSet {

  my ($self, $anal, $table_name) = @_;

  if (!($anal && $anal->isa("Bio::EnsEMBL::Analysis") && $anal->dbID())) {
    throw("Must provide a valid stored Bio::EnsEMBL::Analysis");
  }

  $self->log("Getting import $table_name ResultSet for analysis:\t".$anal->logic_name());

  my ($rset, @new_chip_channels);
  my $result_adaptor = $self->db->get_ResultSetAdaptor();
  my $logic_name = $anal->logic_name;
  my $status = ($logic_name eq "RawValue") ? "IMPORTED" : $logic_name;

  if(($logic_name) eq 'RawValue' && ($table_name eq 'experimental_chip')){
	throw("Cannot have an ExperimentalChip ResultSet with a RawValue analysis, either specify 'channel' or another analysis");
  }

  #Build IMPORT Set for $table_name
  foreach my $echip (@{$self->experiment->get_ExperimentalChips()}) {

    #clean chip import and generate rset
	
	if($table_name eq 'experimental_chip'){
	  
	  if ($echip->has_status($status)) { #this translates to each channel have the IMPORTED_RawValue status
		$self->log("ExperimentalChip(".$echip->unique_id().") already has status:\t".$status);
	  } 
	  else {
		$self->log("Found ExperimentalChip(".$echip->unique_id().") without status $status");

		push @new_chip_channels, $echip;
	  }

	}else{#channel
	  
	  foreach my $chan(@{$echip->get_Channels()}){

		if ($chan->has_status($status)) { #this translates to each channel have the IMPORTED_RawValue status
		  $self->log("Channel(".$echip->unique_id()."_".$self->get_config('dye_freqs')->{$chan->dye()}.") already has status:\t".$status);
		} 
		else {
		  $self->log("Found Channel(".$echip->unique_id()."_".$self->get_config('dye_freqs')->{$chan->dye()}.") without status $status");
		  push @new_chip_channels, $chan;
		}
	  }
	}
  
	if (( ! $rset) && @new_chip_channels) {
	  my(@tmp) = @{$result_adaptor->fetch_all_by_name_Analysis($self->name()."_IMPORT", $anal)};

	  if(scalar(@tmp) > 1){
		throw('Found more than one IMPORT ResultSet for '.$self->name().'_IMPORT with analysis '.$logic_name);
	  }

	  $rset = shift @tmp;


	  #do we need to throw here if not recovery?
	  #what if we want the import result set elsewhere during the first import?
	  
	  #if ($self->recovery()) {
	  
		#fetch by anal and experiment_id
	  #Need to change this to result_set.name!
	  #	warn("add chip set handling here");
	  
		#my @tmp = @{$result_adaptor->fetch_all_by_Experiment_Analysis($self->experiment(), $anal)};
	  #throw("Found more than one ResultSet for Experiment:\t".$self->experiment->name()."\tAnalysis:\t".$anal->logic_name().')' if (scalar(@tmp) >1);
	  #$rset = $tmp[0];
	  
	  #warn "fetching rset with ".$self->name()."_IMPORT ". $anal->logic_name;
	  
	  #$rset = $result_adaptor->fetch_by_name_Analysis($self->name()."_IMPORT", $anal);
	  warn("Warning: Could not find recovery ResultSet for analysis ".$logic_name) if ! $rset;
	  #}
	  
	  if (! $rset) {
		$self->log("Generating new ResultSet for analysis ".$logic_name);
	
		$rset = Bio::EnsEMBL::Funcgen::ResultSet->new
		  (
		   -analysis   => $anal,
		   -table_name => $table_name,
		   -name       => $self->name()."_IMPORT",
		   -feature_type => $self->feature_type(),
		   -cell_type    => $self->cell_type(),
		  );
	
		#These types should be set to NULL during the MAGE-XML validation if we have more than one type in an experiment
	
		($rset) = @{$result_adaptor->store($rset)};
	  }
	}
  }

  #do we need this here as we're rolling back in the read methods?
  #we only want to roll back those chips/channels which have not been registered
  
  if ($self->recovery()) {

	my $ec_adaptor = $self->db->get_ExperimentalChipAdaptor();
	
	foreach my $cc(@new_chip_channels){
	  
	  #only roll back if already part of import set
	  #Not previously registered if not 
	  if($rset->contains($cc) && $rset->get_chip_channel_id($cc->dbID())){
		
		if($table_name eq 'channel'){
		  my $chan_name = $ec_adaptor->fetch_by_dbID($cc->experimental_chip_id())->unique_id()."_".
			$self->get_config('dye_freqs')->{$cc->dye()};
		  $self->log("Rolling back results for $table_name:\t".$chan_name);
		  
		}else{
		  $self->log("Rolling back results for $table_name:\t".$cc->unique_id);
		}
		
		$self->rollback_results([$rset->get_chip_channel_id($cc->dbID())]);
	  }
	}
  }

  
  #check whether it is present in the ResultSet and add if not
  if ($rset) {
	#ids will already be present if not rset i.e. already imported

	foreach my $cc(@new_chip_channels){
	  $rset->add_table_id($cc->dbID()) if(! $rset->contains($cc));
	}
  }


  
  if ($rset) {
    $result_adaptor->store_chip_channels($rset);
  } else {
    $self->log("All ExperimentalChips have status:\t$status");
  }
  
  #this only returns a result set if there is new data to import
  return $rset;

}

sub get_probe_cache_by_Array {

  my ($self, $array, $from_db) = @_;

  my $msg = "Getting probe cache for ".$array->name();
  $msg .= " from DB" if $from_db;
  $self->log($msg);#, 1);

  if(! ($array && $array->isa('Bio::EnsEMBL::Funcgen::Array') && $array->dbID())){
	throw('Must provide a valid stored Bio::EnsEMBL::Funcgen::Array object');
  }

  my $set = 0;
  my $cache_file = $self->get_dir('caches').'/'.$array->name().'.probe_cache';

  ### Generate and resolve fresh cache from DB
  if($from_db){

	$cache_file .= '.unresolved';#This will be renamed by the caller if it is resolved

	if(exists $self->{'_probe_cache'}{$array->name()}){
	  $self->log('Rebuilding probe_cache from DB for '.$array->name(), 1);


	  #untie @{$self->{'_probe_cache'}{$array->name()}{'entries'}};
	  #close($self->{'_probe_cache'}{$array->name()}{'handle'});#do we need to do this?
	  delete $self->{'_probe_cache'}{$array->name()};#implicitly closes
	  $self->log('Deleted old cache', 1);
	}else{
	  $self->log('Building probe_cache from DB for '.$array->name(), 1);
	}
	
	#Move this to ProbeAdaptor?
	#This is where we'd set the unique key for a vendor and resolves duplicates based on the key
	my $cmd = 'SELECT name, probe_id from probe WHERE array_chip_id IN ('.join(',', @{$array->get_array_chip_ids()}).') ORDER by name, probe_id';
	$cmd = 'mysql '.$self->db->connect_string()." -e\" $cmd\" >".$cache_file;
	run_system_cmd($cmd);
	
  }
 
  ### Set cache
  if(-f $cache_file){ 
	$self->log('MD5 check here?',1);
	$self->{'_probe_cache'}{$array->name()}{'current_line'} = undef;
	$self->{'_probe_cache'}{$array->name()}{'handle'} = open_file($cache_file);

	#can we do a select count instead? and do this instead of the MD5?
	#$cmd = "wc -l $cache_file";
	#my $size = `$cmd`;

	$set = 1;
  }
  else{
	warn 'Failed to get probe cache for array:'.$array->name();
  }
 
  return $set;
}


#should reorganise these emthods to split reading the array data, and the actual data
#currently:
#meta reads array and chip data
#probe reads probe_set, probes, which should definitely be in array, probe_feature? and results
#native data format may not map to these methods directly, so may need to call previous method if required data not defined

}

sub get_probe_id_by_name_Array {

  my ($self, $name, $array) = @_;
  
  #this is only ever called for fully imported ArrayChips, as will be deleted if recovering
  $self->resolve_probe_data() if(! exists $self->{'_probe_cache'}{$array->name()});
  
  #we want to cycle through the given cache starting from the last position or 0.
  #we don't want to have to test for the size of the cache each time as this will be quite expensive
  #so we should store sizes separately along with current position


  my ($pid, $line);
 
  #check current line
  if($line = $self->{'_probe_cache'}{$array->name()}{'current_line'}){
	if($line =~ /^\Q${name}\E\t/){
	  $pid = (split/\t/o, $line)[1];
	}
  }


  if(! $pid){
	while($line = $self->{'_probe_cache'}{$array->name()}{'handle'}->getline()){
	  
	  if($line =~ /^\Q${name}\E\t/){
		$pid = (split/\t/o, $line)[1];
		$self->{'_probe_cache'}{$array->name()}{'current_line'} = $line;
		last;
	  }
	}
  }

  #do not remove this
  if(! $pid){
	throw("Did not find probe name ($name) in cache, cache may need rebuilding, results may need sorting, or do you have an anomolaous probe?")
  }else{
	chomp $pid;
  }

  return $pid;

}

sub group {

  my ($self) = shift;	
  $self->{'group'} = shift if(@_);
  return $self->{'group'};

}

sub host {

  my $self = shift;
  $self->{'host'} = shift if(@_);
  return $self->{'host'};

}

sub hybridisation_fields {

  my $self = shift;

  return ['File[raw]', 'Array[accession]', 'Array[serial]', 
		  (map 'Protocol['.$_.']', (sort (keys %{$self->get_config('protocols')}))),
		  'BioSource', 'Sample', 'Extract', 'LabeledExtract', 'Immunoprecipitate', 'Hybridization', 
		  'BioSourceMaterial', 'SampleMaterial', 'ExtractMaterial', 'LabeledExtractMaterial',
		  'Dye', 'BioMaterialCharacteristics[Organism]', 'BioMaterialCharacteristics[BioSourceType]',	
		  'BioMaterialCharacteristics[StrainOrLine]', 'BioMaterialCharacteristics[CellType]', 
		  'BioMaterialCharacteristics[Sex]', 'FactorValue[StrainOrLine]', 'FactorValue[Immunoprecipitate]'];

}

sub init_array_import {

  my ($self) = shift;

  # we need to define which paramters we'll be storing
  #use the logic names of the analyses as the field headers

  #need to test for vendor here

  #Sanger, NIMBLEGEN(no design_id issue, could get from the ndf, but we want it in the DesignNotes.txt)
  #Then we can change the Array/Chip generation to soley use the DesignNotes.txt rather than SampleKey 
  #which is experiment specific
  #or eFG format.

  $self->create_output_dirs('caches', 'fastas');

}

sub init_experiment_import {

  my ($self) = shift;

  #Change this to take config mandatory params?
  #No specific to exp import
  #Name is used in set_config anyway
  #Currently we only have array and experiment import, both of which should have names
  #Make mandatory?

  foreach my $tmp ("group", "data_dir") {#name now generically mandatory
    throw("Mandatory arg $tmp not been defined") if (! defined $self->{$tmp});
  }
  #Should we separate path on group here too, so we can have a dev/test group?
  
  #Create output dirs
  #This should be moved to the Parser to avoid generating directories which are needed for different imports
  $self->create_output_dirs('raw', 'norm', 'caches', 'fastas');
  throw("No result_files defined.") if (! defined $self->result_files());

  #Log input files
  if (@{$self->result_files()}) {
    $self->log("Found result files arguments:\n\t".join("\n\t", @{$self->result_files()}));
  }

  #check for cell||feature and warn if no met file supplied?

  if($self->norm_method){
	my $norm_anal = $self->db->get_AnalysisAdaptor->fetch_by_logic_name($self->norm_method);

	#should we list the valid analyses?
	throw($self->norm_method.' is not a valid analysis') if ! $norm_anal;
	$self->norm_analysis($norm_anal);
  }else{
	$self->log('WARNING: No normalisation analysis specified');
  }
  
  #warn "Need to check env vars here or in Parser or just after set_config?";
  #Need generic method for checking ENV vars in Helper
  #check for ENV vars?
  #R_LIBS
  #R_PATH if ! farm
  #R_FARM_PATH 

  $self->validate_group();#import experimental_group

  #Get experiment
  my $exp_adaptor = $self->db->get_ExperimentAdaptor();  
  my $exp = $exp_adaptor->fetch_by_name($self->name());	#, $self->group());
 
  #This is only used for the first test below.
  my $xml = $exp_adaptor->fetch_mage_xml_by_experiment_name($self->name());# if $self->{'write_xml'};

  #DO NOT CHANGE THIS LOGIC!
  #write mage if we specify or we don't have a the final xml or the template
  #recovery is turned on to stop exiting when previously stored chips are found from the 'write_mage' run.
  #This does mean that if you import without running the write_mage step
  #you could potentially be overwriting someone elses experiment info
  #No way of getting around this, need to make warning obvious, add to end of log!!!
  #We always want to write and update xml and ResultSets if we're running the 2nd stage of the import
  #Why would we ever want to skip the validate process?
  #Leave for now as this is working as we want it
  #But propose to remove skip functionality
  
  if( ! $self->{'no_mage'}){

	if($self->{'write_mage'} || !( -f $self->get_config('tab2mage_file') || $xml)){
	  $self->{'write_mage'} = 1;
	  $self->backup_file($self->get_config('tab2mage_file'));
	}
	#elsif($xml && (! $self->{'update_xml'})){#Changed this so we always update
	#elsif(! $self->{'update_xml'}){



	  #Here, we need to always update_xml
	  #If we are doing the 2nd stage
	  #Currently this is skipping as we haven't explicitly set it
	  #To remove this...
	  #what we need to do is check that we don't test for update_xml, 
	  # i.e. assuming that we're running the second stage of the import.
	  # Therefore we need a boolean to set whether it is the first stage..else update_xml implicit
	  # write mage is explicit flag
	  # Or if we have not tab2mage file?
	  # we can then override this explicitly with update_xml?
	  # WE're never likely edit the xml directly, so we always want to validate and update
	  # so write mage flag become update_experiment?  No this is no obvious behaviour
	  # We need to warn about removing the write_mage flag after we have updated it
	  # Otherwise we will never get to 2nd stage


	#No mage is still valid as we may want to jus import and experiment
	#Before receiving correct meta data
	#When we can then rerun the import with -write_mage to update the resultsets

	#  $self->{'recover'} = 1;
	#  $self->{'skip_validate'} = 1;
	#}
	elsif( -f $self->get_config('tab2mage_file')){#Run Tab2Mage

	  $self->backup_file($self->get_config('mage_xml_file'));
	  my $cmd = 'tab2mage.pl -e '.$self->get_config('tab2mage_file').' -k -t '.$self->get_dir('output').
		' -c -d '.$self->get_dir('results');
	  
	  $self->log('Reading tab2mage file');
	  my $t2m_exit_code = run_system_cmd($cmd, 1);#no exit flag due to non-zero exit codes	  
	  warn "tab2mage exit code is  $t2m_exit_code"; 
	  
	  if(! ($t2m_exit_code > -1) && ($t2m_exit_code <255)){
		throw("tab2mage failed.  Please check and correct:\t".$self->get_config('tab2mage_file')."\n...and try again");
	  }
	  
	  $self->{'recover'} = 1;
	}
  }
	
  #Recovery now set so deal with experiment
  if ($self->recovery() && ($exp)) {
    $self->log("Using previously stored Experiment:\t".$exp->name);
  } elsif ((! $self->recovery()) && $exp) {
    throw("Your experiment name is already registered in the database, please choose a different\" name\", this will require renaming you input directory, or specify -recover if you are working with a failed/partial import.");
    #can we skip this and store, and then check in register experiment if it is already stored then throw if not recovery
  } else {						# (recover && exp) || (recover  && ! exp) 
 
    
    $exp = Bio::EnsEMBL::Funcgen::Experiment->new(
												  -GROUP => $self->group(),
												  -NAME  => $self->name(),
												  -DATE  => $self->experiment_date(),
												  -PRIMARY_DESIGN_TYPE => $self->design_type(),
												  -DESCRIPTION => $self->description(),
												  -ADAPTOR => $self->db->get_ExperimentAdaptor(),
												 );
    
    ($exp) =  @{$exp_adaptor->store($exp)};
  }
  
  
  $self->experiment($exp);
  
  #remove and add specific report, this is catchig some Root stuff
  #$self->log("Initiated efg import with following parameters:\n".Data::Dumper::Dumper(\$self));
  
  return;

}

sub init_tab2mage_export {

  my $self = shift;

  $self->backup_file($self->get_config('tab2mage_file')) if(-f $self->get_config('tab2mage_file'));

  my $t2m_file = open_file($self->get_config('tab2mage_file'), '>');

  #reformat this
  my $exp_section = "experiment section\ndomain\t".(split/@/, $self->contact())[1]."\naccession\t\n".
	"quality_control\tbiological_replicate\nexperiment_design_type\tbinding_site_identification\n".
	  "name\t".$self->name()."\nrelease_date\t\nsubmission_date\t\nsubmitter\t???\n".
		"submitter_email\t???\ninvestigator\t???\ninvestigator_email\t???\norganization\t???\naddress\t".
		  "???\npublication_title\t\nauthors\t\njournal\t\nvolume\t\nissue\t\npages\t\nyear\t\npubmed_id\t\n";

  my $protocol_section = "Protocol section\naccession\tname\ttext\tparameters\n";

  foreach my $protocol(sort (keys %{$self->get_config('protocols')})){
	$protocol_section .= $self->get_config('protocols')->{$protocol}->{'accession'}.
	  "\t".$self->get_config('protocols')->{$protocol}->{'name'}.
		"\t".$self->get_config('protocols')->{$protocol}->{'text'}."\t";

	$protocol_section .= (defined $self->get_config('protocols')->{$protocol}->{'parameters'}) ?
	  $self->get_config('protocols')->{$protocol}->{'parameters'}."\t\n" : "\t\n";
  }

  #File[raw]	Array[accession]	Array[serial]	Protocol[grow]	Protocol[treatment]	Protocol[extraction]	Protocol[labeling]	Protocol[hybridization]	Protocol[scanning]	BioSource	Sample	Extract	LabeledExtract	Immunoprecipitate	Hybridization	BioSourceMaterial	SampleMaterial	ExtractMaterial	LabeledExtractMaterial	Dye	BioMaterialCharacteristics[Organism]	BioMaterialCharacteristics[BioSourceType]	BioMaterialCharacteristics[StrainOrLine]	BioMaterialCharacteristics[CellType]	BioMaterialCharacteristics[Sex]	FactorValue[StrainOrLine]	FactorValue[Immunoprecipitate]


  #Need to do this bit better?
  #have array of fields.  We can then populate a hash in the read method based on field names, then use the array to print in order

  my $hyb_header = "\nHybridization section\n".join("\t", @{$self->hybridisation_fields()});

  print $t2m_file $exp_section."\n".$protocol_section."\n".$hyb_header."\n";

  return $t2m_file;
}


#Move to MAGE package?

}

sub location {

  my ($self) = shift;
  $self->{'location'} = shift if(@_);
  return $self->{'location'};

}

sub name {

  my ($self) = shift;	
  $self->{'name'} = shift if(@_);
  return $self->{'name'};

}

sub new {

  my ($caller) = shift;

  my $reg = "Bio::EnsEMBL::Registry";
  my $class = ref($caller) || $caller;

  my ($name, $format, $vendor, $group, $location, $contact, $species,
	  $array_name, $array_set, $array_file, $data_dir, $result_files,
	  $ftype_name, $ctype_name, $exp_date, $desc, $user, $host, $port, 
	  $pass, $dbname, $db, $assm_version, $design_type, $output_dir, $input_dir,
	  $farm, $ssh, $fasta, $recover, $reg_config, $write_mage, $no_mage, $eset_name, 
	  $norm_method, $old_dvd_format, $feature_analysis, $reg_db, $parser_type, 
	  $ucsc_coords, $verbose, $fset_desc, $release, $reg_host, $reg_port, $reg_user, $reg_pass)
	= rearrange(['NAME', 'FORMAT', 'VENDOR', 'GROUP', 'LOCATION', 'CONTACT', 'SPECIES', 
				 'ARRAY_NAME', 'ARRAY_SET', 'ARRAY_FILE', 'DATA_DIR', 'RESULT_FILES',
				 'FEATURE_TYPE_NAME', 'CELL_TYPE_NAME', 'EXPERIMENT_DATE', 'DESCRIPTION',
				 'USER', 'HOST', 'PORT', 'PASS', 'DBNAME', 'DB', 'ASSEMBLY', 'DESIGN_TYPE',
				 'OUTPUT_DIR', 'INPUT_DIR',	#to allow override of defaults
				 'FARM', 'SSH', 'DUMP_FASTA', 'RECOVER', 'REG_CONFIG', 'WRITE_MAGE', 
				 'NO_MAGE', 'EXPERIMENTAL_SET_NAME', 'NORM_METHOD', 'OLD_DVD_FORMAT',
				 'FEATURE_ANALYSIS', 'REGISTRY_DB', 'PARSER', 'UCSC_COORDS', 'VERBOSE',
				 'FEATURE_SET_DESCRIPTION', 'RELEASE', 'REGISTRY_HOST', 'REGISTRY_PORT',
				'REGISTRY_USER', 'REGISTRY_PASS'], @_);

  
  #### Define parent parser class based on vendor
  throw("Mandatory argument -vendor not defined") if ! defined $vendor;

  #This will override the default Vendor Parser type
  #Evals simply protect from messy errors if parser type not found
  my $parser_error;
  my $vendor_parser =  ucfirst(lc($vendor));


  #WARNING evaling these parsers to enable pluggability hides errors in parser
  #use a perl -MBio::EnsEMBL::Funcgen::Parsers:ParserType to debug
  #get rid of all this case guessing and force correct parser name usage?


  #WARNING
  #Dynamic setting of ISA in this way reports the resultant object as Importer, when 
  #some throws/methods are actually in other base/custom Parsers
  #This can seem a little counterintuitive, but allows plugability
  #With out the need for separate control scripts


  eval {require "Bio/EnsEMBL/Funcgen/Parsers/${vendor_parser}.pm";};
 
  if($@){
	  #Don't warn/throw yet as we might have a standard parser format
	
	$parser_error .= "There is no valid parser for the vendor your have specified:\t".$vendor.
	  "\nMaybe this is a typo or maybe you want to specify a default import format using the -parser option\n".$@;
  }
  

  
  if(defined $parser_type){

	#try normal case first
	eval {require "Bio/EnsEMBL/Funcgen/Parsers/${parser_type}.pm";};

	if($@){
	  $parser_type = ucfirst(lc($parser_type));

	  #Now eval the new parser
	  eval {require "Bio/EnsEMBL/Funcgen/Parsers/${parser_type}.pm";};

	  if($@){
		
		#Might be no default
		my $txt = "There is no valid parser for the -parser format your have specified:\t".$parser_type."\n";
		
		if(! $parser_error){
		  $txt .= "Maybe this is a typo or maybe you want run with the default $vendor_parser parser\n";
		}
		
		throw($txt.$@);
	  }
	  
	  #warn about over riding vendor parser here
	  if(! $parser_error){
		#Can't log this as we haven't blessed the Helper yet
		warn("WARNING\t::\tYou are over-riding the default ".$vendor." parser with -parser ".$parser_type);
	  }
	}
  }
  else{
	throw($parser_error) if $parser_error;
	$parser_type = $vendor_parser;
  }
  

  #we should now really set parser_type as an attrtibute?
  unshift @ISA, 'Bio::EnsEMBL::Funcgen::Parsers::'.$parser_type;
  #change this to be called explicitly from the load script?

  #### Create object from parent class

  my $self = $class->SUPER::new(@_);
    
  #### Set vars and test minimum mandatory params for any import type

  $self->{'name'} = $name || throw('Mandatory param -name not met');#This is not mandatory for array design import
  $self->{'user'} = $user || $ENV{'EFG_WRITE_USER'};
  $self->vendor(uc($vendor));	#already tested
  $self->{'format'} = uc($format) || 'TILED'; #remove default?
  $self->group($group) if $group;
  $self->location($location) if $location;
  $self->contact($contact) if $contact;
  $species || throw('Mandatory param -species not met');
  $self->array_name($array_name) if $array_name;
  $self->array_set($array_set) if $array_set;
  $self->array_file($array_file) if $array_file;
  $self->{'data_dir'} = $data_dir || $ENV{'EFG_DATA'};
  $self->result_files($result_files)if $result_files;
  $self->experiment_date($exp_date) if $exp_date;
  $self->description($desc) if $desc;#experiment
  $self->feature_set_description($fset_desc) if $fset_desc;
  $assm_version || throw('Mandatory param -assembly not met');
  $self->{'design_type'} = $design_type || 'binding_site_identification'; #remove default?
  $self->{'output_dir'} = $output_dir if $output_dir; #config default override
  $self->{'input_dir'} = $input_dir if $input_dir; #config default override
  $self->farm($farm) if $farm;
  $self->{'ssh'} = $ssh || 0;
  $self->{'_dump_fasta'} = $fasta || 0;
  $self->{'recover'} = $recover || 0;
  #check for ~/.ensembl_init to mirror general EnsEMBL behaviour
  $self->{'reg_config'} = $reg_config || ((-f "$ENV{'HOME'}/.ensembl_init") ? "$ENV{'HOME'}/.ensembl_init" : undef);
  #$self->{'update_xml'} = $update_xml || 0;
  $self->{'write_mage'} = $write_mage || 0;
  $self->{'no_mage'} = $no_mage || 0;
  $self->{'experimental_set_name'} = $eset_name if $eset_name;
  $self->{'old_dvd_format'} = $old_dvd_format || 0;
  $self->{'ucsc_coords'} = $ucsc_coords || 0;
  $self->{'verbose'} = $verbose || 0;
  $self->{'release'} = $release;

 
  if($reg_host && $self->{'reg_config'}){
	warn "You have specified registry parameters and a config file:\t".$self->{'reg_config'}.
	  "\nOver-riding config file with specified paramters:\t${reg_user}@${reg_host}:$reg_port";
  }


  #Will a general norm method be applicable for all imports?
  #Already casued problems with Bed imports... remove?
  #Could set NORM_METHOD in Parser!!
  warn "Need to fully implement norm_method is validate_mage, remove ENV NORM_METHOD?";
  $self->{'norm_method'} = $norm_method || $ENV{'NORM_METHOD'};
 
  if ($self->vendor ne 'NIMBLEGEN'){
	$self->{'no_mage'} = 1;
	warn "Hardcoding no_mage for non-NIMBLEGEN imports";
  }

 
  if($self->{'no_mage'} && $self->{'write_mage'}){
	throw('-no_mage and -write_mage options are mutually exclusive, please select just one');
  }

  #### Set up DBs and load and reconfig registry

  ### Load Registry
  #Can we load the registry using the assembly version, then just redefine the efg DB?
  #We have problems here if we try and load on a dev version, where no dev DBs are available on ensembldb
  #Get the latest API version for the assembly we want to use
  #Then load the registry from that version
  #Then we can remove some of the dnadb setting code below?

  #How does the registry pick up the schema version??

  #We should really load the registry first given the dnadb assembly version
  #Then reset the eFG DB as appropriate


  if ($reg_host || ! defined $self->{'_reg_config'}) {
	#defaults to current ensembl DBs
	$reg_host ||= 'ensembldb.ensembl.org';
	$reg_user ||= 'anonymous';

	#Default to the most recent port for ensdb
	if(! $reg_port && $reg_host eq 'ensdb-archive'){
	  $reg_port = 5304;
	}

	#This will try and load the dev DBs if we are using v49 schema or API?
	#Need to be mindful about this when developing
	#we need to tip all this on it's head and load the reg from the dnadb version!!!!!!!

	my $version_text= ($self->{'release'}) ? 'version '.$self->{'release'} : 'current version';
	$self->log("Loading $version_text registry from $reg_user".'@'.$reg_host);

	#Note this defaults API version, hence running with head code
	#And not specifying a release version will find not head version
	#DBs on ensembldb, resulting in an exception from reset_DBAdaptor below
	$reg->load_registry_from_db(
								-host => $reg_host,
								-user => $reg_user,
								-port => $reg_port,
								-pass => $reg_pass,
								#-host => "ens-staging",
								#-user => 'ensro',
								-db_version => $self->{'release'},#51
								-verbose => $self->verbose,
							   );

	
	throw('Not sensible to set the import DB as the default eFG DB from ensembldb, please define db params') if ((! $dbname) && (! $db));
  }
  else{
	$self->log("Loading registry from:\t".$self->{'_reg_config'});
	$reg->load_all($self->{'_reg_config'}, 1);
  }



  #Validate species
  my $alias = $reg->get_alias($species) || throw("Could not find valid species alias for $species\nYou might want to clean up:\t".$self->get_dir('output'));
  $self->species($alias);
  $self->{'param_species'} = $species;#Only used for dir generation

  
  #SET UP DBs
  if($db){
	#db will have been defined before reg loaded, so will be present in reg

	if(! (ref($db) && $db->isa('Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor'))){
	  $self->throw('-db must be a valid Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor');
	}
  }
  else{
	#define eFG DB from params or registry

	if($reg_db){#load eFG DB from reg

	  #we should throw here if db params are set
	  #How do we handle passwords within the reg config?
	  #Do we still need to pass this when loading?

	  $self->log('WARNING: Loading eFG DB from Registry');
	  $db = $reg->get_DBAdaptor($self->species(), 'funcgen');
	  throw("Unable to retrieve ".$self->species." funcgen DB from the registry") if ! $db;
	}
	else{#from params
	  #This resets the eFG DB in the custom or generic registry

	  #Need to check for mandatory params here

	  $dbname || throw('Must provide a -dbname if not using default custom registry config');
	  #$user || throw('Must provide a -user parameter');#make this default to EFG_WRITE_USER?
	  $pass || throw('Must provide a -pass parameter');
	 
	  #remove this and throw?
	  if(! defined $host){
		$self->log('WARNING: Defaulting to localhost');
		$host = 'localhost';
	  }
	  
	  $port ||= 3306;
	  my $host_ip = '127.0.0.1';#is this valid for all localhosts?
	  
	  if ($self->{'ssh'}) {
		$host = `host localhost`; #mac specific? nslookup localhost wont work on server/non-PC 
		#will this always be the same?
		
		if (! (exists $ENV{'EFG_HOST_IP'})) {
		  warn "Environment variable EFG_HOST_IP not set for ssh mode, defaulting to $host_ip for $host";
		} else {
		  $host_ip = $ENV{'EFG_HOST_IP'};
		}
		
		if ($self->host() ne 'localhost') {
		  warn "Overriding host ".$self->host()." for ssh connection via localhost($host_ip)";
		}
	  }
	

	  #data version is only used if we don't want to define the dbname
	  #This should never be guessed so don't need data_version here
	  #$dbname ||= $self->species()."_funcgen_".$self->data_version();


	  #Remove block below when we can
	  my $dbhost = ($self->{'ssh'}) ? $host_ip : $host;

	  #This isn't set yet!?
	  #When we try to load, say 49, when we only have 48 on ensembldb
	  #This fails because there is not DB set for v49, as it is not on ensembl DB
	  #In this case we need to load from the previous version
	  #Trap this and suggest using the -schema_version/release option
	  #Can we autodetect this and reload the registry?
	  #We want to reload the registry anyway with the right version corresponding to the dnadb

	  #warn "reseting adaptor";

	  #We could either test for the db in the regsitry or just pass the class.

	  $db = $reg->reset_DBAdaptor($self->species(), 'funcgen', $dbname, $dbhost, $port, $self->user, $pass,
								  #'Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor',
								 {
								  -dnadb_host => $reg_host,
								  -dnadb_port => $reg_port,
								  -dnadb_assembly => $assm_version,
								  -dnadb_user => $reg_user,
								  -dnadb_pass => $reg_pass,
								 });


	  #ConfigRegistry will try ans set this
	  #This will fail if there is already one in the registry as it will try
	  #and defined a new unique species so as not to overwrite the original
	  #e.g. homo_sapiens1
	  
	  #This is why it was orignally written backwards as we can't easily dynamically redefine
	  #an adaptor in the registry without ConfigRegistry trying to change the name
	  #the very act of creating a new db to redefine the registry with causes ConfigRegistry
	  #to try and register it with a unique species name

	  #Delete the old funcgen DB from the registry first
	  #$reg->remove_DBAdaptor($self->species, 'funcgen');

	  #ConfigRegistry will automatically configure this new db
	  
	  #$db = Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor->new(
	#													 -user => $user,
	#													 -host => ($self->{'ssh'}) ? $host_ip : $host,
	#													 -port => $port,
	#													 -pass => $pass,
	#													 #we need to pass dbname else we can use non-standard dbs
	#													 -dbname => $dbname,
	#													 -species => $self->species(),
	#													 -group    => 'funcgen',
	#													);


	  #if we get a species like homo_sapiens1 here
	  #This is because ConfigRegistry is try to make the dbname different between the 
	  #one already present and the one you're trying to add
	}
  }

    
  ### VALIDATE DNADB
  #This is now done in DBAdaptor

  #We can change this to just use the assembly version
  #we could even have the wordy assmelby version from the meta table
  #do the standard ensembl subs
  #s/[A-Za-z]//g;
  #s/\.//g;
  #And then validate?
  #Just stick to number version for now.


  #Now we need to set the dnadb_host params to avoid ensembldb defaults
  #This should check the registry first
  #Then load from the registry db?
  #If we have a multi host registry config file this isn't going to work!

  #Is this required anymore as the DBAdaptor handles this?
  #Not if we pass a db with an incorrect dnadb attached.

  #if($db->_get_schema_build($db->dnadb()) !~ /_[0-9]+_${assm_version}[a-z]*$/){
#	my $warning = "WARNING: dnadb does not match assembly_version $assm_version. Using ensembldb.enembl.org to define the dnadb";
#	$warning .= ' rather than the reg_config' if (defined $self->{'_reg_config'});

	#We need to account for reg_config DBs which may have custom info in
	#So try reg_config host first, then try ensembldb with warning
	#Could have a reg_config only flag for core dbs
	#Need to implement more params in set_dnadb_by_assembly_version
#	$self->log($warning);

#	$db->set_dnadb_by_assembly_version($assm_version);
#  }




  #Test connections
  $self->db($db);
  $db->dbc->db_handle;
  $db->dnadb->dbc->db_handle;
  #Set re/disconnect options
  $db->dbc->disconnect_when_inactive(1);
  $db->dnadb->dbc->disconnect_when_inactive(1);

 

  ### Check analyses/feature_type/cell_type
  if($feature_analysis){
	my $fanal = $self->db->get_AnalysisAdaptor->fetch_by_logic_name($feature_analysis);
 	throw("The Feature Analysis $feature_analysis does not exist in the database") if(!$fanal);
	$self->feature_analysis($fanal);
  }

  if($ctype_name){
	my $ctype = $self->db->get_CellTypeAdaptor->fetch_by_name($ctype_name);
 	throw("The CellType $ctype_name does not exist in the database") if(!$ctype);
	$self->cell_type($ctype);
  }

  if ($ftype_name) {
    my $ftype = $self->db->get_FeatureTypeAdaptor->fetch_by_name($ftype_name);
	throw("The FeatureType $ftype_name does not exist in the database") if(!$ftype);
	$self->feature_type($ftype);
  }


  #Set config here instead?
  #So we can check all mandatory params
  #Set vendor specific attr dependent vars
  
  #Generic input dir
  $self->{'input_dir'} ||= $self->get_dir("data").'/input/'.$self->{'param_species'}.'/'.$self->vendor().'/'.$self->name();
  throw('input_dir is not defined or does not exist ('.
		$self->get_dir('input').')') if(! -d $self->get_dir('input')); #Helper would fail first on log/debug files

  #Parser specific config
  $self->set_config();


  $self->debug(2, "Importer class instance created.");
  $self->debug_hash(3,\$ self);
    
  return ($self);

}

sub norm_analysis {

  my ($self) = shift;

  if (@_) {
    my $anal = shift;
    
    #do we need this as we're checking in new?
    if (! (ref($anal) && $anal->isa('Bio::EnsEMBL::Analysis') && $anal->dbID())) {
      throw("Must pass a valid stored Bio::EnsEMBL::Analysis");
    }
  
    $self->{'norm_analysis'} = $anal;
  }

  return $self->{'norm_analysis'};

}

sub norm_method {

  my $self = shift;

  if (@_) {
    $self->{'norm_method'} = shift;
  } elsif (! defined  $self->{'norm_method'}) {
    $self->{'norm_method'}= $self->get_config('norm_method');
  }

  return $self->{'norm_method'};

}

sub pass {

  my $self = shift;
  $self->{'pass'} = shift if(@_);
  return $self->{'pass'};

}

sub port {

  my $self = shift;
  $self->{'port'} = shift if(@_);
  return $self->{'port'};

}

sub read_data {

  my($self, $data_type) = @_;
  map {my $method = "read_${_}_data"; $self->$method()} @{$self->get_config("${data_type}_data")};
  return;

}

sub recovery {

  my $self = shift;
  $self->{'recover'} = shift if(@_);
  return $self->{'recover'};

}

sub register_experiment {

  my ($self) = shift;
  
  #Need to check for dnadb passed with adaptor to contructor
  if (@_) { 

    if ( ! $_[0]->isa("Bio::EnsEMBL::DBSQL::DBAdaptor")) {
      throw("You need to pass a valid dnadb adaptor to register the experiment");
    }
    $self->db->dnadb($_[0]);
  } elsif ( ! $self->db) {
    throw("You need to pass/set a DBAdaptor with a DNADB attached of the relevant data version");
  }
  
  #This could still be the default core db for the current version
  #warn here if not passed DB?
  #These should be vendor independent, only the read methods should need specific order?
  
  $self->init_experiment_import();
  #can we just have init here instead?


  
  if($self->{'write_mage'} || $self->{'no_mage'}){
	$self->read_data("array");

	if(! $self->{'no_mage'}){
	  $self->log("PLEASE CHECK AND EDIT AUTOGENERATED TAB2MAGE FILE:\t".$self->get_config('tab2mage_file'));
	  #we could make this print only if it was set by the user, not by the Importer
	  $self->log('REMEMBER TO REMOVE -write_mage FLAG BEFORE UPDATING');
	  exit;
	}
  }
  elsif(! $self->{'no_mage'}){#This should be a no_channel flag, set dependent on import mode(gff_chip, gff_chan)
	#Need to accomodate chip level imports in validate?
	$self->validate_mage() if (! $self->{'skip_validate'});
  }

  $self->read_data("probe");
  $self->read_data("results");  
  #Need to be able to run this separately, so we can normalise previously imported sets with different methods
  #should be able t do this without raw data files e.g. retrieve info from DB

  my $norm_method = $self->norm_method();
  
  if (defined $norm_method) {
	$self->R_norm($norm_method);
	#change this to $self->$norm_method
	#so we can have non-R_norm normalisation
  }
  
  
  return;

}

sub registry_host {

  return $_[0]->{'reg_host'};

}

sub registry_pass {

  return $_[0]->{'reg_pass'};
}


#init method kept separate from new due to differing madatory check and set up

}

sub registry_port {

  return $_[0]->{'reg_port'};

}

sub registry_user {

  return $_[0]->{'reg_user'};

}

sub resolve_probe_data {

  my $self = shift;

  $self->log("Resolving probe data", 1);

  warn "Probe cache resolution needs to accomodate probesets too!";

  foreach my $array(@{$self->arrays()}){
	my $resolve = 0;
	
	if($self->get_probe_cache_by_Array($array)){#cache already generated

	  #check if we have any new unresolved array chips to add to the cache
	  foreach my $achip(@{$array->get_ArrayChips()}){
		
		if($achip->has_status('RESOLVED')){
		  $self->log("ArrayChip has RESOLVED status:\t".$achip->design_id());#, 1);
		  next;
		}else{
		  $self->log("Found un-RESOLVED ArrayChip:\t".$achip->design_id());
		  $resolve = 1;
		  last;
		}
	  }
	}else{#no cache file
	  $resolve = 1;
	  $self->log('No probe cache found for array '.$array->name());
	}

	if($resolve){
	  $self->log('Resolving array duplicates('.$array->name().') and rebuilding probe cache.', 1);
	  $self->get_probe_cache_by_Array($array, 1);#get from DB

	  #we need ot make sure we mark cache as unresolved, so we don't use it by mistake.
	  


	  my ($line, $name, $pid, @pids);
	  #my $index = 0;
	  my $tmp_name = '';
	  my $tmp_id = '';

	  #miss the header

	  while ($line = $self->{'_probe_cache'}{$array->name}{'handle'}->getline()){
		($name, $pid) = split/\t/o, $line;

		if($name eq $tmp_name){

		  if($pid != $tmp_id){
			push @pids, $pid;
			#should reset to pid here if we have x y data else undef
			#ignore this and force result to have x y
		  }

		  #can't do this naymore unless we figure out how to move the line pointer
		  #would still need to sed the file anyway, better to regen from DB?
		  #undef $self->{'_probe_cache'}{$array->name}{'entries'}->[$i];#delete true or to be resolved duplicate
		}
		elsif($name ne $tmp_name){#new probe
		  $self->tidy_duplicates(\@pids) if(scalar(@pids) > 1);
		  $tmp_name = $name;
		  $tmp_id = $pid;
		  @pids = ($pid);
		  #$index = $i + 1;
		}
	  }

	  $self->tidy_duplicates(\@pids) if(scalar(@pids) > 1);

	  #rename resovled cache and reset cache handle
	  my $cmd = 'mv '.$self->get_dir('caches').'/'.$array->name().'.probe_cache.unresolved '.
		$self->get_dir('caches').'/'.$array->name().'.probe_cache';

	  run_system_cmd($cmd);
	  $self->get_probe_cache_by_Array($array); #This sets the caches


	  #warn "Only generate MD5 here, as this is guranteed to be correct";
	  
	  foreach my $achip(@{$array->get_ArrayChips()}){
		
		if(! $achip->has_status('RESOLVED')){
		  $self->log("Updating ArrayChip to RESOLVED status:\t".$achip->design_id());
		  $achip->adaptor->store_status('RESOLVED', $achip);
		}
	  }
	  
	  $self->log('Finished building probe cache for '.$array->name(), 1);
	}
  }

  $self->log('Finished resolving probe data', 1);

  return;

}

sub result_files {

  my ($self) = shift;	
  $self->{'result_files'} = shift if(@_);
  return $self->{'result_files'};

}

sub slice_adaptor {

  my $self = shift;

  if (! defined $self->{'slice_adaptor'}) {
	$self->{'slice_adaptor'} =  $self->db->get_SliceAdaptor();
  }

  return $self->{'slice_adaptor'};

}

sub species {

  my $self = shift;

  #should we do reg alias look up here?
  #Will we ever want to redefine species?
  #Change to just getter?

  $self->{'species'} = shift if(@_);
	
  return $self->{'species'};

}

sub store_set_probes_features {

  my ($self, $ac_id, $pf_hash, $ops) = @_;
  
  ### Deal with ProbeSets
  if ($ops) {
    $ops->size(scalar(keys %$pf_hash));
    ($ops) = $self->db->get_ProbeSetAdaptor->store($ops);
  }

  #If we're going to validate fully, we need to check for probes in this probeset on this array chip
  #Update size if we have any new probes
  #Overkill? Only do on recover? Do not read if array chip is IMPORTED 
  #This does not make any attempt to validate probes/set vs previously stored data
   
  for my $probe_id (keys %$pf_hash) {
    
    #set probeset in probe and store
    #the process corresponding feature
    my $probe = $pf_hash->{$probe_id}->{'probe'};
    $probe->probeset($ops) if $ops;
    ($probe) = @{$self->db->get_ProbeAdaptor->store($probe)};

	#Can't use get_all_Arrays here as we can't guarantee this will only ever be the array we've generated
    #Might dynamically load array if non-present
    #This is allowing multiple dbIDs per probe???  Is this wrong?
    #$self->cache_probe_info($probe->get_probename(), $probe->dbID());###########Remove as we're now importing all then resolving

        
    foreach my $feature (@{$pf_hash->{$probe_id}->{'features'}}) {
      $feature->probe($probe);
      ($feature) = @{$self->db->get_ProbeFeatureAdaptor->store($feature)};
    }
  }

  undef $ops;					#Will this persist in the caller?
  undef %{$pf_hash};

  return;

}

sub tidy_duplicates {

  my ($self, $pids) = @_;

  my $pfa = $self->db->get_ProbeFeatureAdaptor();
  my ($feature, %features);
			
  foreach my $dup_id(@$pids){
			  
	foreach $feature(@{$pfa->fetch_all_by_Probe_id($dup_id)}){
	  #can we safely assume end will be same too?
	  push @{$features{$feature->seq_region_name().':'.$feature->start()}}, $feature;
	}
  }
			
  my (@reassign_ids, @delete_ids);
			
  foreach my $seq_start_key(keys %features){
	my $reassign_features = 1;
	
	foreach $feature(@{$features{$seq_start_key}}){
	  
	  if($feature->probe_id() == $pids->[0]){
		$reassign_features = 0;
	  }else{
		push @delete_ids, $feature->dbID();
	  }
	}
	
	#This assumes that we actually have at least one element to every seq_start_key array
	if($reassign_features){
	  my $new_fid = pop @delete_ids;
	  push @reassign_ids, $new_fid;
	}
  }
			
  #resolve features first so we don't get any orphaned features if we crash.
  $pfa->reassign_features_to_probe(\@reassign_ids, $pids->[0]) if @reassign_ids;
  $pfa->delete_features(\@delete_ids) if @delete_ids;

  return;
}



1;

}

sub ucsc_coords {

  my $self = shift;
  return $self->{'ucsc_coords'};

}

sub user {

  my $self = shift;
  $self->{'user'} = shift if(@_);
  return $self->{'user'};

}

sub validate_group {

  my ($self) = shift;

  my $group_ref = $self->db->fetch_group_details($self->group());

  if (! $group_ref) {
    if ($self->location() && $self->contact()) {
      $self->db->import_group($self->group(), $self->location, $self->contact());
    } else {
      throw("Group ".$self->group()." does not exist, please specify a location and contact to register the group");
    }
  }
  
  return;

}

sub validate_mage() {

  my ($self, $mage_xml, $update) = @_;

  $self->log("Validating mage file:\t".$self->get_config('mage_xml_file'));


  #Updating ResultSets:
  #Given that we might want to add a chip to an experiment we will also need to update the tab2MAGE
  #mage_xml and ResultSets accordingly.

  #This should happen if we specify update_xml
  #Should recovery also always force update?
  #Considering the two run modes, write tab2mage & validate and import
  #There is a subtle difference between recovery and update mage_xml
  #Do we always run in recovery mode for the validate&import step?
  #Yes we do, so can't guarantee the this means we want to update.
  #So we need to change update_xml to update to reflect the changed functionality on ResultSets
  
  #If we run an update without on then chips will be loaded but xml and ResultSets will not be altered :(
  #If we're running the 2nd stage we should always be updating the xml anyway!!!!
  #As there is no reason to rerun the validate & import step without it.(unless we're debugging maybe)
  #So why should we ever run without it?

  #To update ResultSets we validate as normal and then update where appropriate
  #What has precedence? Replicate name?
  #Update echip types as appropriate
  #What if this invalidates original rsets?
  #Then list sets not covered for removal by script?



  my (%echips, @log);
  my $rset_adaptor = $self->db->get_ResultSetAdaptor;
  my $chan_anal = $self->db->get_AnalysisAdaptor->fetch_by_logic_name('RawValue');

  #need to change this to default analysis
  #There we issues with setting VSN_GLOG as an env var
  #as this is tested for and the norm was skipped for some reason?
  my $chip_anal = $self->db->get_AnalysisAdaptor->fetch_by_logic_name($self->norm_method());

  #Try import sets like this first, so we know ther is new data
  my $chan_rset = $self->get_import_ResultSet($chan_anal, 'channel');
  my $rset =  $self->get_import_ResultSet($chip_anal, 'experimental_chip');


  #Else get them anyway and log
  if(! $rset){
	
	if($chan_rset){
	  $self->log('Identified partial Channel only import, updating MAGE-XML');
	}
	else{
	  ($chan_rset) = @{$rset_adaptor->fetch_all_by_name_Analysis($self->experiment->name.'_IMPORT', $chan_anal)};
	  #Don't need to test for >1 here as this has already been done in get_import_ResultSet
	  $self->log('All ExperimentalChips imported, updating MAGE-XML only');
	}

	($rset) = @{$rset_adaptor->fetch_all_by_name_Analysis($self->experiment->name.'_IMPORT', $chip_anal)};
  }


  #This will never happen now due to the change tab2mage rules in init_experiment
  #Remove?
  if(! $rset){
	throw('Cannot find ResultSet, are you trying to import a new experiment which already has a tab2mage file present?  Try removing the file, or specifying the -write_mage flag to parse_and_import.pl');
  }

  if(! -l $self->get_dir('output').'/MAGE-ML.dtd'){
	system('ln -s '.$ENV{'EFG_DATA'}.'/MAGE-ML.dtd '.$self->get_dir('output').'/MAGE-ML.dtd') == 0 || 
	  throw('Failed to link MAGE-ML.dtd');
  }
 

  $self->log('VALIDATING MAGE XML');
  my $reader = Bio::MAGE::XML::Reader->new();
  $mage_xml ||= $self->get_config('mage_xml_file');
  $self->{'mage'} = $reader->read($mage_xml);
  
  #this should only ever return 1 for an import
  foreach my $mage_exp(@{$self->{'mage'}->getExperiment_package->getExperiment_list()}){	

	if($mage_exp->getName() ne $self->name()){
	  $self->log('MAGE experiment name ('.$mage_exp->getName().') does not match import name ('.$self->name().')');
	}
	
	#add more experiment level validation here?

	foreach my $assay (@{$mage_exp->getBioAssays()}){

	  if($assay->isa('Bio::MAGE::BioAssay::PhysicalBioAssay')){#channel
		$self->log('Validating PhysicalBioAssay "'.$assay->getName()."'\n");#hyb name(this is the file name for measured assays

		my $bioassc = $assay->getBioAssayCreation();#This is a Hybridisation
		my $array = $bioassc->getArray();#this is an ArrayChip
		my $design_id = $array->getArrayDesign->getIdentifier();
		my $chip_uid = $array->getArrayIdentifier();
	
	
		foreach my $echip(@{$rset->get_ExperimentalChips()}){
		

		  if($echip->unique_id() eq $chip_uid){
			$self->log("Found ExperimentalChip:\t".$chip_uid);

			if(! exists $echips{$chip_uid}){
			  $echips{$chip_uid} = {(
									 total_biorep            => undef,
									 total_biotechrep        => undef,
									 experimental_biorep     => undef,
									 experimental_biotechrep => undef,
									 total_dye               => undef,
									 experimental_dye        => undef,
									 cell_type               => undef,
									 feature_type            => undef,
									)};
			}

			#Validate ArrayChip
			my ($achip) = @{$self->db->get_ArrayChipAdaptor->fetch_all_by_ExperimentalChips([$echip])};

			if($achip->design_id() ne $design_id){
			  push @log, "ArrayDesign Identifier (${design_id}) does not match ArrayChip design ID (".
				$achip->design_id().")\n\tSkipping channel and replicate validation";
			  #skip the channel/replicate validation here?	  
			} 
			else {			#validate channels and replicate names
					
			  foreach my $src_biomat (@{$bioassc->getSourceBioMaterialMeasurements()}) { #Channel materials(X1)?
				my $biomat = $src_biomat->getBioMaterial();	#LabelledExtract (IP/Control)
				#we could sub this passing $echip and biomat?
				#messy to pass regexs and populate correct echip hash attrs
				#also messy to populate log
				#keeping nested loop also prevents further obfuscation
				#do we need to do all the defined checks, or maybe just the first one?
				#Then we can skip all following warning?

				foreach my $treat (@{$biomat->getTreatments()}) {
				  #As there is effectively one more level of material extraction for the IP channel
				  #this loop will returns materials an iteration out of sync for each channel

				  foreach my $ssrc_biomat (@{$treat->getSourceBioMaterialMeasurements()}) {	#Channel measurement(x1)
					my $sbiomat = $ssrc_biomat->getBioMaterial();
					#This will either be techrep name for control of IP name for experimental channel
					#SOM0035_BR1_TR2 IP  #Immunoprecicpitate
					#SOM0035_BR1_TR2     #Extract
				
					if ($sbiomat->getName() =~ /BR[0-9]+_TR[0-9]+$/o) { #Total

					  if (! defined $echips{$chip_uid}{'total_biotechrep'}) {
						$echips{$chip_uid}{'total_biotechrep'} = $sbiomat->getName();
					  }
					  else{
						push @log, "Found two TOTAL Channels on same chip with biotechreps:\t".$sbiomat->getName().
						  " and ".$echips{$chip_uid}{'total_biotechrep'};
					  }
					}else{#Experimental

					  #get feature type from assay
					  my $fv_ref = $assay->getBioAssayFactorValues();
					  if(! defined $fv_ref){
						throw('No FactorValues found, you must populate the "Immunoprecipitate" field. Maybe you forgot to specify -feature_type?'); 
					  }

					  my ($feature_type);
					  
					  foreach my $fvalue(@{$fv_ref}){
						
						if($fvalue->getValue()->getCategory() eq 'Immunoprecipitate'){
						  $feature_type = $fvalue->getName();
						  $feature_type =~ s/anti\s*-\s*//;
						  $feature_type =~ s/\s*antibody\s*//;
						}
					  }
					  $echips{$chip_uid}{'feature_type'} = $feature_type;
					}

					foreach my $ttreat (@{$sbiomat->getTreatments()}) {
									  
					  foreach my $tsrc_biomat (@{$ttreat->getSourceBioMaterialMeasurements()}) {
						my $tbiomat = $tsrc_biomat->getBioMaterial();
						#SOM0035_BR1_TR2     #Extract (exp)
						#SOM0035_BR1              #Sample (total)
									   	
						if ($tbiomat->getName() =~ /BR[0-9]+_TR[0-9]+$/o) { #experimental
						  
						  if (! defined $echips{$chip_uid}{'experimental_biotechrep'}) {
							$echips{$chip_uid}{'experimental_biotechrep'} = $tbiomat->getName();
						  }
						  else{
							push @log, "Found two EXPERIMENTAL Channels on same chip with biotechreps:\t".$tbiomat->getName().
							  " and ".$echips{$chip_uid}{'experimental_biotechrep'};
						  }
						
						  my $dye = $biomat->getLabels()->[0]->getName();
							
						  foreach my $chan (@{$echip->get_Channels()}) {
							  
							if ($chan->type() eq 'EXPERIMENTAL') {
								
							  if (uc($dye) ne uc($chan->dye())) {
								push @log, "EXPERIMENTAL channel dye mismatch:\tMAGE = ".uc($dye).' vs DB '.uc($chan->dye);
							  } else {
								$echips{$chip_uid}{'experimental_dye'} = uc($dye);
							  }
							}
						  }
						} 
						else { #control
											  
						  if (! defined $echips{$chip_uid}{'total_biorep'}) {
							$echips{$chip_uid}{'total_biorep'} = $tbiomat->getName();
						  }
						  else{
							push @log, "Found two TOTAL Channels on same chip with biotechreps:\t".$tbiomat->getName().
							  " and ".$echips{$chip_uid}{'total_biorep'};
						  }
						  
						  my $dye = $biomat->getLabels()->[0]->getName();
						
						  foreach my $chan (@{$echip->get_Channels()}) {
						  
							if ($chan->type() eq 'TOTAL') {
							
							  if (uc($dye) ne uc($chan->dye())) {
								push @log, "TOTAL channel dye mismatch:\tMAGE = ".uc($dye).' vs DB '.uc($chan->dye);
							  } 
							  else {
								$echips{$chip_uid}{'total_dye'} = uc($dye);
							  }
							}
						  }
						}
						#could do one more iteration and get Source and FeatureType?
						#we should really extend this, and then update the EC cell_type and feature_types
						#these features might not be biotmats tho...need to check


						foreach my $ftreat (@{$tbiomat->getTreatments()}) {
									  
						  foreach my $fsrc_biomat (@{$ftreat->getSourceBioMaterialMeasurements()}) {
							my $fbiomat = $fsrc_biomat->getBioMaterial();
							#EXPERIMENTAL - biorep 
							#TOTAL        - source/cell type
							my $cell_type;

							if($fbiomat->getName() =~ /BR[0-9]+$/o){#EXPERIMETNAL
							
							  if(! defined $echips{$chip_uid}{'experimental_biorep'}){
								$echips{$chip_uid}{'experimental_biorep'} = $fbiomat->getName();
							  }
							  else{
								push @log, "Found two Experimental Channels on same chip with bioreps:\t".$fbiomat->getName().
								  " and ".$echips{$chip_uid}{'experimental_biorep'};
							  }


							  #last treatment/measurement/biomat level should go here
							  #as TOTAL channel does not have another level and will fail
							  foreach my $xtreat (@{$fbiomat->getTreatments()}) {
								
								foreach my $xsrc_biomat (@{$xtreat->getSourceBioMaterialMeasurements()}) {
								  my $xbiomat = $xsrc_biomat->getBioMaterial();
								  
								  foreach my $char(@{$xbiomat->getCharacteristics()}){
									$cell_type = $char->getValue() if($char->getCategory() eq 'CellType');
								  }
								}
							  }

							}else{#this should be BioSource
							  #which should have CellType as characteristic
							  #we could change tab2mage and have this as a factor value, 
							  #but don't want to start messing with "standard" format
						
							  foreach my $char(@{$fbiomat->getCharacteristics()}){
								$cell_type = $char->getValue() if($char->getCategory() eq 'CellType');
							  }
							}
						
							#can have cell_type validation here
							if(! defined $echips{$chip_uid}{'cell_type'}){
							  $echips{$chip_uid}{'cell_type'} = $cell_type;
							}
							elsif( $echips{$chip_uid}{'cell_type'} ne $cell_type){
							  push @log, "Found Channels on same chip (${chip_uid}) with different cell types:\t".
								$cell_type." and ".$echips{$chip_uid}{'cell_type'};
							}
						  }
						}
					  }
					}
				  }
				}
			  }
			}
		  }						#end of echip
		}						#end of foreach echip
	  }							#end of physbioassay	
	}							#end of foreach assay
  }								#end of foreach exp



  #we should fail here with log before we update the result sets

   #we need to build rep names
  #we're currently using sample labels, in the tab2mage file
  #altho' previous sets have been using exp name
  #these have been manually patched afterwards

  #More desirable to have exp name as rset name, but no way of doing BR validation
  #based on sample label, if we don't have it in the tab2mage
  #if we change it in the DB then we need to update the tab2mage

  #no way to do this when generating tab2mage as the user hasn't yet defined the reps
  #we could just make reps based on sample labels
  #then we just assume that alterations made by the user are correct
  #as we can no longer validate using sample labels
  #can still validate using cell/feature type

  #no longer need vendor specific validation as this will be done in tab2mage generation


  #We need to validate reps here
  #then update ec records as appropriate and then create rsets

  my (%bio_reps, %tech_reps);
  my $ct_adaptor = $self->db->get_CellTypeAdaptor();
  my $ft_adaptor = $self->db->get_FeatureTypeAdaptor();
 
#select rs.*, ec.*, c.* from result_set rs, chip_channel cc, channel c, experimental_chip ec where rs.result_set_id=cc.result_set_id and cc.table_name='experimental_chip' and cc.table_id=ec.experimental_chip_id and cc.table_id=c.experimental_chip_id order by name;

  foreach my $echip (@{$rset->get_ExperimentalChips()}) {

	my ($biorep, $biotechrep);

	if (! exists $echips{$echip->unique_id()}) {
	  push @log, "No MAGE entry found for ExperimentalChip:\t".$echip->unique_id();
	} 
	else {

	  foreach my $chan_type('total', 'experimental'){
		
		$biorep = $echips{$echip->unique_id()}{$chan_type.'_biorep'};
		$biotechrep = $echips{$echip->unique_id()}{$chan_type.'_biotechrep'};

		if (! defined $biotechrep) {
		  push @log, 'ExperimentalChip('.$echip->unique_id().') Extract field do not meet naming convention(SAMPLE_BRN_TRN)';
		}							#! defined biorep? will never occur at present
		elsif ($biotechrep !~ /\Q$biorep\E/) {
		  push @log, "Found Extract(techrep) vs Sample(biorep) naming mismatch\t${biotechrep}\tvs\t$biorep";
		} 
		
		if ( ! $echips{$echip->unique_id()}{$chan_type.'_dye'}) {
		  push @log, "No ".uc($chan_type)." channel found for ExperimentalChip:\t".$echip->unique_id();
		}

	  }

	  #Is this is really implicit in the test above
	  if($echips{$echip->unique_id()}{'experimental_biorep'} ne $echips{$echip->unique_id()}{'total_biorep'}){
		push @log, "Found biorep mismatch between channels of ExperimentalChip ".$echip->unique_id().":\n".
		  "\tEXPERIMENTAL\t".$echips{$echip->unique_id()}{'experimental_biorep'}."\tTOTAL\t".
			$echips{$echip->unique_id()}{'total_biorep'};
	  }

	  #Is this is really implicit in the test above
	  if($echips{$echip->unique_id()}{'experimental_biotechrep'} ne $echips{$echip->unique_id()}{'total_biotechrep'}){
		push @log, "Found biotechrep mismatch between channels of ExperimentalChip ".$echip->unique_id().":\n".
		  "\tEXPERIMENTAL\t".$echips{$echip->unique_id()}{'experimental_biotechrep'}."\tTOTAL\t".
			$echips{$echip->unique_id()}{'total_biotechrep'};
	  }

		   
	}


	#Now we need to validate ec has same feature/cell type as other ecs in this br
	#this does not handle import sets which ARE allowed to have same name but different types

	#warn "Processing ".$echip->unique_id()." $biorep $biotechrep";

	
	if(exists $bio_reps{$biorep}){


	  if(! defined $bio_reps{$biorep}{'cell_type'}){
		push @log, "Found undefined CellType for biorep $biorep";
	  }
	  elsif($bio_reps{$biorep}{'cell_type'}->name() ne  $echips{$echip->unique_id()}{'cell_type'}){
		push @log, "Found CellType mismatch between $biorep and ExperimentalChip ".$echip->unique_id();
	  }
	  
	  
	  if(! defined $bio_reps{$biorep}{'feature_type'}){
		push @log, "Found undefined FeatureType for biorep $biorep";
	  }
	  elsif($bio_reps{$biorep}{'feature_type'}->name() ne  $echips{$echip->unique_id()}{'feature_type'}){
		push @log, "Found FeatureType mismatch between $biorep and ExperimentalChip ".$echip->unique_id();
	  }
	  
	  #warn "$biorep exists with\t".$bio_reps{$biorep}{'cell_type'}->name().' '.$bio_reps{$biorep}{'feature_type'}->name();
	  
	  #We need to set the tech rep here too!
	  #Do we need to validate this also, as above.
	  #This would be overkill due to the inherant nature of the TR to BR relationship

	  if(! exists $tech_reps{$biotechrep}){
		 $tech_reps{$biotechrep}{'cell_type'} = $bio_reps{$biorep}{'cell_type'};
		 $tech_reps{$biotechrep}{'feature_type'} = $bio_reps{$biorep}{'feature_type'};
	  }


	}else{

	  #warn "Creating new BR $biorep and TR $biotechrep";

	  if(defined $echips{$echip->unique_id()}{'cell_type'}){

		my $cell_type = $ct_adaptor->fetch_by_name($echips{$echip->unique_id()}{'cell_type'});

		if(! defined $cell_type){
		  push @log, 'CellType '.$echips{$echip->unique_id()}{'cell_type'}.' does not exist in the database, please use the import_type.pl script';
		}else{
		  $bio_reps{$biorep}{'cell_type'} = $cell_type;
		  $tech_reps{$biotechrep}{'cell_type'} = $cell_type;
		#  warn "Setting ".$echip->unique_id()." $biorep $biotechrep ".$cell_type->name;
		}
	  }else{
		warn "No CellType specified for ExperimentalChip:\t".$echip->unique_id()."\n";
	  }


	  if(defined $echips{$echip->unique_id()}{'feature_type'}){
		my $feature_type = $ft_adaptor->fetch_by_name($echips{$echip->unique_id()}{'feature_type'});

		if(! defined $feature_type){
		  push @log, 'FeatureType '.$echips{$echip->unique_id()}{'feature_type'}.' does not exist in the database, please use the import_type.pl script';
		}
		else{
		  $bio_reps{$biorep}{'feature_type'} = $feature_type;
		  $tech_reps{$biotechrep}{'feature_type'} = $feature_type;

		  #warn "Setting ".$echip->unique_id()." $biorep $biotechrep ".$feature_type->name;
		}
	  }else{
		warn "No FeatureType specified for ExperimentalChip:\t".$echip->unique_id()."\n";
	  }
	}

	push @{$tech_reps{$biotechrep}{'echips'}}, $echip->unique_id();
	push @{$bio_reps{$biorep}{'echips'}}, $echip->unique_id();	
  }
  



  if (@log) {
	$self->log("MAGE VALIDATION REPORT\n::\t".join("\n::\t", @log));
	throw("MAGE VALIDATION FAILED\nPlease correct tab2mage file and try again:\t".$self->get_config('tab2mage_file'));
  } else {
	$self->log('MAGE VALDIATION SUCCEEDED');
  }


  #we also need to build the tech rep results sets(not displayable)
  #do we need to have result sets for each biorep too?
  #update ExperimentalChip replicate info
  my (%rsets);
  my %types = (
			   feature => {},
			   cell    => {},
			  );


 
  #This needs to update and split the import/top level sets so they are of same types
  #update ec type here as we have ec context
  #careful not to update multiple times, just once for each ec
 
  my $eca = $self->db->get_ExperimentalChipAdaptor();
  

  foreach my $echip (@{$rset->get_ExperimentalChips()}) {
	my ($cell_type, $feature_type);

	#Set biorep info and rset
	foreach my $biorep (keys %bio_reps){

	  foreach my $chip_uid(@{$bio_reps{$biorep}{'echips'}}){

		if($chip_uid eq $echip->unique_id()){
		  $echip->biological_replicate($biorep);
		  $cell_type = $bio_reps{$biorep}{'cell_type'};
		  $feature_type = $bio_reps{$biorep}{'feature_type'};

		  if(! defined $rsets{$biorep}){
			
			$rsets{$biorep} = Bio::EnsEMBL::Funcgen::ResultSet->new
			  (
			   -NAME         => $biorep,#this may not be unique, prepend with exp name? Force method to use Experiment_and_name?
			   -ANALYSIS     => $rset->analysis(),
			   -TABLE_NAME   => 'experimental_chip',
			   -FEATURE_TYPE => $feature_type,
			   -CELL_TYPE    => $cell_type,
			  );

			#record cell and feature types
			$types{'feature'}{$feature_type->name()} = $feature_type;
			$types{'cell'}{$cell_type->name()} = $cell_type;
			$self->log("Created BioRep ResultSet:\t".$rsets{$biorep}->log_label);
		  }
		  
		  $rsets{$biorep}->add_table_id($echip->dbID(), $rset->get_chip_channel_id($echip->dbID()));
		}
	  }
	}

	#reset echip types
	$echip->feature_type($feature_type);
	$echip->cell_type($cell_type);

	
	#set tech rep info and rset
	foreach my $techrep(keys %tech_reps){
	  
	  foreach my $chip_uid(@{$tech_reps{$techrep}{'echips'}}){
		
		if($chip_uid eq $echip->unique_id()){
		  $echip->technical_replicate($techrep);

		  if(! defined $rsets{$techrep}){
			$rsets{$techrep} = Bio::EnsEMBL::Funcgen::ResultSet->new
			  (
			   -NAME       => $techrep,#this may not be unique, prepend with exp name? Force method to use Experiment_and_name?
			   -ANALYSIS   => $rset->analysis(),
			   -TABLE_NAME => 'experimental_chip',
			   -FEATURE_TYPE => $tech_reps{$techrep}{'feature_type'},
			   -CELL_TYPE    => $tech_reps{$techrep}{'cell_type'},
			  );

			$self->log("Created TechRep ResultSet:\t".$rsets{$techrep}->log_label);
		  }
		  $rsets{$techrep}->add_table_id($echip->dbID(), $rset->get_chip_channel_id($echip->dbID()));
		}
	  }
	}

	$echip->adaptor->update_replicate_types($echip);#store rep info
  }


  ### Reset/Update/Clean import sets type fields
  my $sql;

  if(scalar keys %{$types{'feature'}} >1){
	$self->log('Resetting IMPORT FeatureType to NULL for multi-FeatureType Experiment');
	$sql = "UPDATE result_set set feature_type_id='NULL' where result_set_id in (".$rset->dbID().', '.$chan_rset->dbID().')';

  }else{
	my ($ftype) = values %{$types{'feature'}};

	if(! defined $rset->feature_type()){
	  $self->log('Updating IMPORT FeatureType to '.$ftype->name());
	  $sql = "UPDATE result_set set feature_type_id=".$ftype->dbID()." where result_set_id in (".$rset->dbID().', '.$chan_rset->dbID().')';
	}
	elsif($rset->feature_type->dbID ne $ftype->dbID()){
	  warn 'FeatureType mismatch between IMPORT sets('.$rset->feature_type->name().') vs meta sets('.$ftype->name.
	  "\nUpdating to IMPORT to match meta";
	  $self->log('WARNING: FeatureType mismatch.  Updating IMPORT FeatureType('.$rset->feature_type->name().') to match meta('.$ftype->name.')');
	  $sql = "UPDATE result_set set feature_type_id=".$ftype->dbID()." where result_set_id in (".$rset->dbID().', '.$chan_rset->dbID().')';

	}
  }

  $self->db->dbc->do($sql) if $sql;

  undef $sql;

  if(scalar keys %{$types{'cell'}} >1){
	$self->log('Resetting IMPORT CellType to NULL for multi-CellType Experiment');
	my $sql = "UPDATE result_set set cell_type_id='NULL' where result_set_id in (".$rset->dbID().', '.$chan_rset->dbID().')';
  }else{
	my ($ctype) = values %{$types{'cell'}};

	if(! defined $rset->cell_type()){
	  $self->log('Updating IMPORT CellType to '.$ctype->name());
	  $sql = "UPDATE result_set set cell_type_id=".$ctype->dbID()." where result_set_id in (".$rset->dbID().', '.$chan_rset->dbID().')';
	}
	elsif($rset->cell_type->dbID ne $ctype->dbID()){
	  warn 'CellType mismatch between IMPORT sets('.$rset->cell_type->name().') vs meta sets('.$ctype->name.
	  "\nUpdating to IMPORT to match meta";
	  $self->log('WARNING: FeatureType mismatch.  Updating IMPORT CellType('.$rset->cell_type->name().') to match meta('.$ctype->name.')');
	  $sql = "UPDATE result_set set cell_type_id=".$ctype->dbID()." where result_set_id in (".$rset->dbID().', '.$chan_rset->dbID().')';
	}
  }

  $self->db->dbc->do($sql) if $sql;

  ### Generate new top level sets here based on br type combos
  #we risk duplicating sets here if import set is set to one cell/featuretype
  #duplicate anyway, as import is really just for easy tracking of all chips during import

  my %toplevel_sets;
  my $toplevel_cnt = 1;
  #could tidy up toplevel_sets implmentation

  foreach my $new_rset(values %rsets){
	
	my $ftype_name = (defined $new_rset->{'feature_type'}) ? $new_rset->{'feature_type'}->name() : undef;
	my $ctype_name = (defined $new_rset->{'cell_type'}) ? $new_rset->{'cell_type'}->name() : undef;

	if(! exists $toplevel_sets{$ftype_name}){
	  $toplevel_sets{$ftype_name} = {};
	  $toplevel_sets{$ftype_name}{'feature_type'} = $new_rset->{'feature_type'};
	}



	if(! exists $toplevel_sets{$ftype_name}{$ctype_name}){
	  $toplevel_sets{$ftype_name}{$ctype_name}{'cell_type'} = $new_rset->{'cell_type'};
	  $toplevel_sets{$ftype_name}{$ctype_name}{'rsets'} = [$new_rset];
	}else{
	  push @{$toplevel_sets{$ftype_name}{$ctype_name}{'rsets'}}, $new_rset;
	}
  }



  #build toplevel sets for each feature/cell type combo using constituent rsets
  foreach my $ftype_name(keys %toplevel_sets){
	
	foreach my $ctype_name(keys %{$toplevel_sets{$ftype_name}}){
	  
	  next if $ctype_name eq 'feature_type';#skip feature type

	  #we need to give these a different key so we're not overwriting in the rset hash
	  $rsets{$self->experiment->name().'_'.$toplevel_cnt} = Bio::EnsEMBL::Funcgen::ResultSet->new
		(
		 -NAME       => $self->experiment->name(),
		 -ANALYSIS   => $rset->analysis(),
		 -TABLE_NAME => 'experimental_chip',
		 -FEATURE_TYPE => $toplevel_sets{$ftype_name}{'feature_type'},
		 -CELL_TYPE    => $toplevel_sets{$ftype_name}{$ctype_name}{'cell_type'},
		);

	  $self->log("Created toplevel ResultSet for:\t". $rsets{$self->experiment->name().'_'.$toplevel_cnt}->log_label);

	  #add consituent table ids
	  foreach my $new_rset(@{$toplevel_sets{$ftype_name}{$ctype_name}{'rsets'}}){
		
		foreach my $ec_id(@{$new_rset->table_ids()}){

		  #Only add it if it has not already been added
		  if(!  $rsets{$self->experiment->name().'_'.$toplevel_cnt}->get_chip_channel_id($ec_id)){
			$rsets{$self->experiment->name().'_'.$toplevel_cnt}->add_table_id($ec_id, $new_rset->get_chip_channel_id($ec_id));
		  }
		}
	  }
	  $toplevel_cnt++;
	}
  }

  #ResultSet update strategy
  #To avoid messyness in resolving result_set differences
  #Simply delete all that are not used as supporting sets
  #and load new ones, log old supporting rsets for manual
  #reassignment and rollback.
  #If we have clash between an old set and a new set, rename old
  #set and log
  #We might not always have the previous data files.
  #But we might want to maintain all the previous rsets and just add a new one
  #At present this would require acquiring the previous Tab2Mage file
  #and adding the new data to it.
  #We could do with a way to merge data already in the DB with new meta data to form a new Tab2Mage file
  #and validate that
  

  my @previous_rep_sets;
  my @supporting_rset_dsets;


  #Get non-import Sets
  map {push @previous_rep_sets, $_ if $_->name !~ /_IMPORT$/} 
	@{$rset_adaptor->fetch_all_by_Experiment_Analysis($self->experiment, $chip_anal)};
 

  #rollback_ResultSet if possible
  if(@previous_rep_sets){
	$self->log('Found previously stored ResultSets');

	foreach my $prev_rset(@previous_rep_sets){
	  #THis is pushing undef array?
	  #But surely this should do nothing?
	  my $rset_dset = $self->rollback_ResultSet($prev_rset);
	  push @supporting_rset_dsets, $rset_dset if @$rset_dset;
	}
  }

  #Note: If we remove chips from an experiment, they are only removed from the non-import sets
  #To fully remove them, you need to use the rollback_experiment.pl script with -chip_ids
  #can we log this in get_import_ResultSet?

  $self->log('Storing ResultSets');

  #Store new tech, biol and toplevel type rsets
  foreach my $new_rset(values %rsets){
	my $replace_txt;

	#Rename old set if we have a name/anal/type clash
	foreach my $prs(@supporting_rset_dsets){
	
	  my ($pset, $dset) = @$prs;

	  if($pset->log_label eq $new_rset->log_label){
		$self->log("Found update supporting ResultSet clash, renaming to:\tOLD_".$rset->log_label);

		#We risk overwriting any previously renamed result sets.
		#Should use datestamp
		my $sql = 'UPDATE result_set set name="OLD_'.$rset->name.'" where result_set_id='.$pset->dbID;
		$self->db->dbc->do($sql);

		if($dset->product_FeatureSet){
		  $self->log('Associated DataSet('.$dset->name.') has already been processed. It is not wise to replace a supporting set without first rolling back the FeatureSet, as there may be additional supporting data');
		  warn 'Associated DataSet('.$dset->name.') has already been processed. It is not wise to replace a supporting set without first rolling back the FeatureSet, as there may be additional supporting data';
		}
		
		$replace_txt = 'Proposed ResultSet(dbID) replacement for DataSet('.$dset->name."):\t".$pset->dbID.' > ';
	  }
	}


	$new_rset->add_status('DAS_DISPLAYABLE');
	my ($new_rset) = @{$rset_adaptor->store($new_rset)};

	if(defined $replace_txt){
	  $self->log($replace_txt.$new_rset->dbID);
	}
  }

  my $xml_file = open_file($self->get_config('mage_xml_file'));

  #slurp in changing separator to null so we get it all in one string.
  $self->experiment->mage_xml(do{ local ($/); <$xml_file>});
  close($xml_file);

  $self->experiment($self->db->get_ExperimentAdaptor->update_mage_xml_by_Experiment($self->experiment()));
  
  return;

}

sub vendor {

  my ($self) = shift;

  if(@_){
	$self->{'vendor'} = shift;
	$self->{'vendor'} = uc($self->{'vendor'});
  }

  return $self->{'vendor'};

}

sub verbose {

  my ($self) = shift;	
  $self->{'verbose'} = shift if(@_);
  return $self->{'verbose'};

}

sub vsn_norm {

  my $self = shift;
  return $self->R_norm("VSN_GLOG");

}

General documentation

Post questions to the EnsEMBL development list ensembl-dev@ebi.ac.uk

Nathan Johnson, njohnson@ebi.ac.uk

  
  Example    : $imp->validate_mage() if(! $imp->{'write_mage'};
  Description: Validates auto-generated and manually edited mage against
               Experiment information, aswell as checking replicate defitions.
               Updates mage_xml table and replicate information accordingly.
               Any other differences are logged or an error is thrown if the 
               difference is deemed critical.
  Returntype : none
  Exceptions : throws if ...?
  Caller     : Bio::EnsEMBL::Funcgen::Importer
  Status     : At risk

R_norm	Description	Code
add_Array	Description	Code
array_file	Description	Code
array_name	Description	Code
array_set	Description	Code
arrays	Description	Code
cache_probe_info	Description	Code
cache_slice	Description	Code
cell_type	Description	Code
contact	Description	Code
create_output_dirs	Description	Code
db	Description	Code
dbname	Description	Code
description	Description	Code
design_type	Description	Code
dump_fasta	Description	Code
experiment	Description	Code
experiment_date	Description	Code
experimental_set_name	Description	Code
farm	Description	Code
feature_analysis	Description	Code
feature_set_description	Description	Code
feature_type	Description	Code
format	Description	Code
get_chr_seq_region_id	Description	Code
get_config	Description	Code
get_dir	Description	Code
get_import_ResultSet	No description	Code
get_probe_cache_by_Array	Description	Code
get_probe_id_by_name_Array	Description	Code
group	Description	Code
host	No description	Code
hybridisation_fields	No description	Code
init_array_import	Description	Code
init_experiment_import	Description	Code
init_tab2mage_export	No description	Code
location	Description	Code
name	Description	Code
new	Description	Code
norm_analysis	Description	Code
norm_method	Description	Code
pass	Description	Code
port	Description	Code
read_data	Description	Code
recovery	Description	Code
register_experiment	Description	Code
registry_host	Description	Code
registry_pass	Description	Code
registry_port	Description	Code
registry_user	Description	Code
resolve_probe_data	Description	Code
result_files	Description	Code
slice_adaptor	No description	Code
species	Description	Code
store_set_probes_features	Description	Code
tidy_duplicates	No description	Code
ucsc_coords	Description	Code
user	Description	Code
validate_group	Description	Code
validate_mage()	No description	Code
vendor	Description	Code
verbose	Description	Code
vsn_norm	Description	Code