package EnsEMBL::Web::Tools::RobotsTxt;

use strict;
use warnings;

sub create {
  ### This is to try and stop search engines killing e! - the file is recreated
  ### on each server startup and placed in the first directory of the htdocs
  ### tree.
  ### Returns: none
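  ### Example call (hypothetical species list; the real caller passes the
  ### site's configured species names):
  ###   EnsEMBL::Web::Tools::RobotsTxt::create(['Homo_sapiens', 'Mus_musculus']);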
  my $species = shift;
  my $root = $SiteDefs::ENSEMBL_HTDOCS_DIRS[0];
  my %allowed = map { ($_,1) } @{$SiteDefs::ENSEMBL_EXTERNAL_SEARCHABLE||[]};

  # Always ignore the generated files themselves, plus anything already
  # listed in an existing .cvsignore
  my %ignore = ( 'robots.txt' => 1, '.cvsignore' => 1 );
  if (-e "$root/.cvsignore") {
    if (open my $in, '<', "$root/.cvsignore") {
      while (<$in>) {
        $ignore{$1} = 1 if /(\S+)/;
      }
      close $in;
    }
  }
warn "------------------------------------------------------------------------------
 Placing .cvsignore and robots.txt into $root
------------------------------------------------------------------------------
";

  if (open my $out, '>', "$root/.cvsignore") {
    print $out join("\n", sort keys %ignore), "\n";
    close $out;
  } else {
    warn "Unable to create .cvsignore file in $root";
  }

  if (open my $fh, '>', "$root/robots.txt") {
    print $fh qq(
User-agent: *
Disallow: /Multi/
Disallow: /BioMart/
);
    foreach (@{$species || []}) {
      print $fh qq(Disallow: /$_/\n);
      print $fh qq(Allow: /$_/geneview\n) if $allowed{'gene'};
      print $fh qq(Allow: /$_/sitemap.xml.gz\n);
    }
    print $fh qq(

User-agent: W3C-checklink
Disallow:
);
    close $fh;
  } else {
    warn "Unable to creates robots.txt file in $root-robots";
  }
}
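
### Illustrative output (assuming a species list of ['Homo_sapiens'] and
### 'gene' present in ENSEMBL_EXTERNAL_SEARCHABLE):
###
###   User-agent: *
###   Disallow: /Multi/
###   Disallow: /BioMart/
###   Disallow: /Homo_sapiens/
###   Allow: /Homo_sapiens/geneview
###   Allow: /Homo_sapiens/sitemap.xml.gz
###
###   User-agent: W3C-checklink
###   Disallow: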

1;