From PTAGISWiki
#!/usr/bin/perl
# Root of the directory tree that gets indexed.
my $sockbase = '/net/sockeye/usr/pit/ptagdev';
# Presumably the web-served counterpart of the tree; not referenced by the
# code visible in this file -- TODO confirm whether it is still needed.
my $sockweb = '/var/www/html/sockeye';
# Start the recursive walk at the root of the tree.
grokdir($sockbase);
# grokdir($thisdir)
#
# Read the entries of $thisdir (everything except '.' and '..') and pass the
# directory path plus a reference to the entry list on to makeindex().
#
#   $thisdir - path of the directory to read
#
# Dies if the directory cannot be opened.
sub grokdir {
    my $thisdir = shift;
    print "grokdir: $thisdir\n";

    # A lexical handle is used because this sub is re-entered: makeindex()
    # calls back into grokdir() for subdirectories, and a shared bareword
    # handle would be reopened by the nested call.
    opendir(my $dh, $thisdir)
        or die "can't open directory $thisdir: $!";
    my @allfiles = grep { $_ ne '.' and $_ ne '..' } readdir $dh;

    # The full listing is already in memory, so close the handle before
    # descending; only one directory handle is open at any time.
    closedir($dh);

    makeindex($thisdir, \@allfiles);
}
# _makeindex_xml_escape($text)
#
# Return $text made safe for use as XML element content: characters that are
# not allowed in XML 1.0 are removed and &, <, > are replaced by entities.
# An undefined argument yields the empty string.
sub _makeindex_xml_escape {
    my $text = shift;
    return '' unless defined $text;
    # Control characters other than tab, LF and CR are illegal in XML 1.0.
    $text =~ tr/\x00-\x08\x0B\x0C\x0E-\x1F//d;
    # '&' must be handled first so the entities added below are not re-escaped.
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    return $text;
}

# _makeindex_slurp($path)
#
# Return the whole content of the file at $path as one string. If the file
# cannot be opened, a warning is emitted and the empty string is returned.
sub _makeindex_slurp {
    my $path = shift;
    open(my $fh, '<', $path) or do {
        warn "couldn't read $path: $!\n";
        return '';
    };
    local $/;    # undefined record separator: read everything at once
    my $data = <$fh>;
    close($fh);
    return defined $data ? $data : '';
}

# makeindex($thisdir, $allfilesref)
#
# For every entry of $thisdir listed in @$allfilesref, write a Solr
# <add><doc> XML file describing the entry (stat data, text/binary type,
# extension, a date parsed from the name, and the content of text files).
# The XML files are written below /home/rday/bin/solr, mirroring the entry's
# path relative to /net/sockeye. Once all documents for this directory are
# written, grokdir() is called on every entry that is a readable directory.
#
#   $thisdir     - path of the directory being indexed
#   $allfilesref - array reference holding the entry names inside $thisdir
#
# Reads the file-level $sockbase to derive the path used in the document id.
# Entries that cannot be stat'ed are skipped with a warning.
# Dies if an output directory or output file cannot be created or written.
sub makeindex {
    my ($thisdir, $allfilesref) = @_;
    my @allfiles = @$allfilesref;

    # Core modules, loaded here so the sub does not rely on file-level imports.
    require File::Basename;
    require File::Path;

    # $webdir is the path below the indexed root (used in the document id);
    # $localpath is the path below /net/sockeye (used for the local_path
    # field and for the location of the output file).
    my $webdir    = substr($thisdir, length($sockbase));
    my $localpath = substr($thisdir, length("/net/sockeye"));
    print "makeindex: $thisdir, " . scalar(@allfiles) . "\n";

    my $webdir_xml    = _makeindex_xml_escape($webdir);
    my $localpath_xml = _makeindex_xml_escape($localpath);

    foreach my $file (@allfiles) {
        my $path = "$thisdir/$file";

        # stat fields: 4 uid, 5 gid, 7 size, 8 atime, 9 mtime, 10 ctime.
        my @st = stat $path;
        unless (@st) {
            # e.g. a dangling symlink or an entry removed since readdir
            warn "couldn't stat $path: $!\n";
            next;
        }
        my ($uid, $gid, $size, $atime, $mtime, $ctime) = @st[4, 5, 7, 8, 9, 10];
        my $atimes = formatdate($atime);
        my $mtimes = formatdate($mtime);
        my $ctimes = formatdate($ctime);

        my $filetype = (-B $path) ? "binary" : "text";

        # First ".word" run in the name, and a date such as 31Dec95 if the
        # name contains one; both default to the empty string.
        my ($extension) = ($file =~ m/\.(\w+)/);
        my ($mod_date)  = ($file =~ m/(\d{1,2}\w{3}\d{2})/);
        $extension = '' unless defined $extension;
        $mod_date  = '' unless defined $mod_date;

        # Only text files contribute their content. The file is read directly
        # rather than through a shell command, so names containing spaces or
        # shell metacharacters are handled correctly.
        my $contents = "";
        if ($filetype eq "text") {
            $contents = _makeindex_slurp($path);
        }

        # Everything interpolated into the document is XML-escaped so that a
        # '&' or '<' in a name or in file content cannot produce malformed XML.
        my $file_xml      = _makeindex_xml_escape($file);
        my $extension_xml = _makeindex_xml_escape($extension);
        my $mod_date_xml  = _makeindex_xml_escape($mod_date);
        my $contents_xml  = _makeindex_xml_escape($contents);

        # NOTE: the id is the plain URL with no trailing quote character;
        # documents indexed with a different id format may need re-indexing.
        my $submittal = <<EOF;
<add><doc>
<field name="id">http://www.ptagis.org/doc-test$webdir_xml/$file_xml</field>
<field name="filename">$file_xml</field>
<field name="local_path">$localpath_xml</field>
<field name="filetype">$filetype</field>
<field name="uid">$uid</field>
<field name="gid">$gid</field>
<field name="size_in_bytes">$size</field>
<field name="atime">$atimes</field>
<field name="mtime">$mtimes</field>
<field name="ctime">$ctimes</field>
<field name="extension">$extension_xml</field>
<field name="mod_date_from_file_name">$mod_date_xml</field>
<field name="contents">$contents_xml</field>
</doc>
</add>
EOF

        my $solrname = "$localpath/$file.xml";
        my $outfile  = "/home/rday/bin/solr$solrname";
        my $basedir  = File::Basename::dirname($outfile);
        print "basedir = $basedir\n";

        # make_path creates missing parent directories and croaks on failure.
        File::Path::make_path($basedir) unless -d $basedir;

        open(my $out, '>', $outfile)
            or die "couldn't write $outfile: $!";
        print {$out} $submittal
            or die "couldn't write $outfile: $!";
        # Buffered write errors only surface at close, so check it as well.
        close($out)
            or die "couldn't write $outfile: $!";
    }

    # Descend only after every document for this directory has been written.
    # NOTE(review): -d follows symbolic links, so a link pointing back up the
    # tree would recurse without end -- confirm the tree contains none.
    foreach my $file (@allfiles) {
        my $path = "$thisdir/$file";
        grokdir($path) if -r $path and -d $path;
    }
}
# formatdate($indate)
#
# Convert a time given in epoch seconds to a UTC timestamp string of the
# form 1995-12-31T23:59:59Z.
#
#   $indate - epoch seconds; an undefined value is treated as 0
#
# Returns the formatted string.
sub formatdate {
    my $indate = shift;
    $indate = 0 unless defined $indate;

    # gmtime rather than localtime because the output must be in UTC. Only
    # the first six fields (sec, min, hour, mday, mon, year) are needed.
    my ($sec, $min, $hour, $mday, $mon, $year) = (gmtime($indate))[0 .. 5];

    # gmtime reports a zero-based month and the year as an offset from 1900.
    return sprintf("%04d-%02d-%02dT%02d:%02d:%02dZ",
        $year + 1900, $mon + 1, $mday, $hour, $min, $sec);
}