Solr-sockeye.pl

From PTAGISWiki

Jump to: navigation, search
#!/usr/bin/perl

# $sockbase is the root of the tree that gets crawled; makeindex() also uses
# its length to strip that prefix from each directory when building ids.
# $sockweb is not referenced by the code visible in this file.
my ($sockbase, $sockweb) = (
        "/net/sockeye/usr/pit/ptagdev",
        "/var/www/html/sockeye",
);

# Start the recursive crawl at the root of the tree.
grokdir($sockbase);

# grokdir($thisdir)
#
# Reads the names of all entries in $thisdir (excluding "." and "..") and
# passes the directory name plus a reference to that list to makeindex().
# Dies if the directory cannot be opened.
sub grokdir {
        my $thisdir = shift;
        print "grokdir: $thisdir\n";

        # A lexical handle is used because makeindex() calls back into
        # grokdir() for subdirectories; a shared bareword handle would be
        # re-opened by the nested call while this call still owned it.
        opendir(my $dirhandle, $thisdir)
                or die "can't open directory $thisdir: $!";
        my @allfiles = grep { $_ ne '.' and $_ ne '..' } readdir $dirhandle;

        # The listing is fully in memory, so the handle is released before
        # the recursion starts instead of after it finishes.
        closedir($dirhandle);

        makeindex($thisdir, \@allfiles);
        return;
}

# makeindex($thisdir, $allfilesref)
#
# For every name in @$allfilesref (entries of $thisdir) this writes one Solr
# <add><doc> XML file under /home/rday/bin/solr, mirroring the entry's path
# below /net/sockeye and appending ".xml" to its name.  Afterwards it calls
# grokdir() on every entry that is a readable directory.
#
# Reads the file-level $sockbase: its length is stripped from the front of
# $thisdir to form the path part of each document id.
# Dies if an output file cannot be written.
sub makeindex {
        my $thisdir = shift;
        my $allfilesref = shift;

        # Loaded here so the sub does not depend on a file-level "use" line.
        require File::Basename;
        require File::Path;

        my @allfiles = @$allfilesref;
        my $webdir = substr($thisdir, length($sockbase));
        my $localpath = substr($thisdir, length("/net/sockeye"));
        print "makeindex: $thisdir, ". scalar(@allfiles)."\n";

        foreach my $file (@allfiles) {
                my $fullpath = "$thisdir/$file";

                # Slice positions: uid, gid, size, atime, mtime, ctime.  When
                # stat fails the slice is empty and every value stays undef.
                my ($uid, $gid, $size, $atime, $mtime, $ctime) =
                        (stat $fullpath)[4, 5, 7, 8, 9, 10];

                my $filetype = (-B $fullpath) ? "binary" : "text";

                # First run of word characters that follows a dot in the name.
                my ($extension) = ($file =~ m/\.(\w+)/);
                # 1-2 digits, three word characters, two digits (e.g. 30Mar09).
                my ($mod_date) = ($file =~ m/(\d{1,2}\w{3}\d{2})/);

                # Only entries classified as text have their contents indexed.
                my $contents = ($filetype eq "text") ? _slurp_file($fullpath) : "";

                my @fields = (
                        [ "id",            "http://www.ptagis.org/doc-test$webdir/$file" ],
                        [ "filename",      $file ],
                        [ "local_path",    $localpath ],
                        [ "filetype",      $filetype ],
                        [ "uid",           $uid ],
                        [ "gid",           $gid ],
                        [ "size_in_bytes", $size ],
                        [ "atime",         formatdate($atime) ],
                        [ "mtime",         formatdate($mtime) ],
                        [ "ctime",         formatdate($ctime) ],
                        [ "extension",     $extension ],
                        [ "mod_date_from_file_name", $mod_date ],
                        [ "contents",      $contents ],
                );

                # Every value is escaped so that markup characters in names or
                # file contents cannot produce a malformed document.
                my $submittal = "<add><doc>\n";
                foreach my $field (@fields) {
                        my ($name, $value) = @$field;
                        $submittal .= "<field name=\"$name\">"
                                . _xml_escape($value) . "</field>\n";
                }
                $submittal .= "</doc>\n</add>\n";

                my $outfile = "/home/rday/bin/solr$localpath/$file.xml";
                my $basedir = File::Basename::dirname($outfile);
                print "basedir = $basedir\n";
                File::Path::make_path($basedir) unless -d $basedir;

                open(my $out, '>', $outfile)
                        or die "couldn't write $outfile: $!";
                print {$out} $submittal;
                # Buffered write errors only surface when the handle is closed.
                close($out) or die "couldn't finish writing $outfile: $!";
        }

        # Descend into subdirectories only after this directory is indexed.
        foreach my $file (@allfiles) {
                my $fullpath = "$thisdir/$file";
                if (-r $fullpath and -d $fullpath) {
                        grokdir($fullpath);
                }
        }
        return;
}

# Returns the whole content of $path as one string, or "" when the file
# cannot be opened or read.  No shell is involved, so any file name is safe.
sub _slurp_file {
        my $path = shift;
        open(my $in, '<', $path) or return "";
        local $/;
        my $data = <$in>;
        close($in);
        return defined $data ? $data : "";
}

# Returns $text made safe for use as XML character data: undef becomes "",
# control characters that XML 1.0 forbids are removed, and & < > " are
# replaced by their predefined entities.
sub _xml_escape {
        my $text = shift;
        return "" unless defined $text;
        $text =~ tr/\x00-\x08\x0B\x0C\x0E-\x1F//d;
        $text =~ s/&/&amp;/g;
        $text =~ s/</&lt;/g;
        $text =~ s/>/&gt;/g;
        $text =~ s/"/&quot;/g;
        return $text;
}

# formatdate($indate)
#
# Takes a time in epoch seconds and returns it as a UTC timestamp string of
# the form 1995-12-31T23:59:59Z.
sub formatdate {
        my $indate = shift;

        # An undefined input is formatted as epoch 0, which is what gmtime()
        # makes of it anyway; being explicit avoids an "uninitialized" warning.
        $indate = 0 unless defined $indate;

        my ($sec, $min, $hour, $mday, $mon, $year) = gmtime($indate);

        # gmtime() reports a zero-based month and the year as an offset from
        # 1900.  The result is returned directly rather than stored in a
        # package-global variable.
        return sprintf("%04d-%02d-%02dT%02d:%02d:%02dZ",
                $year + 1900, $mon + 1, $mday, $hour, $min, $sec);
}
Personal tools