#!/usr/bin/perl -w
#
# Make a graph of the Kwiki nodes using GraphViz.
#
# Written by Andrew Ruthven <puck@actrix.gen.nz>
# $Id: make-kwiki-graph.pl,v 1.4 2004/10/27 02:28:53 andrew Exp $
#
# You need to have either a copy, or a symlink of config.yaml in the current
# directory.
#
# Nodes with solid borders are Wiki pages that exist in the Wiki.  Nodes with
# dashed borders don't exist in the Wiki.  Nodes with no links to other Nodes
# are orphans, no one links to them.
#
# If you just run this script with no options then a SVG is generated which
# does NOT contain the default Kwiki pages and is spat out on stdout.
#
# Try:
#   ./make-wiki-graph.pl -?
#
# For the command line options.

# Where is the Kwiki located on the machine?
my ($WIKI) = "/home/www/kwiki";

# What is the Kwiki base URL?
my ($URL)     = "http://akasha.actrix.co.nz/wiki?";

# If the file was edited in the last $LAST_DAY_TIME seconds then it goes
# at the top of the graph when --last-day is selected.
my ($LAST_DAY_TIME) = 60 * 60 * 24;

###
# No human configurable options after this point.
###

use CGI::Kwiki qw( 0.18 );
use GraphViz;
use Getopt::Long;
use Cwd;
push @INC, $WIKI;

my ($WIKI_DB) = "$WIKI/database";

# Formats that GraphViz knows about.  Format name first, and what to display
# in the -f ? option second.
my (%output_formats) = (
  'svg'   => 'SVG',
  'png'   => 'PNG',
  'ps'    => 'Postscript',
  'hpgl'  => 'Plotter output',
  'jpeg'  => 'JPEG',
  'cmap'  => 'Client side HTML image map',
  'imap'  => 'Server side HTML image map',
  'canon' => 'GraphViz input file, no layout',
  'text'  => 'GraphViz input file, layout applied'
);

# Layouts that GraphViz knows about.  Layout name first, and what to display
# in the -a ? option second.
my (%layouts) = (
  'dot'   => 'Default - works well for Wiki\'s but can be wide',
  'neato' => 'Spring layout - not good for Wiki\'s',
  'twopi' => 'Circular layout - works well for Wiki\'s'
);

# Keep track of our name.
my ($PROGRAM_NAME)             = $0 =~ /^.*\/(.*?)$/;

# We can hardcode our default output format and layout here.
my ($OUTPUT_FORMAT)            = 'svg';
my ($LAYOUT)                   = 'dot';

my ($SHOW_KWIKI_DEFAULT_PAGES) = undef;
my ($OUTPUT_FILE)              = undef;
my ($LAST_DAY_AT_TOP)          = undef;

GetOptions('format|f=s'      => \$OUTPUT_FORMAT,
           'last-day|l'      => \$LAST_DAY_AT_TOP,
           'output-file|o=s' => \$OUTPUT_FILE,
           'show-kwiki|s'    => \$SHOW_KWIKI_DEFAULT_PAGES,
	   'layout|a=s'      => \$LAYOUT,
	   'help|h|?'        => \&help);

# Check and make sure that we have valid options here.
$OUTPUT_FORMAT = check_options('format', \%output_formats, $OUTPUT_FORMAT);
$LAYOUT        = check_options('layout', \%layouts,        $LAYOUT);

# If we're putting the last day at top, then we need to know the current
# time.
my ($TIME) = undef;
if (defined $LAST_DAY_AT_TOP) {
  $TIME = time - $LAST_DAY_TIME;
}

# Keep track of where we're coming from (so we can go back there for the
# output) and jump into the wiki directory.  This ensures that the Wiki
# formatting stuff can determine if files exists correctly.
my ($cwd) = cwd();
chdir $WIKI;

# Go and build the graph.
my ($nodes) = parse_files();
my ($graph) = build_graph($nodes);

chdir $cwd;

# Spit the graph out in the appropriate format to the appropriate place.
my ($command) = "as_$OUTPUT_FORMAT";

if (defined $OUTPUT_FILE) {
  $graph->$command($OUTPUT_FILE);
} else {
  print $graph->$command;
}


# Parse all the Wiki pages and build a hash showing the connections between
# them.
sub parse_files {
  my ($driver) = CGI::Kwiki->load_driver;
  my ($kwiki)  = new CGI::Kwiki($driver);

  my ($nodes) = {};

  # Slurp in a directory listing.
  opendir(DIR, $WIKI_DB)
    || die "Failed to open directory $WIKI_DB for reading: $!\n";
  my (@files) = readdir(DIR);
  closedir(DIR);

  for my $file (@files) {
    # Skip hidden files.
    next if ($file =~ /^\./);

    # Maybe skip the Kwiki default stuff.
    #  - Kwiki 0.18 now comes with Chinese pages as well, how to skip them?
    next if (! defined $SHOW_KWIKI_DEFAULT_PAGES
             && $file =~ /^Kwiki|BrianIngerson/);

    # Slurp a file.
    open(FILE, "$WIKI_DB/$file")
      || warn "Failed to open $WIKI_DB/$file for reading: $!\n";
    my $content = do { local $/; <FILE> };
    close(FILE);

    # Disable the SlideShow stuff.
    $content =~ s/\[\&SLIDESHOW_SELECTOR\]/\[\&SLIDESHOW_IS_OFF_SELECTOR]/g;

    # Convert the wiki page to HTML as Kwiki does.
    my ($html) = $kwiki->formatter->process($content);

    # Build a node for this file.
    add_node($nodes, $file, 1);
  
    # Find all the WikiLinks to empty Wiki pages.
    if (@empty_matches = $html
        =~ /<a class="empty" href="$PROGRAM_NAME\?\w+">(\w+)<\/a>/go) {
      for my $node (@empty_matches) {
        add_edge($nodes, $file, $node, undef);
      }
    }

    # Find all the WikiLinks to Wiki pages.
    if (@matches = $html =~ /<a href="$PROGRAM_NAME\?\w+">(\w+)<\/a>/go) {
      for my $node (@matches) {
        add_edge($nodes, $file, $node, 1);
      }
    }
  }

  return $nodes;
}

# From a hash of relationships between Wiki pages build a Graph using
# GraphViz.
sub build_graph {
  my ($nodes) = shift;

  my ($graph) = GraphViz->new(no_overlap => 1, layout => $LAYOUT);

  for my $from_node (sort keys(%{ $nodes })) {
    graph_add_node($graph, $nodes, $nodes->{$from_node}{'name'});

    for my $to_node (sort keys(%{ $nodes->{$from_node}{'links'} })) {
      if (defined $nodes->{'to_node'}) {
        graph_add_edge($graph, $nodes, $from_node, $to_node);
      }
    }
  }

  return $graph;
}

# Add a node into the hash of nodes.
sub add_node {
  my ($nodes)  = shift;
  my ($node)   = shift;
  my ($exists) = shift;

  my ($time) = undef;

  # If the node exists, go and fetch the last modified date and size of
  # the file.  If the size is 0, then claim that it doesn't exist.
  if (defined $exists) {
    my ($size, $mtime) = (stat("$WIKI_DB/$node"))[7,9];

    if ($size == 0) {
      $exists = undef
    } else {
      $time = $mtime;
    }
  }

  if (! defined $nodes->{$node}) {
    $nodes->{$node}{'name'}      = $node;
    $nodes->{$node}{'exists'}    = $exists;
    $nodes->{$node}{'url'}       = "$URL$node";
    $nodes->{$node}{'mtime'}     = $time;
    $nodes->{$node}{'linked_to'} = 0;
  }
}

# Add a link from one node to another in the hash of nodes.
sub add_edge {
  my ($nodes)     = shift;
  my ($from_node) = shift;
  my ($to_node)   = shift;
  my ($exists)    = shift;

  add_node($nodes, $to_node, $exists);

  $nodes->{$from_node}{'links'}{$to_node} = \%{ $nodes->{'to_node'} };
  $nodes->{$to_node}{'linked_to'}++;
}

# Add a node into the graph.
sub graph_add_node {
  my ($graph)  = shift;
  my ($nodes)  = shift;
  my ($node)   = shift;
  my ($at_top) = undef;

  return unless defined $node;

  if (defined $LAST_DAY_AT_TOP && defined $nodes->{$node}{'mtime'}
      && $nodes->{$node}{'mtime'} > $TIME) {
    $at_top = 1;
  }

  # If the node exists, then we make the shape solid.  If it doesn't then
  # we use dashed.
  if (defined $nodes->{$node}{'exists'}) {
    if (defined $at_top) {
      $graph->add_node($nodes->{$node}{'name'}, style => 'solid',
                       URL => $nodes->{$node}{'url'}, rank => 'top');
    } else {
      $graph->add_node($nodes->{$node}{'name'}, style => 'solid',
                       URL => $nodes->{$node}{'url'});
    }
  } else {
    # Only put a node which doesn't exist on the graph if nothing trys to
    # link to it.  This is because of a hack to deal with Wiki pages that are
    # no longer wanted.  Since their size is 0, I just set the "exists" flag
    # to undef for them.  So if a node doesn't exist, and has no one trying
    # to link to it, it's deleted.
    if ($nodes->{$node}{'linked_to'} > 0) {
      if (defined $at_top) {
        $graph->add_node($nodes->{$node}{'name'}, style => 'dashed',
                         URL => $nodes->{$node}{'url'}, rank => 'top');
      } else {
        $graph->add_node($nodes->{$node}{'name'}, style => 'dashed',
                         URL => $nodes->{$node}{'url'});
      }
    }
  }
}

# Add an edge into the graph.
sub graph_add_edge {
  my ($graph)     = shift;
  my ($nodes)     = shift;
  my ($from_node) = shift;
  my ($to_node)   = shift;

  return unless defined $from_node && defined $to_node;

  # Let's make the below code slightly easier to read.
  my ($from_name) = $nodes->{$from_node}{'name'};
  my ($to_name)   = $nodes->{$to_node}{'name'};

  # Make sure that the destination node is defined - this to make sure
  # that it is dashed if it is supposed to be dashed.
  graph_add_node($graph, $nodes, $to_node);

  if (defined $nodes->{$to_node}{'links'}{$from_node}) {
    # Only add the edge if it hasn't already been added.
 
    if (! defined $nodes->{$to_node}{'edge_added_to_graph'}) {
      $graph->add_edge($from_name => $to_name, dir => 'both');
    }
  } else {
    $graph->add_edge($from_name => $to_name, dir => 'forward');
  }

  # Keep track of if the edge has been added.  Only used for bidirectional
  # edges.
  $nodes->{$from_node}{'edge_added_to_graph'} = 1;
}

# Check some of the options we may have been given.
#
# Type is either 'format', or 'layout'.
sub check_options {
  my ($type)      = shift;
  my ($available) = shift;
  my ($input)     = shift;

  # Make sure we have lowercase stuff here.
  $input = lc($input);

  # Allow the user to ask about the different things we make available.
  if ($input eq '?') {
    print_available_things($type, $available);
  }

  # Make sure the user has supplied a thing that we understand.
  if (! defined $available->{$input}) {
    warn "Sorry, you have selected an unknown $type: $input\n";

    exit 0;
  }

  return $input;
}

# Provide the user with help.
sub help {
  print "Product different types of graph from a Kwiki\n";
  print "\n";
  print "Run this program from within your Kwiki config directory.\n";
  print "\n";
  print "Usage:\n";
  print "  $0 [options]\n";
  print "\n";
  print "Options:\n";
  print "  -a, --layout\t\tSpecify GraphViz layout.  Use -a ? to find out\n";
  print "\t\t\tavailable layouts.  Default layout is $LAYOUT.\n";
  print "  -f, --format\t\tSpecify output format.  Use -f ? to find out\n";
  print "\t\t\tavailable formats.  Default format is " . uc($OUTPUT_FORMAT)
    . ".\n";
  print "  -l, --last-day\tPlace any pages that have changed in the last day\n";
  print "\t\t\tat the top of the graph.\n";
  print "  -o, --output-file\tWhere to store the output.  Defaults to stdout.\n";
  print "  -s, --show-kwiki\tShow all Kwiki default pages as well.  Defauls to off.\n";
  print "\n";

  exit 0;
}

# Print out either all the available output formats or layouts.
sub print_available_things {
  my ($type)     = shift;
  my ($things)   = shift;

  print "Available ";
  
  if ($type eq 'format') {
    print "output format";
  } elsif ($type eq 'layout') {
    print "GraphViz layouts";
  }
  print ":\n";
  print "\n";

  for my $thing (sort keys %{ $things }) {
    print "  $thing\t- " . $things->{$thing} . "\n";
  }
  print "\n";
  
  exit 0;
}
