#!/usr/bin/env perl
# swg -- simple wiki/zettelkästen helper/generator
# Copyright (C) 2022-2025 Sergey Matveev <stargrave@stargrave.org>

use strict;
use warnings;
use utf8;
use Carp q{croak};

use version; our $VERSION = qv(1.0.0);

use Encode qw(encode decode);
binmode STDOUT, q{:encoding(UTF-8)};

my $CR = "\r";

sub usage {
    print {*STDERR} <<"END_USAGE"
Usage:
    $0 links PAGE    -- show links on the PAGE
    $0 backs PAGE    -- show backlinks to the PAGE
    $0 cats [PREFIX] -- show categories
    $0 htmls DIR     -- render HTMLs in DIR
    $0 dot | dot -Tpng >relations.png
    $0 info >out.info
    $0 latex >out.tex

For debugging:
    $0 files -- list files that will be processed
    $0 dump  -- dump links/backlinks information
    $0 html PAGE >PAGE.html
    $0 gen-index CAT >CAT.html

By default SWG_DO_BACKS=1 is set.
SWG_PRINT_MISSING=0. SWG_DO_SRC=1.
Do not forget about .swgignore with regular expressions.
END_USAGE
;
    exit 1;
}

if ($#ARGV == -1) { usage }

my $DoBacks = ((not exists $ENV{SWG_DO_BACKS}) or
    ($ENV{SWG_DO_BACKS} eq q{1})) ? 1 : 0;
my $DoSrc = ((not exists $ENV{SWG_DO_SRC}) or
    ($ENV{SWG_DO_SRC} eq q{1})) ? 1 : 0;

my %Mtimes;
my %CatFiles;
my %CatDirs;

use File::Basename;

{
    my @ignores;
    my $swgignore = q{.swgignore};
    if (-e $swgignore) {
        open my $fh, q{<:encoding(UTF-8)}, $swgignore or croak "$!";
        while (<$fh>) {
            chop;
            push @ignores, $_;
        }
        close $fh or croak "$!";
    }
    use File::Find;
    use POSIX qw(strftime);
    my $pth;
    my $cat;
    my $mtime;
    sub wanted {
        my $fn = $_;
        $pth = decode q{UTF-8}, $File::Find::name;
        $pth =~ s/^[.]\/?//;
        foreach (split /\//, $pth) {
            if (/^[.]/) {
                $File::Find::prune = 1;
                return;
            }
        }
        if (-d $fn) {
            $pth .= q{/};
        }
        foreach (@ignores) {
            if ($pth =~ /$_/) {
                $File::Find::prune = 1;
                return;
            }
        }
        if ($ARGV[0] eq q{files}) {
            print "$pth\n";
            return;
        }
        $cat = dirname $pth;
        $cat = ($cat eq q{.}) ? q{/} : "$cat/";
        if (-d $fn) {
            if (not defined $CatFiles{$pth}) {
                my @files;
                my @dirs;
                $CatFiles{$pth} = \@files;
                $CatDirs{$pth} = \@dirs;
            }
            return if $pth eq q{/};
            push @{$CatDirs{$cat}}, $pth;
        } else {
            push @{$CatFiles{$cat}}, $pth;
            (undef, undef, undef, undef, undef, undef, undef, undef,  undef,
                $mtime, undef, undef, undef) = stat $fn or croak "$!";
            $Mtimes{$pth} = strftime q{%Y-%m-%d %H:%M:%S}, gmtime $mtime;
        }
        return;
    }
    my %opts = (wanted => \&wanted, follow => 1);
    find(\%opts, q{.});
}

sub noindex {
    my $l = shift;
    if ($l =~ /(.*\/)index$/) {
        return $1;
    }
    return $l;
}

my %Lines;
my %Links;
my %Backs;
for my $pth (keys %Mtimes) {
    my %found;
    my $lines = 0;
    my sub procline;
    sub procline {
        my ($line) = @_;
        if ($line =~ /^.*<<.*\[(.*)\]${CR}/) {
            $found{$1} = 1;
            open my $fh, q{<:encoding(UTF-8)}, $1 or croak "$!";
            while (<$fh>) { $lines++; procline $_ }
            close $fh or croak "$!";
            return;
        }
        foreach my $w (split /\s+/, $line) {
            if ($w =~ /\[([^]]+)\]/) {
                $w = $1;
            } else {
                next;
            }
            if ($w =~ /\/$/) {
                if (not exists $CatDirs{$w}) {
                    if (exists $ENV{SWG_PRINT_MISSING}) {
                        print "missing $w\n";
                    }
                    next;
                }
            } else {
                if (not exists $Mtimes{$w}) {
                    if (exists $ENV{SWG_PRINT_MISSING}) {
                        print "missing $w\n";
                    }
                    next;
                }
            }
            $found{$w} = 1;
        }
        return;
    }
    open my $fh, q{<:encoding(UTF-8)}, $pth or croak "$!";
    while (<$fh>) { $lines++; procline $_ }
    close $fh or croak "$!";
    $Lines{$pth} = $lines;
    my @ws = sort keys %found;
    next if $#ws == -1;
    $pth = noindex $pth;
    $Links{$pth} = \@ws;
    foreach (@ws) {
        if (not exists $Backs{$_}) {
            my %h;
            $Backs{$_} = \%h;
        }
        $Backs{$_}{$pth} = 1;
    }
}

sub genIndex {
    my $out = shift;
    my $page = shift;
    my $ctr = 0;
    my $pth;
    print {$out} "        Links:\n";
    foreach (sort @{$CatFiles{$page}}) {
        printf {$out} "%3d % -39s %s %8d\n", $ctr, "[$_]", $Mtimes{$_}, $Lines{$_};
        $ctr++;
    }
    my @links = sort @{$CatDirs{$page}};
    if ($#links != -1) {
        print {$out} "\n        Subcategories:\n";
        $ctr = 0;
        my @entries;
        my $count;
        foreach (@links) {
            @entries = @{$CatFiles{$_}};
            $count = 1 + $#entries;
            @entries = @{$CatDirs{$_}};
            $count += 1 + $#entries;
            printf {$out} "%3d % -64s %3d\n", $ctr, "[$_]", $count;
            $ctr++;
        }
    }
    print {$out} "do-backs${CR}\n";
    return;
}

sub genIndex2Buf {
    my $p = shift;
    my $buf = q{};
    open my $fh, q{>:encoding(UTF-8)}, \$buf or croak "$!";
    genIndex $fh, $p;
    close $fh or croak "$!";
    return $buf;
}

sub htmlescape {
    my $s = shift;
    $s =~ s/&/\&amp;/g;
    $s =~ s/</\&lt;/g;
    $s =~ s/>/\&gt;/g;
    return $s;
}

use File::Spec;

sub genHTML {
    my $out = shift;
    my $page = shift;
    my $buf = shift;
    my $doBacks = $DoBacks;
    my @links = ();
    if ($page =~ /\/$/) {
        @links = (@{$CatFiles{$page}}, @{$CatDirs{$page}});
        $doBacks = 0;
    } elsif (exists $Links{noindex $page}) {
        @links = @{$Links{noindex $page}};
    }
    my @rels;
    my sub makerels {
        @rels = ();
        my $rel;
        my $base = $page;
        if ($page =~ /\/$/) {
            $base .= q{index};
        }
        $base = dirname $base;
        if ($base eq q{/}) {
            $base = ".";
        }
        foreach (@links) {
            $rel = File::Spec->abs2rel($_, $base);
            if (-d) {
                if ($rel eq q{.}) {
                    $rel = q{index};
                } else {
                    $rel .= q{/index};
                }
            }
            push @rels, $rel;
        }
        return;
    }
    makerels;
    {
        my $title = noindex $page;
        my $fn = ($page =~ /\/$/) ? q{index} : basename $page;
        print {$out} <<"END_HTML"
<!DOCTYPE html>
<html><head>
<title>$title</title>
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">
END_HTML
;
        print {$out} "<link rel=\"alternate\" type=\"text/plain\" title=\"src\" href=\"$fn.txt\" />\n" if $DoSrc;
        print {$out} <<"END_HTML"
</head>
<body>
<pre>
END_HTML
;
    }
    my $indent = q{};
    my sub procline;
    sub procline {
        $_ = shift;
        chop;
        if (/${CR}$/) {
            chop;
            my $head = q{};
            my $cmd = $_;
            if (/^(\s+)(.*)$/) {
                $head = $1;
                $cmd = $2;
            }
            if ($cmd =~ /^=> (\S+)\s?(.*)$/) {
                my $url = $1;
                my $t = ($2 eq q{}) ? $1 : $2;
                $t = htmlescape $t;
                $t =~ s/"/\&quot;/g;
                $_ = "$head=&gt; <a href=\"$url\">$t</a>";
            } elsif ($cmd =~ /^img (\S+)\s?(.*) => (\S+)$/) {
                if ($2 eq q{}) {
                    $_ = "$head<a href=\"$3\"><img src=\"$1\" /></a>";
                } else {
                    my $urlImg = $1;
                    my $urlA = $3;
                    my $t = $2;
                    $t = htmlescape $t;
                    $t =~ s/"/\&quot;/g;
                    $_ = "$head<a href=\"$urlA\"><img src=\"$urlImg\" alt=\"$t\" /></a>";
                }
            } elsif ($cmd =~ /^img (\S+)\s?(.*)$/) {
                if ($2 eq q{}) {
                    $_ = "$head<img src=\"$1\" />";
                } else {
                    my $url = $1;
                    my $t = $2;
                    $t = htmlescape $t;
                    $t =~ s/"/\&quot;/g;
                    $_ = "$head<img src=\"$url\" alt=\"$t\" />";
                }
            } elsif ($cmd eq q{do-backs}) {
                $doBacks = 1;
                return;
            } elsif ($cmd =~ /^#/) {
                return;
            } elsif ($cmd =~ /^[|]/) {
                $_ = $head . substr $cmd, 1 + (index $cmd, q{|});
            } elsif ($cmd =~ /^<<(.*)\[([^[]+)\]$/) {
                my $indentOrig = $indent;
                $indent .= "${head}$1";
                open my $fh, q{<:encoding(UTF-8)}, $2 or croak "$!";
                while (<$fh>) { procline $_ }
                close $fh or croak "$!";
                $indent = $indentOrig;
                return;
            } else {
                croak "unknown \"$cmd\" command: $page\n";
            }
        } else {
            $_ = htmlescape $_;
            if (/\[.+\]/) {
                while (my ($i, $l) = each @links) {
                    s/\[\Q$l\E\]/<a href="$rels[$i].html">[$l]<\/a>/g;
                }
            }
        }
        print {$out} "${indent}$_\n";
        return;
    }
    {
        my $fh;
        if (defined $buf) {
            open $fh, q{<:encoding(UTF-8)}, \$buf or croak "$!";
        } else {
            open $fh, q{<:encoding(UTF-8)}, $page or croak "$!";
        }
        while (<$fh>) { procline $_ }
        close $fh or croak "$!";
    }
    @links = sort keys %{$Backs{noindex $page}};
    my $backsWereGenerated = ($doBacks && $#links != -1) ? 1 : 0;
    if ($backsWereGenerated) {
        makerels;
        procline "|<a id=\"backs\"><hr/>${CR}\n";
        print {$out} "        Backlinks:\n";
        my $ctr = 0;
        my $pth;
        foreach my $l (@links) {
            $pth = noindex $l;
            procline sprintf "%3d % -39s %19s %8d\n",
                $ctr, "[$pth]", ($Mtimes{$l} or q{}), ($Lines{$l} or 0);
            $ctr++;
        }
    }
    print {$out} "</pre>\n";
    print {$out} "</body></html>\n";
    return $backsWereGenerated;
}

sub nodename {
    my $n = shift;
    $n =~ tr'():,.'-----';
    return $n;
}

sub printMenuEntry {
    print q{* } . (nodename $_) . q{: } . (nodename $_) . ".\n";
    return;
}

sub genInfo {
    my $page = shift;
    my %links;
    if ($page =~ /\/$/) {
        foreach ((@{$CatFiles{$page}}, @{$CatDirs{$page}})) {
            $links{$_} = 1;
        }
    } elsif (exists $Links{noindex $page}) {
        foreach (@{$Links{noindex $page}}) {
            $links{$_} = 1;
        }
    }
    my sub linked {
        my $n = shift;
        if (not exists $links{$n}) {
            return $n;
        }
        $n = nodename $n;
        return "[*note ${n}::]";
    }
    open my $fh, q{<:encoding(UTF-8)}, $page or croak "$!";
    while (<$fh>) {
        chop;
        if (/${CR}$/) {
            chop;
            /^\s*(.*)$/;
            if (($1 =~ /^#/) or ($1 =~ /^do-backs/)) {
                next;
            }
        }
        s/\[([^]]+)\]/linked $1/ge;
        print "$_\n";
    }
    close $fh or croak "$!";
    my @backs = sort keys %{$Backs{noindex $page}};
    if ($#backs != -1) {
        print "\n* Menu:\nBacklinks:\n";
        foreach (@backs) {
            printMenuEntry $_;
        }
        print "\n";
    }
    return;
}

sub genLaTeXSection {
    my $page = shift;
    my $buf = shift;
    print <<"END_LATEX"
\\addcontentsline{toc}{section}{$page}
\\section*{$page}
\\begin{verbatim}
END_LATEX
;
    my $fh;
    if (defined $buf) {
        open $fh, q{<:encoding(UTF-8)}, \$buf or croak "$!";
    } else {
        open $fh, q{<:encoding(UTF-8)}, $page or croak "$!";
    }
    my $wasClosed = 0;
    while (<$fh>) {
        chop;
        if (/${CR}$/) {
            chop;
            /^\s*(.*)$/;
            my $cmd = $1;
            if (($cmd =~ /^#/) or ($cmd =~ /^do-backs/)) {
                next;
            }
            if ($cmd =~ /^=> (\S+)\s?(.*)$/) {
                my $url = $1;
                my $t = ($2 eq q{}) ? $1 : $2;
                if (not $wasClosed) {
                    print "\\end{verbatim}\n";
                }
                print "\$\\Rightarrow\$ \\href{$url}{$t} \\newline\n";
                $wasClosed = 1;
                next;
            } elsif ($cmd =~ /^img (\S+)\s?(.*)$/) {
                if (not $wasClosed) {
                    print "\\end{verbatim}\n";
                }
                print "\\includegraphics{$1} \\newline\n";
                next;
            }
        }
        if ($wasClosed) {
            $wasClosed = 0;
            print "\\begin{verbatim}\n";
        }
        if (/^(.*)\\end\{verbatim\}(.*)$/) {
            print "$1\\end{verbatim}\\verb|\\end{verbatim}|\\begin{verbatim}$2";
            next;
        }
        print "$_\n";
    }
    my @backs = sort keys %{$Backs{noindex $page}};
    if ($#backs != -1) {
        print "\nBacklinks:\n";
        foreach (@backs) {
            print "[$_]\n";
        }
    }
    if (not $wasClosed) {
        print "\\end{verbatim}\n";
    }
    return;
}

sub genInfoIndex {
    my $page = shift;
    my $pth;
    print "* Menu:\n\n";
    foreach (sort @{$CatFiles{$page}}) {
        printMenuEntry $_;
    }
    print "\n";
    my @links = sort @{$CatDirs{$page}};
    if ($#links != -1) {
        print "\n* Menu:\nSubcategories:\n";
        foreach (@links) {
            printMenuEntry $_;
        }
        print "\n";
    }
    @links = sort keys %{$Backs{noindex $page}};
    if ($#links != -1) {
        print "\n* Menu:\nBacklinks:\n";
        foreach (@links) {
            printMenuEntry $_;
        }
        print "\n";
    }
    return;
}

if ($ARGV[0] eq q{files}) {
    # do nothing
} elsif ($ARGV[0] eq q{dump}) {
    require Data::Dumper;
    print Data::Dumper->Dump([
        \%Links,
        \%Backs,
        \%CatFiles,
        \%CatDirs,
        \%Mtimes,
        \%Lines,
    ],
    [qw(*Links *Backs *CatFiles *CatDirs *Mtimes *Lines)]);
} elsif ($ARGV[0] eq q{links}) {
    my $p = decode q{UTF-8}, $ARGV[1];
    foreach (@{$Links{$p}}) { print "$_\n" }
} elsif ($ARGV[0] eq q{backs}) {
    my $p = decode q{UTF-8}, $ARGV[1];
    foreach (sort keys %{$Backs{$p}}) { print "$_\n" }
} elsif ($ARGV[0] eq q{cats}) {
    my $p = (defined $ARGV[1]) ? $ARGV[1] : q{};
    $p = decode q{UTF-8}, $p;
    foreach (sort keys %CatFiles) {
        if (not /^$p/) {
            next;
        }
        if ($_ eq q{}) {
            next;
        }
        print "$_\n";
    }
} elsif ($ARGV[0] eq q{gen-index}) {
    my $p = decode q{UTF-8}, $ARGV[1];
    genIndex \*STDOUT, $p;
} elsif ($ARGV[0] eq q{html}) {
    my $p = decode q{UTF-8}, $ARGV[1];
    if ($p =~ /\/$/) {
        genHTML \*STDOUT, $p, genIndex2Buf $p;
    } else {
        genHTML \*STDOUT, $p;
    }
} elsif ($ARGV[0] eq q{htmls}) {
    my $now = time;
    use File::Path qw(make_path);
    use File::Copy;
    my $fh;
    my $fn;
    foreach my $cat (keys %CatFiles) {
        make_path "$ARGV[1]/$cat";
        next if (exists $Mtimes{"${cat}index"});
        if ($DoSrc) {
            $fn = "$ARGV[1]/${cat}index.txt";
            open $fh, q{>:encoding(UTF-8)}, $fn or croak "$!";
            genIndex $fh, $cat;
            close $fh or croak "$!";
            utime $now, $now, $fn;
        }
        $fn = "$ARGV[1]/${cat}index.html";
        open $fh, q{>:encoding(UTF-8)}, $fn or croak "$!";
        genHTML $fh, $cat, genIndex2Buf $cat;
        close $fh or croak "$!";
        utime $now, $now, $fn;
    }
    my $mtime;
    foreach my $pth (keys %Mtimes) {
        open $fh, q{>:encoding(UTF-8)}, "$ARGV[1]/$pth.html" or croak "$!";
        my $backsWereGenerated = genHTML $fh, $pth;
        close $fh or croak "$!";
        (undef, undef, undef, undef, undef, undef, undef, undef,  undef,
            $mtime, undef, undef, undef) = stat $pth or croak "$!";
        if ($backsWereGenerated) {
            utime $now, $now, "$ARGV[1]/$pth.html";
        } else {
            utime $mtime, $mtime, "$ARGV[1]/$pth.html";
        }
        if ($DoSrc) {
            copy $pth, "$ARGV[1]/$pth.txt" or croak "$!";
            utime $mtime, $mtime, "$ARGV[1]/$pth.txt";
        }
    }
} elsif ($ARGV[0] eq q{info}) {
    print "Autogenerated by swg $VERSION\n";
    my $sep = "\n\n";
    print "${sep}File: self, Node: Top, Up: (dir)\n\n";
    if (exists $Mtimes{q{index}}) {
        genInfo q{index};
        delete $Mtimes{q{index}};
        delete $CatFiles{q{/}};
    } else {
        genInfoIndex q{/};
        delete $CatFiles{q{/}};
    }
    sub up {
        my $p = dirname noindex shift;
        if (($p eq q{.}) || ($p eq q{/})) {
            return q{Top};
        }
        return (nodename $p) . q{/};
    }
    foreach my $cat (keys %CatFiles) {
        next if (exists $Mtimes{"${cat}index"});
        print "${sep}File: self, Node: " . (nodename $cat) .
            q{, Up: } . (up $cat) . "\n\n";
        genInfoIndex $cat;
    }
    foreach my $page (keys %Mtimes) {
        print "${sep}File: self, Node: " . (nodename noindex $page) .
            q{, Up: } . (up $page) . "\n\n";
        genInfo $page;
    }
    print "${sep}File: self, Node: index, Up: Top\n\n";
    print " [index ]\n* Menu:\n";
    foreach my $cat (keys %CatFiles) {
        print q{* } . (nodename $cat) . q{: } .
            (nodename $cat) . ". (line 0)\n";
    }
    foreach my $page (keys %Mtimes) {
        print q{* } . (nodename noindex $page) . q{: } .
            (nodename noindex $page) . ". (line 0)\n";
    }
    print "${sep}Local Variables:\ncoding: utf-8\nEnd:\n";
} elsif ($ARGV[0] eq q{latex}) {
    print <<"END_LATEX"
\\documentclass[a4paper,10pt]{report}
\\usepackage{iftex}
\\ifxetex
    \\usepackage{fontspec}
    \\setmainfont{CMU Serif}
    \\setmonofont{Go Mono}
\\else
    \\usepackage[T2A]{fontenc}
    \\usepackage[utf8]{inputenc}
\\fi
\\usepackage[pdfa,colorlinks=true,final]{hyperref}
\\usepackage{a4wide}
\\usepackage{graphicx}
\\begin{document}
\\small
END_LATEX
;
    if (exists $Mtimes{q{index}}) {
        genLaTeXSection q{index};
        delete $Mtimes{q{index}};
        delete $CatFiles{q{/}};
    } else {
        genLaTeXSection "/", genIndex2Buf q{/};
        delete $CatFiles{q{/}};
    }
    foreach my $cat (keys %CatFiles) {
        next if (exists $Mtimes{"${cat}index"});
        genLaTeXSection $cat, genIndex2Buf $cat;
    }
    foreach my $page (keys %Mtimes) {
        genLaTeXSection $page;
    }
    print "\\tableofcontents\n";
    print "\\end{document}\n";
} elsif ($ARGV[0] eq q{dot}) {
    print "digraph d {\n";
    print "rankdir=LR\n";
    print "node[shape=rectangle]\n";
    while (my ($from, $v) = each %Links) {
        foreach (@{$v}) {
            print "\t\"$from\" -> \"$_\"\n";
        }
    }
    print "}\n";
} else {
    usage;
}
