#!/usr/bin/perl

=pod

    Конвертация rtf-документов из data/docs/* в plain/text для показа в интерфейсе

=cut

use warnings;
use strict;

use my_inc '../..';

use Encode;
use File::Find;
use File::Slurp;

use Settings;

use Yandex::Shell;

use utf8;

# Directory that is scanned for documents: <$Settings::ROOT>/data/docs.
my $DOCS_ROOT = join '/', $Settings::ROOT, 'data/docs';

# Traverse the tree and hand every entry to wanted(), which converts the RTF files.
find({ wanted => \&wanted }, $DOCS_ROOT);

# File::Find callback, invoked for each entry found under the docs directory.
# Entries whose name (in $_) does not end in ".rtf" are skipped; for the rest
# the file is read, converted with rtf2html() and the result is written
# alongside the source under the same name with ".rtf" replaced by ".html".
sub wanted {
    return if $_ !~ /\.rtf$/;

    my $source_path = $File::Find::name;
    my $rtf_content = read_file($source_path);

    # Derive the output path from the source path without touching the original.
    (my $target_path = $source_path) =~ s/\.rtf$/.html/;

    my $converted = rtf2html($rtf_content);
    # The converted text is character data, hence the :utf8 layer on output.
    write_file($target_path, {atomic => 1, binmode => ':utf8'}, $converted);
}

# Convert RTF content to normalised text.
#
# The content is passed (by reference) to the external `catdoc` utility via
# yash_qx(); presumably it is fed to catdoc's stdin -- confirm against
# Yandex::Shell. See catdoc(1) for the meaning of the -u and -w switches.
# The captured output then has its whitespace tidied up and is returned as a
# character string (decoded from UTF-8 if it is not already flagged as such).
#
# Parameters: the RTF document content as a single scalar.
# Returns:    the cleaned-up text.
sub rtf2html {
    my ($rtf_source) = @_;

    # Override the locale only for the duration of this call.
    local $ENV{LC_ALL} = "ru_RU.UTF-8";
    my $plain = yash_qx('catdoc', '-u', '-w', \$rtf_source);

    # Apply the clean-up passes in order; each one edits $plain in place.
    for ($plain) {
        s/[ \t]+/ /g;               # collapse runs of spaces and tabs into one space
        s/\n +/ /g;                 # glue a line that starts with a space onto the previous line
        s/^(\d+\. .*)/\n$1\n/mg;    # set lines starting with "N. " apart with surrounding newlines
        s/\n\n+/\n\n/g;             # squeeze any run of newlines down to exactly two
        s/ +$//mg;                  # drop trailing spaces on every line
        s/ +\././g;                 # remove spaces in front of a period
    }

    # Callers get characters, not bytes: decode unless already a character string.
    utf8::decode($plain) unless utf8::is_utf8($plain);

    return $plain;
}
