holiday/extract

51 lines
1.1 KiB
Perl
Executable File

#!/usr/bin/perl
use strict;
use warnings;
# Extract rows of the holiday table from summary-jp.html into meanings-jp.csv.
#
# The script skips forward to the <h2> heading that contains
# 国民の祝日に関する法律 ("Act on National Holidays"), then reads cells until
# the first line containing </table>.  Each output row is built from three
# consecutive cells: one <th> (head), then two <td> (rule, meaning).
#
# No encoding layer is requested on either handle, so text is matched and
# copied as raw bytes; the heading literal above only matches when this
# script and the input file are stored in the same encoding.

my $input_file = 'summary-jp.html';
open my $fh_in, '<', $input_file
    or die "Cannot open $input_file for reading: $!";
my $output_file = 'meanings-jp.csv';
open my $fh_out, '>', $output_file
    or die "Cannot open $output_file for writing: $!";

# Phase 1: consume lines up to and including the section heading.
my $found_heading = 0;
while ( my $line = <$fh_in> ) {
    chomp $line;
    if ( $line =~ m|<h2.*国民の祝日に関する法律.*</h2>| ) {
        $found_heading = 1;
        last;
    }
}

# Previously a missing heading produced an empty CSV with no diagnostic.
warn "Heading not found in $input_file; $output_file will be empty\n"
    if !$found_heading;

# Phase 2: state machine over the table cells.  For each state: which tag is
# expected, what to print after the field, and which state comes next.
my %cell_for = (
    head    => { tag => 'th', after => ',',  next => 'rule' },
    rule    => { tag => 'td', after => ',',  next => 'meaning' },
    meaning => { tag => 'td', after => "\n", next => 'head' },
);

my $state = "head";

LINE:
while ( my $line = <$fh_in> ) {
    chomp $line;

    # The table ends here; anything else on this line is ignored.
    last LINE if $line =~ m|</table>|;

    # Walk every cell on the line.  The non-greedy capture keeps each cell
    # separate, so several cells on one physical line are handled the same
    # way as one cell per line.
    CELL:
    while ( $line =~ m{<(th|td)>(.*?)</\1>}g ) {
        my ( $tag, $text ) = ( $1, $2 );
        my $expected = $cell_for{$state};

        # Cells of the wrong kind for the current state are skipped.
        next CELL if $tag ne $expected->{tag};

        print {$fh_out} _csv_field($text) . $expected->{after};
        $state = $expected->{next};
    }
}

# A state other than "head" means the last row was cut short.
warn "Table in $input_file ended in the middle of a row (state: $state)\n"
    if $state ne 'head';

close $fh_in;

# Buffered write errors are only reported when the handle is closed.
close $fh_out
    or die "Cannot close $output_file after writing: $!";

# Return $text as a CSV field: unchanged when it has no special characters,
# otherwise wrapped in double quotes with embedded double quotes doubled.
sub _csv_field {
    my ($text) = @_;
    return $text if $text !~ /[",\r\n]/;
    ( my $escaped = $text ) =~ s/"/""/g;
    return qq{"$escaped"};
}