extract meanings of holidays and merge them into CSV

This commit is contained in:
Aki Kareha 2025-02-26 01:05:56 +09:00
parent d2435335d4
commit 2c90b359dd
No known key found for this signature in database
GPG Key ID: 53C60D74F1899BF0
6 changed files with 102 additions and 3 deletions

50
extract Executable file
View File

@ -0,0 +1,50 @@
#!/usr/bin/perl
# extract - copy the holiday table from summary-jp.html into meanings-jp.csv.
#
# The table of interest is the one that follows the <h2> heading containing
# "国民の祝日に関する法律".  Every record is built from three cells, each on
# its own input line: a <th> cell, then two <td> cells.  They are written as
# one comma-separated line per record.  Scanning stops at the first </table>.
#
# The input is matched as raw bytes (no `use utf8`, no encoding layer), so the
# heading literal below is compared byte-for-byte with the file contents.
# NOTE(review): this presumes the HTML uses the same encoding as this script.
use strict;
use warnings;

my $input_file = 'summary-jp.html';
open my $fh_in, '<', $input_file
  or die "Cannot open $input_file for reading: $!";
my $output_file = 'meanings-jp.csv';
open my $fh_out, '>', $output_file
  or die "Cannot open $output_file for writing: $!";

# Skip everything up to and including the heading line.
my $found_heading = 0;
while ( my $line = <$fh_in> ) {
    chomp $line;
    if ( $line =~ m|<h2.*国民の祝日に関する法律.*</h2>| ) {
        $found_heading = 1;
        last;
    }
}

# Without the heading there is nothing to extract; say so instead of
# silently leaving an empty CSV behind.
warn "Heading not found in $input_file; $output_file will be empty\n"
  unless $found_heading;

# State machine: which cell is expected next, what terminates it in the CSV,
# and which state follows once the cell has been written.
my %step_for = (
    head    => { cell => qr|<th>(.*)</th>|, end => ",",  next => "rule" },
    rule    => { cell => qr|<td>(.*)</td>|, end => ",",  next => "meaning" },
    meaning => { cell => qr|<td>(.*)</td>|, end => "\n", next => "head" },
);

my $state = "head";
while ( my $line = <$fh_in> ) {
    chomp $line;
    last if $line =~ m|</table>|;

    # Lines that do not hold the expected cell are ignored.
    my $step = $step_for{$state};
    if ( $line =~ $step->{cell} ) {
        print {$fh_out} $1 . $step->{end};
        $state = $step->{next};
    }
}

# Anything other than "head" means the table ended in the middle of a record.
warn "Incomplete last record in $output_file (stopped in state '$state')\n"
  if $state ne "head";

close $fh_in;

# Buffered write errors only surface at close, so check it.
close $fh_out
  or die "Cannot close $output_file: $!";

4
format
View File

@ -1,2 +1,4 @@
#!/bin/sh
# Reformat the project's shell and Perl scripts in place.
# NOTE(review): this listing is a rendered diff, so the next line looks like the
# pre-change command that the line after it replaces — confirm that the real
# file contains only one shfmt invocation.
shfmt -i 2 -ci -w update
# Shell scripts: 2-space indent (-i 2), indented switch cases (-ci), and write
# the result back to the files (-w).
shfmt -i 2 -ci -w watch update
# Perl scripts: -b edits the files in place and keeps each original as <name>.bak.
perltidy -b extract merge
# Remove the perltidy backups.
# NOTE(review): the glob also deletes any other *.bak file in this directory.
rm *.bak

41
merge Executable file
View File

@ -0,0 +1,41 @@
#!/usr/bin/perl
# merge - join holidays-jp.csv with meanings-jp.csv into merged-jp.csv.
#
# Input:
#   meanings-jp.csv  lines of "name,rule,meaning"
#   holidays-jp.csv  lines of "Y/M/D,name"
# Output:
#   merged-jp.csv    lines of "YYYY-MM-DD,name,rule,meaning"
#
# A holiday whose name has no entry in meanings-jp.csv is written with the
# default rule "振替休日" and the default meaning "祝日法による休日。".
use strict;
use warnings;

my $meanings_file = 'meanings-jp.csv';
open my $fh_meanings, '<', $meanings_file
  or die "Cannot open $meanings_file for reading: $!";
my $holidays_file = 'holidays-jp.csv';
open my $fh_holidays, '<', $holidays_file
  or die "Cannot open $holidays_file for reading: $!";
my $output_file = 'merged-jp.csv';
open my $fh_out, '>', $output_file
  or die "Cannot open $output_file for writing: $!";

# Index the meanings by holiday name.  The split is limited to three fields
# so that commas inside the meaning text are preserved, and the first row
# seen for a name wins.
my %meaning_for;
while ( my $line = <$fh_meanings> ) {
    chomp $line;
    my ( $name, $rule, $meaning ) = split /,/, $line, 3;
    next if !defined $name || exists $meaning_for{$name};
    $meaning_for{$name} = [
        defined $rule    ? $rule    : '',
        defined $meaning ? $meaning : '',
    ];
}
close $fh_meanings;

while ( my $line = <$fh_holidays> ) {
    chomp $line;
    my ( $date_raw, $name ) = split /,/, $line;

    # Capture in list context so a failed match yields no values at all,
    # rather than leaving the previous row's $1..$3 in place.
    my ( $year, $month, $day ) =
      defined $date_raw ? $date_raw =~ m|(\d+)/(\d+)/(\d+)| : ();
    unless ( defined $day && defined $name ) {
        warn "Skipping malformed line $. of $holidays_file\n";
        next;
    }
    my $date = sprintf( "%04d-%02d-%02d", $year, $month, $day );

    # Defaults apply to names that are absent from the meanings file.
    my ( $rule, $meaning ) = ( "振替休日", "祝日法による休日。" );
    if ( my $entry = $meaning_for{$name} ) {
        ( $rule, $meaning ) = @$entry;
    }
    print {$fh_out} "$date,$name,$rule,$meaning\n";
}
close $fh_holidays;

# Buffered write errors only surface at close, so check it.
close $fh_out
  or die "Cannot close $output_file: $!";

3
prepare Executable file
View File

@ -0,0 +1,3 @@
#!/bin/sh
# Run the two steps in order: ./watch first, then ./update.
# The script's exit status is that of the last step run.
for step in watch update; do
  "./$step"
done

3
update
View File

@ -72,9 +72,10 @@ if ! diff -q $RAW_CSV_FILE $TMP_CSV_FILE >/dev/null 2>&1; then
mv $TMP_CSV_FILE $RAW_CSV_FILE
nkf -w $RAW_CSV_FILE >$ALL_CSV_FILE
current_year=$(date +'%Y')
tail -n +2 $ALL_CSV_FILE | awk -v cy="$current_year" -F'/' '{ if ($1 >= cy) print }' >$CSV_FILE
tail -n +2 $ALL_CSV_FILE | awk -v cy="$current_year" -F'/' '{ if ($1 >= cy) print }' | tr -d '\r' >$CSV_FILE
log "Changed"
send_mail
./merge
else
log "No Change"
rm $TMP_CSV_FILE

4
watch
View File

@ -63,11 +63,13 @@ curl -sS -L -o $TMP_HTML_FILE $SITE_URL
if ! diff -q $HTML_FILE $TMP_HTML_FILE >/dev/null 2>&1; then
CACHE_DIR="cache"
mkdir -p "$CACHE_DIR"
CURRENT_DATETIME=`date "+%Y-%m-%d_%H:%M:%S%z"`
CURRENT_DATETIME=$(date "+%Y-%m-%d_%H:%M:%S%z")
cp $TMP_HTML_FILE "$CACHE_DIR/summary-jp-$CURRENT_DATETIME.html"
mv $TMP_HTML_FILE $HTML_FILE
log "Changed"
send_mail
./extract
./merge 2>/dev/null
else
log "No Change"
rm $TMP_HTML_FILE