From 2c90b359dd3800aeff1ac542aef054d1b2ebb4f1 Mon Sep 17 00:00:00 2001 From: Aki Kareha Date: Wed, 26 Feb 2025 01:05:56 +0900 Subject: [PATCH] extract meanings of holidays and merge them into CSV --- extract | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ format | 4 +++- merge | 41 +++++++++++++++++++++++++++++++++++++++++ prepare | 3 +++ update | 3 ++- watch | 4 +++- 6 files changed, 102 insertions(+), 3 deletions(-) create mode 100755 extract create mode 100755 merge create mode 100755 prepare diff --git a/extract b/extract new file mode 100755 index 0000000..b767d78 --- /dev/null +++ b/extract @@ -0,0 +1,50 @@ +#!/usr/bin/perl +use strict; +use warnings; + +my $input_file = 'summary-jp.html'; +open my $fh_in, '<', $input_file + or die "Cannot open $input_file for reading: $!"; +my $output_file = 'meanings-jp.csv'; +open my $fh_out, '>', $output_file + or die "Cannot open $output_file for writing: $!"; + +while ( my $line = <$fh_in> ) { + chomp $line; + if ( $line =~ m|| ) { + last; + } +} + +my $state = "head"; +while ( my $line = <$fh_in> ) { + chomp $line; + + if ( $line =~ m|| ) { + last; + } + + if ( $state eq "head" ) { + if ( $line =~ m|(.*)| ) { + print $fh_out "$1,"; + $state = "rule"; + next; + } + } + elsif ( $state eq "rule" ) { + if ( $line =~ m|(.*)| ) { + print $fh_out "$1,"; + $state = "meaning"; + next; + } + } + elsif ( $state eq "meaning" ) { + if ( $line =~ m|(.*)| ) { + print $fh_out "$1\n"; + $state = "head"; + next; + } + } +} +close $fh_in; +close $fh_out; diff --git a/format b/format index edab75d..b384371 100755 --- a/format +++ b/format @@ -1,2 +1,4 @@ #!/bin/sh -shfmt -i 2 -ci -w update +shfmt -i 2 -ci -w watch update +perltidy -b extract merge +rm *.bak diff --git a/merge b/merge new file mode 100755 index 0000000..dc26aca --- /dev/null +++ b/merge @@ -0,0 +1,41 @@ +#!/usr/bin/perl +use strict; +use warnings; + +my $meanings_file = 'meanings-jp.csv'; +open my $fh_meanings, '<', $meanings_file + or die "Cannot open $meanings_file for reading: $!"; +my $holidays_file = 'holidays-jp.csv'; +open my $fh_holidays, '<', $holidays_file + or die "Cannot open $holidays_file for reading: $!"; +my $output_file = 'merged-jp.csv'; +open my $fh_out, '>', $output_file + or die "Cannot open $output_file for writing: $!"; + +my @meanings; +while ( my $line = <$fh_meanings> ) { + chomp $line; + my @fields = split /,/, $line; + + push @meanings, \@fields; +} + +while ( my $line = <$fh_holidays> ) { + chomp $line; + my @fields = split /,/, $line; + + my $date_raw = $fields[0]; + $date_raw =~ m|(\d+)/(\d+)/(\d+)|; + my $date = sprintf( "%04d-%02d-%02d", $1, $2, $3 ); + my $name = $fields[1]; + my $rule = "振替休日"; + my $meaning = "祝日法による休日。"; + for my $row (@meanings) { + if ( $$row[0] eq $name ) { + $rule = $$row[1]; + $meaning = $$row[2]; + last; + } + } + print $fh_out "$date,$name,$rule,$meaning\n"; +} diff --git a/prepare b/prepare new file mode 100755 index 0000000..3022c31 --- /dev/null +++ b/prepare @@ -0,0 +1,3 @@ +#!/bin/sh +./watch +./update diff --git a/update b/update index 1647d08..bb80672 100755 --- a/update +++ b/update @@ -72,9 +72,10 @@ if ! diff -q $RAW_CSV_FILE $TMP_CSV_FILE >/dev/null 2>&1; then mv $TMP_CSV_FILE $RAW_CSV_FILE nkf -w $RAW_CSV_FILE >$ALL_CSV_FILE current_year=$(date +'%Y') - tail -n +2 $ALL_CSV_FILE | awk -v cy="$current_year" -F'/' '{ if ($1 >= cy) print }' >$CSV_FILE + tail -n +2 $ALL_CSV_FILE | awk -v cy="$current_year" -F'/' '{ if ($1 >= cy) print }' | tr -d '\r' >$CSV_FILE log "Changed" send_mail + ./merge else log "No Change" rm $TMP_CSV_FILE diff --git a/watch b/watch index 22376c7..7b0bc65 100755 --- a/watch +++ b/watch @@ -63,11 +63,13 @@ curl -sS -L -o $TMP_HTML_FILE $SITE_URL if ! diff -q $HTML_FILE $TMP_HTML_FILE >/dev/null 2>&1; then CACHE_DIR="cache" mkdir -p "$CACHE_DIR" - CURRENT_DATETIME=`date "+%Y-%m-%d_%H:%M:%S%z"` + CURRENT_DATETIME=$(date "+%Y-%m-%d_%H:%M:%S%z") cp $TMP_HTML_FILE "$CACHE_DIR/summary-jp-$CURRENT_DATETIME.html" mv $TMP_HTML_FILE $HTML_FILE log "Changed" send_mail + ./extract + ./merge 2>/dev/null else log "No Change" rm $TMP_HTML_FILE