extract meanings of holidays and merge them into CSV
This commit is contained in:
parent
d2435335d4
commit
2c90b359dd
50
extract
Executable file
50
extract
Executable file
@ -0,0 +1,50 @@
|
|||||||
|
#!/usr/bin/perl
# extract - turn the holiday table in summary-jp.html into meanings-jp.csv.
#
# The script skips ahead to the <h2> heading that mentions
# "国民の祝日に関する法律" and then reads line by line until the first
# </table>.  Within that span it expects each table row to be spread over
# separate lines: one <th> cell followed by two <td> cells.  The three cell
# bodies are written as one comma-separated line of the output file.
#
# Input and output are handled as raw bytes; the heading pattern therefore
# only matches when the HTML uses the same encoding as this source file.
# Cell text is written verbatim (no CSV quoting), because the companion
# merge script splits these lines on plain commas.
use strict;
use warnings;

my $input_file = 'summary-jp.html';
open my $fh_in, '<', $input_file
  or die "Cannot open $input_file for reading: $!";
my $output_file = 'meanings-jp.csv';
open my $fh_out, '>', $output_file
  or die "Cannot open $output_file for writing: $!";

# Phase 1: consume everything up to and including the section heading.
my $found_heading = 0;
while ( my $line = <$fh_in> ) {
    chomp $line;
    if ( $line =~ m|<h2.*国民の祝日に関する法律.*</h2>| ) {
        $found_heading = 1;
        last;
    }
}

# Without the heading there is nothing left to read; say so instead of
# silently leaving an empty CSV behind.
warn "Heading not found in $input_file; $output_file will be empty\n"
  unless $found_heading;

# Phase 2: a three-step cycle over the cells of each row.
# state => [ cell pattern, text written after the cell, following state ]
# The captures are non-greedy so that two cells sharing one line are not
# swallowed into a single field.
my %step_for = (
    head    => [ qr|<th>(.*?)</th>|, ",",  "rule" ],
    rule    => [ qr|<td>(.*?)</td>|, ",",  "meaning" ],
    meaning => [ qr|<td>(.*?)</td>|, "\n", "head" ],
);

my $state = "head";
while ( my $line = <$fh_in> ) {
    chomp $line;

    # The first closing table tag ends the section of interest.
    last if $line =~ m|</table>|;

    my ( $cell_re, $terminator, $next_state ) = @{ $step_for{$state} };
    if ( $line =~ $cell_re ) {
        print $fh_out "$1$terminator";
        $state = $next_state;
    }
}

# A row cut short by </table> or end of file would otherwise leave the last
# CSV line without its newline.
if ( $state ne "head" ) {
    warn "Incomplete table row at end of $input_file\n";
    print $fh_out "\n";
}

close $fh_in;

# Buffered write errors (e.g. a full disk) only surface at close time.
close $fh_out
  or die "Cannot close $output_file: $!";
|
4
format
4
format
@ -1,2 +1,4 @@
|
|||||||
#!/bin/sh
# Reformat the project's scripts in place.

# shfmt: 2-space indent (-i 2), indent switch cases (-ci), write result back (-w).
shfmt -i 2 -ci -w watch update

# perltidy -b rewrites the files in place and keeps each original as <name>.bak.
perltidy -b extract merge

# Remove those backups.  -f keeps this step from failing when no backup
# exists; the ./ prefix stops a file name that starts with '-' from being
# read as an option.
rm -f ./*.bak
|
||||||
|
41
merge
Executable file
41
merge
Executable file
@ -0,0 +1,41 @@
|
|||||||
|
#!/usr/bin/perl
# merge - join holidays-jp.csv with meanings-jp.csv into merged-jp.csv.
#
# meanings-jp.csv lines are "name,rule,meaning".
# holidays-jp.csv lines start with "Y/M/D,name".
# Each holiday becomes one output line "YYYY-MM-DD,name,rule,meaning".  A
# holiday whose name has no entry in the meanings file gets the fallback
# rule and meaning defined below.
#
# All files are processed as raw bytes; names only match when both inputs
# use the same encoding.
use strict;
use warnings;

my $meanings_file = 'meanings-jp.csv';
open my $fh_meanings, '<', $meanings_file
  or die "Cannot open $meanings_file for reading: $!";
my $holidays_file = 'holidays-jp.csv';
open my $fh_holidays, '<', $holidays_file
  or die "Cannot open $holidays_file for reading: $!";
my $output_file = 'merged-jp.csv';
open my $fh_out, '>', $output_file
  or die "Cannot open $output_file for writing: $!";

# Values used when a holiday name is not listed in the meanings file.
my $default_rule    = "振替休日";
my $default_meaning = "祝日法による休日。";

# name => [ rule, meaning ]
my %entry_for;
while ( my $line = <$fh_meanings> ) {
    chomp $line;
    $line =~ s/\r\z//;    # tolerate CRLF line endings
    next if $line eq '';

    # Limit of 3: any comma inside the meaning text stays part of it
    # instead of cutting the text short.
    my ( $name, $rule, $meaning ) = split /,/, $line, 3;

    # Short rows yield empty fields rather than undef.  The first row for a
    # name wins.
    $entry_for{$name} //= [ $rule // '', $meaning // '' ];
}
close $fh_meanings;

while ( my $line = <$fh_holidays> ) {
    chomp $line;
    $line =~ s/\r\z//;    # tolerate CRLF line endings
    next if $line eq '';

    my ( $date_raw, $name ) = split /,/, $line;
    $name //= '';

    # Take the captures straight from the match so that a line without a
    # date can never reuse the numbers of an earlier line.
    my ( $year, $month, $day ) =
      defined $date_raw ? $date_raw =~ m|(\d+)/(\d+)/(\d+)| : ();
    unless ( defined $day ) {
        warn "Skipping line $. of $holidays_file: no Y/M/D date found\n";
        next;
    }
    my $date = sprintf( "%04d-%02d-%02d", $year, $month, $day );

    my $entry = $entry_for{$name};
    my ( $rule, $meaning ) =
      $entry ? @{$entry} : ( $default_rule, $default_meaning );

    print $fh_out "$date,$name,$rule,$meaning\n";
}
close $fh_holidays;

# Buffered write errors (e.g. a full disk) only surface at close time.
close $fh_out
  or die "Cannot close $output_file: $!";
|
3
update
3
update
@ -72,9 +72,10 @@ if ! diff -q $RAW_CSV_FILE $TMP_CSV_FILE >/dev/null 2>&1; then
|
|||||||
mv $TMP_CSV_FILE $RAW_CSV_FILE
|
mv $TMP_CSV_FILE $RAW_CSV_FILE
|
||||||
nkf -w $RAW_CSV_FILE >$ALL_CSV_FILE
|
nkf -w $RAW_CSV_FILE >$ALL_CSV_FILE
|
||||||
current_year=$(date +'%Y')
|
current_year=$(date +'%Y')
|
||||||
tail -n +2 $ALL_CSV_FILE | awk -v cy="$current_year" -F'/' '{ if ($1 >= cy) print }' >$CSV_FILE
|
tail -n +2 $ALL_CSV_FILE | awk -v cy="$current_year" -F'/' '{ if ($1 >= cy) print }' | tr -d '\r' >$CSV_FILE
|
||||||
log "Changed"
|
log "Changed"
|
||||||
send_mail
|
send_mail
|
||||||
|
./merge
|
||||||
else
|
else
|
||||||
log "No Change"
|
log "No Change"
|
||||||
rm $TMP_CSV_FILE
|
rm $TMP_CSV_FILE
|
||||||
|
4
watch
4
watch
@ -63,11 +63,13 @@ curl -sS -L -o $TMP_HTML_FILE $SITE_URL
|
|||||||
if ! diff -q $HTML_FILE $TMP_HTML_FILE >/dev/null 2>&1; then
|
if ! diff -q $HTML_FILE $TMP_HTML_FILE >/dev/null 2>&1; then
|
||||||
CACHE_DIR="cache"
|
CACHE_DIR="cache"
|
||||||
mkdir -p "$CACHE_DIR"
|
mkdir -p "$CACHE_DIR"
|
||||||
CURRENT_DATETIME=`date "+%Y-%m-%d_%H:%M:%S%z"`
|
CURRENT_DATETIME=$(date "+%Y-%m-%d_%H:%M:%S%z")
|
||||||
cp $TMP_HTML_FILE "$CACHE_DIR/summary-jp-$CURRENT_DATETIME.html"
|
cp $TMP_HTML_FILE "$CACHE_DIR/summary-jp-$CURRENT_DATETIME.html"
|
||||||
mv $TMP_HTML_FILE $HTML_FILE
|
mv $TMP_HTML_FILE $HTML_FILE
|
||||||
log "Changed"
|
log "Changed"
|
||||||
send_mail
|
send_mail
|
||||||
|
./extract
|
||||||
|
./merge 2>/dev/null
|
||||||
else
|
else
|
||||||
log "No Change"
|
log "No Change"
|
||||||
rm $TMP_HTML_FILE
|
rm $TMP_HTML_FILE
|
||||||
|
Loading…
Reference in New Issue
Block a user