extract meanings of holidays and merge them into CSV
This commit is contained in:
parent
d2435335d4
commit
2c90b359dd
50
extract
Executable file
50
extract
Executable file
@ -0,0 +1,50 @@
|
||||
#!/usr/bin/perl
# extract - pull the holiday table out of the cached summary page and write
# it as CSV, one holiday per line.
#
# Input:  summary-jp.html (current directory)
# Output: meanings-jp.csv (current directory), three comma-separated fields
#         per line, taken from a <th> cell followed by two <td> cells.
#
# The page is scanned line by line.  Everything up to the <h2> heading that
# contains 国民の祝日に関する法律 is skipped; cells are then collected until
# the first </table>.  Only one cell is taken from any single input line.
#
# There is no `use utf8` and no encoding layer on the handles, so the heading
# literal is compared with the page as raw bytes and cell contents are copied
# through unchanged.  Fields are written without CSV quoting.
use strict;
use warnings;

my $input_file = 'summary-jp.html';
open my $fh_in, '<', $input_file
  or die "Cannot open $input_file for reading: $!";

# Locate the section heading BEFORE opening the output file: if the page
# layout changed and the heading is gone, fail loudly and leave any existing
# CSV untouched instead of silently replacing it with an empty file.
my $found_heading = 0;
while ( my $line = <$fh_in> ) {
    if ( $line =~ m|<h2.*国民の祝日に関する法律.*</h2>| ) {
        $found_heading = 1;
        last;
    }
}
die "Cannot find the holiday law heading in $input_file"
  unless $found_heading;

my $output_file = 'meanings-jp.csv';
open my $fh_out, '>', $output_file
  or die "Cannot open $output_file for writing: $!";

# Cells are expected in this fixed order within a row.  The captures are
# non-greedy so that a line carrying several cells yields only the first
# cell's text rather than the markup between the first and last cell.
my @cell_patterns =
  ( qr|<th>(.*?)</th>|, qr|<td>(.*?)</td>|, qr|<td>(.*?)</td>| );

# Cells gathered so far for the row in progress; its length doubles as the
# index of the pattern expected next.
my @row;

while ( my $line = <$fh_in> ) {

    # The first closing table tag after the heading ends the data.
    last if $line =~ m|</table>|;

    my $expected = $cell_patterns[ scalar @row ];
    next unless $line =~ $expected;
    push @row, $1;

    # Emit only complete rows so the CSV never contains a partial line.
    if ( @row == @cell_patterns ) {
        print {$fh_out} join( ',', @row ), "\n";
        @row = ();
    }
}

warn "Dropping incomplete last row in $input_file: " . join( ',', @row ) . "\n"
  if @row;

close $fh_in;

# Buffered write errors (e.g. a full disk) only surface at close.
close $fh_out
  or die "Cannot close $output_file: $!";
|
4
format
4
format
@ -1,2 +1,4 @@
|
||||
#!/bin/sh
|
||||
shfmt -i 2 -ci -w update
|
||||
shfmt -i 2 -ci -w watch update
|
||||
perltidy -b extract merge
|
||||
rm *.bak
|
||||
|
41
merge
Executable file
41
merge
Executable file
@ -0,0 +1,41 @@
|
||||
#!/usr/bin/perl
# merge - join the holiday list with the rule and meaning of each holiday.
#
# Inputs (current directory):
#   meanings-jp.csv  name,rule,meaning
#   holidays-jp.csv  date,name   (date written as year/month/day)
# Output (current directory):
#   merged-jp.csv    YYYY-MM-DD,name,rule,meaning
#
# A holiday whose name has no entry in meanings-jp.csv gets the fallback
# rule 振替休日 and the fallback meaning 祝日法による休日。
# Fields are written without CSV quoting.
use strict;
use warnings;

my $meanings_file = 'meanings-jp.csv';
open my $fh_meanings, '<', $meanings_file
  or die "Cannot open $meanings_file for reading: $!";
my $holidays_file = 'holidays-jp.csv';
open my $fh_holidays, '<', $holidays_file
  or die "Cannot open $holidays_file for reading: $!";
my $output_file = 'merged-jp.csv';
open my $fh_out, '>', $output_file
  or die "Cannot open $output_file for writing: $!";

# Index the meanings by holiday name so each holiday is resolved with one
# hash lookup instead of a scan over every row.
my %meaning_of;
while ( my $line = <$fh_meanings> ) {
    chomp $line;

    # LIMIT 3 keeps a meaning that itself contains commas in one piece and
    # preserves an empty trailing field instead of dropping it.
    my ( $name, $rule, $meaning ) = split /,/, $line, 3;
    next unless defined $name && length $name;
    $rule    = '' unless defined $rule;
    $meaning = '' unless defined $meaning;

    # When a name occurs more than once, the first occurrence wins.
    $meaning_of{$name} = [ $rule, $meaning ]
      unless exists $meaning_of{$name};
}
close $fh_meanings;

while ( my $line = <$fh_holidays> ) {
    chomp $line;

    # Tolerate CRLF input: a stray carriage return would end up inside the
    # name and make every lookup miss.
    $line =~ s/\r\z//;
    next if $line !~ /\S/;

    my ( $date_raw, $name ) = split /,/, $line;

    # Capture into lexicals and verify the match; after a failed match
    # $1..$3 would be undefined or left over from an earlier line.
    my ( $year, $month, $day ) = $date_raw =~ m|(\d+)/(\d+)/(\d+)|;
    unless ( defined $day && defined $name ) {
        warn "Skipping malformed line $. of $holidays_file: $line\n";
        next;
    }

    my $date    = sprintf( "%04d-%02d-%02d", $year, $month, $day );
    my $rule    = "振替休日";
    my $meaning = "祝日法による休日。";
    if ( my $known = $meaning_of{$name} ) {
        ( $rule, $meaning ) = @{$known};
    }
    print {$fh_out} "$date,$name,$rule,$meaning\n";
}
close $fh_holidays;

# Buffered write errors (e.g. a full disk) only surface at close.
close $fh_out
  or die "Cannot close $output_file: $!";
|
3
update
3
update
@ -72,9 +72,10 @@ if ! diff -q $RAW_CSV_FILE $TMP_CSV_FILE >/dev/null 2>&1; then
|
||||
mv $TMP_CSV_FILE $RAW_CSV_FILE
|
||||
nkf -w $RAW_CSV_FILE >$ALL_CSV_FILE
|
||||
current_year=$(date +'%Y')
|
||||
tail -n +2 $ALL_CSV_FILE | awk -v cy="$current_year" -F'/' '{ if ($1 >= cy) print }' >$CSV_FILE
|
||||
tail -n +2 $ALL_CSV_FILE | awk -v cy="$current_year" -F'/' '{ if ($1 >= cy) print }' | tr -d '\r' >$CSV_FILE
|
||||
log "Changed"
|
||||
send_mail
|
||||
./merge
|
||||
else
|
||||
log "No Change"
|
||||
rm $TMP_CSV_FILE
|
||||
|
4
watch
4
watch
@ -63,11 +63,13 @@ curl -sS -L -o $TMP_HTML_FILE $SITE_URL
|
||||
if ! diff -q $HTML_FILE $TMP_HTML_FILE >/dev/null 2>&1; then
|
||||
CACHE_DIR="cache"
|
||||
mkdir -p "$CACHE_DIR"
|
||||
CURRENT_DATETIME=`date "+%Y-%m-%d_%H:%M:%S%z"`
|
||||
CURRENT_DATETIME=$(date "+%Y-%m-%d_%H:%M:%S%z")
|
||||
cp $TMP_HTML_FILE "$CACHE_DIR/summary-jp-$CURRENT_DATETIME.html"
|
||||
mv $TMP_HTML_FILE $HTML_FILE
|
||||
log "Changed"
|
||||
send_mail
|
||||
./extract
|
||||
./merge 2>/dev/null
|
||||
else
|
||||
log "No Change"
|
||||
rm $TMP_HTML_FILE
|
||||
|
Loading…
Reference in New Issue
Block a user