#!/usr/bin/env perl

##  Eryk Wdowiak
##  original:  06 Dec 2017
##  updated:   10 Mar 2018
##  
##  "Vision-Zero_combine-files_napizia.pl" -- edits NYC DOT data
##    *  adds "zero" values in months when there was no accident

##  ##  ##  ##  ##  ##  ##  ##  ##  ##  ##  ##  ##  ##  ##  ##  ##  ##

use strict ;
use warnings ;

##  input files:  monthly fatalities and monthly injuries
my ( $fatalm , $injurm ) = (
    "originals/nyc-dot_fatal_by-mon_dl-2018-02-27.csv" ,
    "originals/nyc-dot_injur_by-mon_dl-2018-02-27.csv" ,
);

##  output files:  the month-by-month table and the table of nodes
my ( $otfile , $nodesf ) = (
    "nyc-dot_by-mon_with-zeroes.csv" ,
    "nyc-dot_nodes.csv" ,
);

##  ##  ##  ##  ##  ##  ##  ##  ##  ##  ##  ##  ##  ##  ##  ##  ##  ##

##  fetch injuries first, then fatalities
##  each call receives the hash collected so far and returns it with
##  the rows of one more file merged in
my %nycdot ;
foreach my $csvfile ( $injurm , $fatalm ) {
    %nycdot = fetch_csv( $csvfile , \%nycdot ) ;
}


##  create header for output file
##  one row per node per month;  the last six columns repeat the node's
##  before/after totals on every one of its rows
my @otharray = ("NODEID","YRMN","year","month","Casualties",
		"Fatalities","PedFatalit","BikeFatali","MVOFatalit",
		"Injuries","PedInjurie","BikeInjuri","MVOInjurie",
		"CasualBefore","CasualAfter",
		"InjurBefore","InjurAfter","FatalBefore","FatalAfter");
my $otheader = join( "," , @otharray ) ;

##  monthly count columns, in the same order as in the header above
my @countkeys = ("Fatalities","PedFatalit","BikeFatali","MVOFatalit",
		 "Injuries","PedInjurie","BikeInjuri","MVOInjurie");

##  write it all out
##  three-argument open with a lexical handle:  the file name is never
##  parsed for a mode and the handle is not a package-wide bareword
open( my $otfh , ">" , $otfile ) || die "could not overwrite $otfile: $!" ;
print {$otfh} $otheader . "\n" ;

foreach my $nodeid (sort {$a <=> $b} keys %nycdot) {

    ##  node totals before/after the start of Vision Zero (zero if none)
    ##  NOTE(review): these totals are taken as stored in %nycdot, so they
    ##  are not limited to the 2009-2017 window written below -- confirm
    ##  that is intended
    my $InjurBefore = make_zero( $nycdot{$nodeid}{"Injuries"}{"before"}  );
    my $InjurAfter  = make_zero( $nycdot{$nodeid}{"Injuries"}{"after"}   );
    my $FatalBefore = make_zero( $nycdot{$nodeid}{"Fatalities"}{"before"});
    my $FatalAfter  = make_zero( $nycdot{$nodeid}{"Fatalities"}{"after"} );

    my $CasualBefore = $InjurBefore + $FatalBefore ;
    my $CasualAfter  = $InjurAfter  + $FatalAfter  ;

    ##  data available for January 2018, but we'll stop at December 2017
    foreach my $yr (2009..2017) {
	foreach my $mo (1..12) {

	    ##  month and year formatting  (same keys that fetch_csv stores)
	    my $month = "m" . sprintf( "%02d" , $mo ) ;
	    my $year  = "y" . $yr ;
	    my $yrmn  = $year . $month ;

	    ##  months with no record for this node come out as zeroes
	    my $fatalities = make_zero( $nycdot{$nodeid}{$year}{$month}{"Fatalities"});
	    my $injuries   = make_zero( $nycdot{$nodeid}{$year}{$month}{"Injuries"}  );
	    my $casualties = $fatalities + $injuries ;

	    ##  prepare output line
	    my @otarray = ( $nodeid , $yrmn , $yr , $mo , $casualties ) ;

	    foreach my $key (@countkeys) {
		push( @otarray , make_zero( $nycdot{$nodeid}{$year}{$month}{$key} ) ) ;
	    }

	    push( @otarray ,
		  $CasualBefore , $CasualAfter ,
		  $InjurBefore  , $InjurAfter  ,
		  $FatalBefore  , $FatalAfter  );

	    ##  print to output file
	    print {$otfh} join( "," , @otarray ) . "\n" ;
	}
    }
}

##  buffered write errors (a full disk, for example) only surface at close
close( $otfh ) || die "could not finish writing $otfile: $!" ;

##  create header
##  one row per node:  before/after totals, coordinates and street names
my @nfharray = ("NODEID",
		"CasualBefore","CasualAfter",
		"InjurBefore","InjurAfter",
		"FatalBefore","FatalAfter",
		"nodeX","nodeY","STREET1","STREET2");
my $nfheader = join( "," , @nfharray ) ;

##  write out the nodes
##  three-argument open with a lexical handle:  the file name is never
##  parsed for a mode and the handle is not a package-wide bareword
open( my $nodesfh , ">" , $nodesf ) || die "could not overwrite $nodesf: $!" ;
print {$nodesfh} $nfheader . "\n" ;
foreach my $nodeid (sort {$a <=> $b} keys %nycdot) {

    ##  node totals before/after the start of Vision Zero (zero if none)
    my $InjurBefore = make_zero( $nycdot{$nodeid}{"Injuries"}{"before"}  );
    my $InjurAfter  = make_zero( $nycdot{$nodeid}{"Injuries"}{"after"}   );
    my $FatalBefore = make_zero( $nycdot{$nodeid}{"Fatalities"}{"before"});
    my $FatalAfter  = make_zero( $nycdot{$nodeid}{"Fatalities"}{"after"} );

    my $CasualBefore = $InjurBefore + $FatalBefore ;
    my $CasualAfter  = $InjurAfter  + $FatalAfter  ;

    ##  location and street names, exactly as fetch_csv stored them
    my $nodex = $nycdot{$nodeid}{"nodex"};
    my $nodey = $nycdot{$nodeid}{"nodey"};
    my $stone = $nycdot{$nodeid}{"stone"};
    my $sttwo = $nycdot{$nodeid}{"sttwo"};

    ##  prepare output line  (column order follows the header above)
    my @otarray = ( $nodeid ,
		    $CasualBefore , $CasualAfter ,
		    $InjurBefore  , $InjurAfter  ,
		    $FatalBefore  , $FatalAfter  ,
		    $nodex , $nodey , $stone , $sttwo );

    ##  print to output file
    print {$nodesfh} join( "," , @otarray ) . "\n" ;
}

##  buffered write errors (a full disk, for example) only surface at close
close( $nodesfh ) || die "could not finish writing $nodesf: $!" ;

##  ##  ##  ##  ##  ##  ##  ##  ##  ##  ##  ##  ##  ##  ##  ##  ##  ##

##  SUBROUTINES
##  ===========

##  replace an undefined value with zero
##    argument:  the value to check (may be undef or missing)
##    returns:   0 when the argument is undefined, otherwise the argument
##               unchanged (so "" and 0 are passed straight through)
sub make_zero {
    my ($candidate) = @_ ;
    return 0 unless defined $candidate ;
    return $candidate ;
}

##  read data from CSV file and store in hash
##
##  arguments:
##    $_[0]   path of the CSV file to read
##    $_[1]   reference to the hash of data collected so far
##
##  returns:  the hash (as a list) with the rows of this file added
##
##  The fourth header column tells the two kinds of input apart:  it must
##  be "Injuries" or "Fatalities".  With any other header the sub dies
##  as soon as it reaches a data row.
##
##  stored for each NODEID:
##    {"nodex"} {"nodey"} {"stone"} {"sttwo"}      coordinates and streets
##    {"Injuries"|"Fatalities"}{"before"|"after"}  running totals, where
##                                                 "before" means YR < 2014
##    {"yYYYY"}{"mMM"}{column}                     the four counts of a row
##
##  Only the top level of the incoming hash is copied, so nodes that are
##  already present are updated in place.  Fields are split on every
##  comma;  quoted fields are not supported.
sub fetch_csv {

    my $infile =    $_[0] ;
    my %data   = %{ $_[1] } ;

    ##  count columns belonging to each kind of file;  the first one is
    ##  also the column that is summed into the before/after totals
    my %countcols = (
	"Injuries"   => [ "Injuries"   , "PedInjurie" , "BikeInjuri" , "MVOInjurie" ] ,
	"Fatalities" => [ "Fatalities" , "PedFatalit" , "BikeFatali" , "MVOFatalit" ] ,
    );

    ##  open input file once  (three-argument open, lexical handle:  the
    ##  file name is never parsed for a mode)
    open( my $infh , "<" , $infile ) || die "could not open $infile: $!" ;

    ##  get the header row;  an empty file adds nothing
    my $header = <$infh> ;
    if ( ! defined $header ) {
	close( $infh ) ;
	return %data ;
    }
    chomp $header ;
    $header =~ s/\r\z// ;   ##  CRLF files would leave "\r" on the last name

    ##  split the header to capture column names
    my @colnames = split( /,/ , $header ) ;
    my $kind = $colnames[3] ;

    ##  capture data
    while ( my $line = <$infh> ) {

	##  remove line endings (LF or CRLF) and skip blank lines, which
	##  would otherwise create a node with an empty NODEID
	chomp $line ;
	$line =~ s/\r\z// ;
	next if $line =~ /\A\s*\z/ ;

	##  split the input line to capture columns of data
	my @cols = split( /,/ , $line ) ;

	##  hash of information on this line, keyed by column name
	my %ch ;
	@ch{@colnames} = @cols[ 0 .. $#colnames ] ;

	##  missing info on lines 1649 to 1662 of "nyc-dot_fatal_by-mon.csv"
	if ( $ch{"MN"} eq " " && $ch{"YR"} eq " " ) {
	    $ch{"MN"} = "1" ;
	    $ch{"YR"} = "2009" ;
	}

	##  how many accidents occurred before/after start of vision zero?
	my $whenoccur = ( $ch{"YR"} < 2014 ) ? "before" : "after" ;

	##  month and year
	my $month = "m" . sprintf( "%02d" , $ch{"MN"} ) ;
	my $year  = "y" . $ch{"YR"} ;

	##  where is it?
	my $node = ( $data{$ch{"NODEID"}} ||= {} ) ;
	$node->{"nodex"} = $ch{"nodeX"}   ;
	$node->{"nodey"} = $ch{"nodeY"}   ;
	$node->{"stone"} = $ch{"STREET1"} ;
	$node->{"sttwo"} = $ch{"STREET2"} ;

	##  which file?  fatalities or injuries
	if ( ! defined $kind || ! exists $countcols{$kind} ) {
	    my $blah = "something wrong here";
	    die $blah ;
	}

	##  totals accumulate over all rows of the node
	$node->{$kind}{$whenoccur} += $ch{$kind} ;

	##  NOTE(review): monthly values are assigned, not added, so a second
	##  row for the same node and month replaces the first while the
	##  totals above count both -- confirm that such rows cannot occur
	foreach my $col ( @{ $countcols{$kind} } ) {
	    $node->{$year}{$month}{$col} = $ch{$col} ;
	}
    }
    close( $infh ) ;

    return %data ;
}

##  FID,Join_Count,TARGET_FID,Injuries,PedInjurie,BikeInjuri,MVOInjurie,MN,YR,NODEID,nodeX,nodeY,STREET1,STREET2
##  88429,1,88429,2,0,0,2,5,2014,17399,993222.127400000000000,161305.377899999990000,OCEAN PARKWAY,AVENUE P
##  
##  FID,Join_Count,TARGET_FID,Fatalities,PedFatalit,BikeFatali,MVOFatalit,MN,YR,NODEID,nodeX,nodeY,STREET1,STREET2
##  443,1,443,1,0,0,1,5,2014,17399,993222.127400000000000,161305.377899999990000,OCEAN PARKWAY,AVENUE P

