update amtrack

This commit is contained in:
Kyle Belanger 2024-08-12 09:11:13 -04:00
parent 226551920a
commit db7d7f1055

View file

@ -30,21 +30,34 @@ for i in rows
end end
end end
#This causes an issue with two stations because trains often arrive one day and leave the next
# mod_df = @chain df begin
# @rsubset :act_dp != "" && :s_disrupt != "SD"
# @select Not(:comments, :s_disrupt, :cancellations)
# @rtransform _ begin
# :act_dp = Time(:act_dp, dateformat"HH:MMp")
# :orgin_date = Date(replace(:orgin_date, r" \(.*\)" => ""), dateformat"mm/dd/YYYY")
# :sch_dp = DateTime(replace(:sch_dp, r" \(.*\)" => ""), dateformat"mm/dd/YYYY HH:MM p")
# end
# # @rtransform :delay = canonicalize(Dates.CompoundPeriod(:act_dp - Time(:sch_dp)))
# # @rtransform :delay = canonicalize(:act_dp - Time(:sch_dp))
# @rtransform :delay = Dates.value(Minute(:act_dp - Time(:sch_dp)))
# end
mod_df = @chain df begin mod_df = @chain df begin
@rsubset :act_dp != "" && :s_disrupt != "SD" @rsubset :act_dp != "" && :s_disrupt != "SD"
@select Not(:comments, :s_disrupt, :cancellations) @select Not(:s_disrupt, :cancellations)
@rtransform _ begin @rtransform _ begin
:act_dp = Time(:act_dp, dateformat"HH:MMp") #can't perform match if there is nothing there
:orgin_date = Date(replace(:orgin_date, r" \(.*\)" => ""), dateformat"mm/dd/YYYY") :delay = if occursin(r"Dp:", :comments) match(r"Dp:.*", :comments).match else "" end
:sch_dp = DateTime(replace(:sch_dp, r" \(.*\)" => ""), dateformat"mm/dd/YYYY HH:MM p")
end end
# @rtransform :delay = canonicalize(Dates.CompoundPeriod(:act_dp - Time(:sch_dp)))
# @rtransform :delay = canonicalize(:act_dp - Time(:sch_dp))
@rtransform :delay = :act_dp - Time(:sch_dp)
end end
Statistics.mean(mod_df.delay) gd = @by mod_df :station begin
mod_df.delay mean = Statistics.mean(:delay)
Statistics.mean(1:20) median = Statistics.median(:delay)
end