# Patch for LearnJulia/src/amtrak.jl (index 0269c1e..6071de6): adds a per-row departure
# delay to the `mod_df` pipeline and some exploratory summary calls.
#
# Hunk 1 (@@ -4,6 +4,7 @@): adds `using Statistics` after `using Dates`.
#
# Hunk 2 (@@ -31,11 +32,19 @@), inside `mod_df = @chain df begin ... end`:
#   * the `@rsubset` filter now keeps a row only when `:act_dp != ""` AND
#     `:s_disrupt != "SD"` (previously only the `:act_dp` check);
#   * a new `@select Not(:comments, :s_disrupt, :cancellations)` step drops those
#     three columns before parsing;
#   * the existing `@rtransform` block (parsing `:act_dp`, `:orgin_date`, `:sch_dp`)
#     is unchanged context;
#   * a new `@rtransform :delay = :act_dp - Time(:sch_dp)` step is appended, preceded
#     by two commented-out `canonicalize(...)` variants of the same expression;
#   * after the chain, three top-level expressions are added:
#     `Statistics.mean(mod_df.delay)`, `mod_df.delay`, and `Statistics.mean(1:20)`.
#
# NOTE(review): `:delay` subtracts two `Time` values (`:act_dp` is parsed as `Time`,
#   `:sch_dp` is reduced with `Time(...)`), so it is a time-of-day difference only.
#   A departure that slips past midnight would presumably come out negative - confirm.
# NOTE(review): `Statistics.mean` over a column of Dates period values may raise
#   `InexactError` when the average is not a whole number of units - TODO confirm;
#   averaging `Dates.value.(mod_df.delay)` might be what is wanted.
# NOTE(review): `:act_dp` uses dateformat"HH:MMp" while `:sch_dp` uses "HH:MM p"
#   (space before `p`) - verify this matches the scraped strings.
# NOTE(review): the patch text below sits on one physical line; its original line
#   breaks would need restoring before it can be applied - confirm against the commit.
diff --git a/LearnJulia/src/amtrak.jl b/LearnJulia/src/amtrak.jl index 0269c1e..6071de6 100644 --- a/LearnJulia/src/amtrak.jl +++ b/LearnJulia/src/amtrak.jl @@ -4,6 +4,7 @@ using Cascadia using DataFrames using DataFramesMeta using Dates +using Statistics url = "https://juckins.net/amtrak_status/archive/html/history.php?train_num=97&station=&date_start=07%2F01%2F2024&date_end=07%2F31%2F2024&df1=1&df2=1&df3=1&df4=1&df5=1&df6=1&df7=1&sort=schDp&sort_dir=DESC&co=gt&limit_mins=&dfon=1" @@ -31,11 +32,19 @@ end mod_df = @chain df begin - @rsubset :act_dp != "" + @rsubset :act_dp != "" && :s_disrupt != "SD" + @select Not(:comments, :s_disrupt, :cancellations) @rtransform _ begin :act_dp = Time(:act_dp, dateformat"HH:MMp") :orgin_date = Date(replace(:orgin_date, r" \(.*\)" => ""), dateformat"mm/dd/YYYY") :sch_dp = DateTime(replace(:sch_dp, r" \(.*\)" => ""), dateformat"mm/dd/YYYY HH:MM p") end + # @rtransform :delay = canonicalize(Dates.CompoundPeriod(:act_dp - Time(:sch_dp))) + # @rtransform :delay = canonicalize(:act_dp - Time(:sch_dp)) + @rtransform :delay = :act_dp - Time(:sch_dp) end + +Statistics.mean(mod_df.delay) +mod_df.delay +Statistics.mean(1:20)