From 498be96f1bc0b4dcf41e5387ac88a9f453b7f4d7 Mon Sep 17 00:00:00 2001 From: Kyle Belanger Date: Wed, 14 Aug 2024 16:31:20 -0400 Subject: [PATCH] update to DF --- LearnJulia/src/amtrak.jl | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/LearnJulia/src/amtrak.jl b/LearnJulia/src/amtrak.jl index 5973a40..ef6add8 100644 --- a/LearnJulia/src/amtrak.jl +++ b/LearnJulia/src/amtrak.jl @@ -59,7 +59,22 @@ mod_df = @chain df begin @rtransform :hour = if occursin(r"hr", :delay) match(r"[1-9]* hr", :delay).match |> x -> parse(Int,match(r"[1-9]*", x).match) |> x -> x*60 else Int(0) end @rtransform :total_delay_mins = :min + :hour |> x -> ifelse(occursin(r"late", :delay), x, x *-1) - @transform :station = categorical(:station) + @transform _ begin + :station = categorical(:station) + :train = categorical(:train) + end +end + + + + + +mod_df = @chain df begin + @rsubset :act_dp != "" && :s_disrupt != "SD" + @select :train :station :comments + #can't perform match if there is nothing there + @rtransform :delay = occursin(r"Dp:", :comments) ? match(r"Dp:.*", :comments).match : "" + @rtransform :min = occursin(r"min", :delay) ? parse(Int,match(r"([0-9]*) min", :delay)[1]) : Int(0) end @@ -68,20 +83,22 @@ gd = @chain mod_df begin :mean = Float32[Statistics.mean(:total_delay_mins)] :median = Statistics.median(:total_delay_mins) :max = maximum(:total_delay_mins) - :min = minimum(:total_delay_mins) + :min = minimum(:total_delay_mins) end @orderby :station :train @groupby :station @transform :diff = [missing; diff(:mean)] - @rtransform :station_code = levelcode(:station) + @rtransform _ begin + :station_code = levelcode(:station) + :train_code = levelcode(:train) + end end -# plot(bar(gd.station, gd.mean), xticks = (1:length(gd.station), gd.station), xrotation = 90, legend = false, title = "Mean Delay by Station", xlabel = "Station", ylabel = "Delay (mins)") +# AlgebraOfGraphics axis = (width = 750, height = 750, title = "Mean Delay by Station", xlabel = "Station", ylabel = "Delay (mins)",xticklabelrotation = 45) - mean_delay = data(gd) * mapping(:station, :mean, color = :train => "Train", dodge = :train) * visual(BarPlot) - draw(mean_delay; axis = axis) -CairoMakie.barplot(gd.station_code, gd.mean) \ No newline at end of file +# CairoMakie +CairoMakie.barplot(gd.station_code, gd.mean, dodge = gd.train_code, color = gd.train_code) \ No newline at end of file