update to DF

This commit is contained in:
Kyle Belanger 2024-08-14 16:31:20 -04:00
parent c48d1aac77
commit 498be96f1b

View file

@ -59,7 +59,22 @@ mod_df = @chain df begin
@rtransform :hour = if occursin(r"hr", :delay) match(r"[1-9]* hr", :delay).match |>
x -> parse(Int,match(r"[1-9]*", x).match) |> x -> x*60 else Int(0) end
@rtransform :total_delay_mins = :min + :hour |> x -> ifelse(occursin(r"late", :delay), x, x *-1)
@transform :station = categorical(:station)
@transform _ begin
:station = categorical(:station)
:train = categorical(:train)
end
end
mod_df = @chain df begin
@rsubset :act_dp != "" && :s_disrupt != "SD"
@select :train :station :comments
#can't perform match if there is nothing there
@rtransform :delay = occursin(r"Dp:", :comments) ? match(r"Dp:.*", :comments).match : ""
@rtransform :min = occursin(r"min", :delay) ? parse(Int,match(r"([0-9]*) min", :delay)[1]) : Int(0)
end
@ -68,20 +83,22 @@ gd = @chain mod_df begin
:mean = Float32[Statistics.mean(:total_delay_mins)]
:median = Statistics.median(:total_delay_mins)
:max = maximum(:total_delay_mins)
:min = minimum(:total_delay_mins)
:min = minimum(:total_delay_mins)
end
@orderby :station :train
@groupby :station
@transform :diff = [missing; diff(:mean)]
@rtransform :station_code = levelcode(:station)
@rtransform _ begin
:station_code = levelcode(:station)
:train_code = levelcode(:train)
end
end
# plot(bar(gd.station, gd.mean), xticks = (1:length(gd.station), gd.station), xrotation = 90, legend = false, title = "Mean Delay by Station", xlabel = "Station", ylabel = "Delay (mins)")
# AlgebraOfGraphics
axis = (width = 750, height = 750, title = "Mean Delay by Station", xlabel = "Station", ylabel = "Delay (mins)",xticklabelrotation = 45)
mean_delay = data(gd) * mapping(:station, :mean, color = :train => "Train", dodge = :train) * visual(BarPlot)
draw(mean_delay; axis = axis)
CairoMakie.barplot(gd.station_code, gd.mean)
# CairoMakie
CairoMakie.barplot(gd.station_code, gd.mean, dodge = gd.train_code, color = gd.train_code)