From 48f7069257f1deb0553e5c40abfc93c0d3c2c5aa Mon Sep 17 00:00:00 2001 From: Kyle Belanger Date: Thu, 8 Aug 2024 16:31:40 -0400 Subject: [PATCH] update amtrak --- LearnJulia/src/amtrak.jl | 33 ++++++--------------------------- 1 file changed, 6 insertions(+), 27 deletions(-) diff --git a/LearnJulia/src/amtrak.jl b/LearnJulia/src/amtrak.jl index d8cf66d..0269c1e 100644 --- a/LearnJulia/src/amtrak.jl +++ b/LearnJulia/src/amtrak.jl @@ -14,29 +14,7 @@ page = parsehtml(String(resp.body)) s = sel"tr" rows = eachmatch(s, page.root) -# this appears to work. Probably not the best way to do it but it works -orgin_date = [] -station = [] -sch_dp = [] -act_dp = [] -comments = [] -service_disrupt = [] -cancellations = [] - -for i in rows - text = eachmatch(Selector("td"), i) - if !isempty(text) && length(text) > 1 - push!(orgin_date, nodeText(text[1])) - push!(station, nodeText(text[2])) - push!(sch_dp, nodeText(text[3])) - push!(act_dp, nodeText(text[4])) - push!(comments, nodeText(text[5])) - push!(service_disrupt, nodeText(text[6])) - push!(cancellations, nodeText(text[7])) - end -end - - +# create empty DataFrame and then populate it with the table from website df = DataFrame(orgin_date = [], station = [], sch_dp = [], act_dp = String[], comments = [], s_disrupt = [], cancellations = []) for i in rows @@ -54,9 +32,10 @@ end mod_df = @chain df begin @rsubset :act_dp != "" - @rtransform dep = Dates.Time(:act_dp, "HH:MMp") + @rtransform _ begin + :act_dp = Time(:act_dp, dateformat"HH:MMp") + :orgin_date = Date(replace(:orgin_date, r" \(.*\)" => ""), dateformat"mm/dd/YYYY") + :sch_dp = DateTime(replace(:sch_dp, r" \(.*\)" => ""), dateformat"mm/dd/YYYY HH:MM p") + end end - - -