From 6469cb9a60f4cb16bf52324a0d07a1ffed418ffa Mon Sep 17 00:00:00 2001
From: Kyle Belanger
Date: Wed, 7 Aug 2024 16:26:06 -0400
Subject: [PATCH] update amtrak

---
Review notes (everything between the `---` line and the diff is ignored by
`git am`):
- Only rows with at least 7 <td> cells are read. The previous
  `length(text) > 1` guard let 2-6 cell rows through, and `text[7]` would
  then raise a BoundsError.
- The column vectors are `String[]` instead of untyped `[]`, consistent with
  `row_text`.
- `df` is built from all seven collected columns instead of only
  `orgin_date` (column names are kept as-is).
- The commented-out debug loop is dropped and the file ends with a newline.
- The commit id above and the blob ids on the `index` line are carried over
  unchanged from the original submission.

 LearnJulia/src/amtrak.jl | 43 ++++++++++++++++++++++++++++++++++++-------
 1 file changed, 36 insertions(+), 7 deletions(-)

diff --git a/LearnJulia/src/amtrak.jl b/LearnJulia/src/amtrak.jl
index f100646..79224e9 100644
--- a/LearnJulia/src/amtrak.jl
+++ b/LearnJulia/src/amtrak.jl
@@ -1,6 +1,7 @@
 using HTTP
 using Gumbo
 using Cascadia
+using DataFrames
 
 url = "https://juckins.net/amtrak_status/archive/html/history.php?train_num=97&station=&date_start=07%2F01%2F2024&date_end=07%2F31%2F2024&df1=1&df2=1&df3=1&df4=1&df5=1&df6=1&df7=1&sort=schDp&sort_dir=DESC&co=gt&limit_mins=&dfon=1"
 
@@ -8,22 +9,50 @@ resp = HTTP.get(url)
 page = parsehtml(String(resp.body))
 
 # println(String(resp.body))
-sel = Selector("table")
 s = sel"tr"
 
-eachmatch(sel, page.root)
 rows = eachmatch(s, page.root)
 
+row_text = String[]
 # need to make this a nested for loopS
 for i in rows
     text = nodeText(eachmatch(Selector("tr"), i)[1])
     println("$text")
+    push!(row_text, text)
 end
 
+# One vector per table column; each is filled from the matching <td> of a row.
+# Typed String[] (not untyped []) so the DataFrame columns are not Vector{Any}.
+orgin_date = String[]
+station = String[]
+sch_dp = String[]
+act_dp = String[]
+comments = String[]
+service_disrupt = String[]
+cancellations = String[]
+
 for i in rows
-    e = eachmatch(Selector("tr"), i)
-    for x in e
-        text = nodeText(eachmatch(Selector("td"),x)[1])
-        println("$text")
+    text = eachmatch(Selector("td"), i)
+    # Rows with fewer than 7 <td> cells are skipped: all seven cells are
+    # indexed below, and a shorter row would throw a BoundsError.
+    if length(text) >= 7
+        push!(orgin_date, nodeText(text[1]))
+        push!(station, nodeText(text[2]))
+        push!(sch_dp, nodeText(text[3]))
+        push!(act_dp, nodeText(text[4]))
+        push!(comments, nodeText(text[5]))
+        push!(service_disrupt, nodeText(text[6]))
+        push!(cancellations, nodeText(text[7]))
     end
-end
\ No newline at end of file
+end
+
+# Assemble every collected column; the vectors are pushed to together, so
+df = DataFrame(
+    orgin_date = orgin_date,
+    station = station,
+    sch_dp = sch_dp,
+    act_dp = act_dp,
+    comments = comments,
+    service_disrupt = service_disrupt,
+    cancellations = cancellations,
+)