From d3ec1f19b40dae6ca16e600dff0789fe6877b1ba Mon Sep 17 00:00:00 2001
From: Kyle Belanger <kyleb44@hotmail.com>
Date: Thu, 8 Aug 2024 14:31:52 -0400
Subject: [PATCH] update amtrak.jl

---
 LearnJulia/src/amtrak.jl | 40 +++++++++++++++++++++++++---------------
 1 file changed, 25 insertions(+), 15 deletions(-)

diff --git a/LearnJulia/src/amtrak.jl b/LearnJulia/src/amtrak.jl
index 79224e9..d8cf66d 100644
--- a/LearnJulia/src/amtrak.jl
+++ b/LearnJulia/src/amtrak.jl
@@ -2,6 +2,8 @@ using HTTP
 using Gumbo
 using Cascadia
 using DataFrames
+using DataFramesMeta
+using Dates
 
 url = "https://juckins.net/amtrak_status/archive/html/history.php?train_num=97&station=&date_start=07%2F01%2F2024&date_end=07%2F31%2F2024&df1=1&df2=1&df3=1&df4=1&df5=1&df6=1&df7=1&sort=schDp&sort_dir=DESC&co=gt&limit_mins=&dfon=1"
 
@@ -12,15 +14,6 @@ page = parsehtml(String(resp.body))
 s = sel"tr"
 rows = eachmatch(s, page.root)
 
-row_text = String[]
-
-# need to make this a nested for loopS
-for i in rows
-    text = nodeText(eachmatch(Selector("tr"), i)[1])
-    println("$text")
-    push!(row_text, text)
-end
-
 # this appears to work.  Probably not the best way to do it but it works
 orgin_date = []
 station = []
@@ -40,13 +33,30 @@ for i in rows
         push!(comments, nodeText(text[5]))
         push!(service_disrupt, nodeText(text[6]))
         push!(cancellations, nodeText(text[7]))
-        
-        # for el in text
-        #     test = nodeText(el) * ','
-        #     println(test)
-        # end
     end
 end
 
 
-df = DataFrame(orgin_date = orgin_date)
\ No newline at end of file
+# One String column per <td> cell of a scraped train-history table row.
+df = DataFrame(orgin_date = String[], station = String[], sch_dp = String[],
+    act_dp = String[], comments = String[], s_disrupt = String[],
+    cancellations = String[])
+
+# Keep only rows with exactly one <td> per column. Anything else (presumably
+# header or notice rows) would make `push!` throw on a length mismatch.
+for row in rows
+    cells = eachmatch(Selector("td"), row)
+    length(cells) == ncol(df) || continue
+    push!(df, [nodeText(cell) for cell in cells])
+end
+
+# Drop rows whose actual departure is empty, then parse the remaining values
+# with the "HH:MMp" format (`p` is the AM/PM specifier) into a `:dep` column.
+mod_df = @chain df begin
+    @rsubset :act_dp != ""
+    @rtransform :dep = Dates.Time(:act_dp, "HH:MMp")
+end
+
+
+
+