From 7ce256e541ce5d0051c3be32fbb3ae6410d31928 Mon Sep 17 00:00:00 2001 From: Kyle Belanger Date: Mon, 5 Aug 2024 16:53:16 -0400 Subject: [PATCH] updates to Chap 4 --- LearnJulia/src/Chapter4.jl | 41 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/LearnJulia/src/Chapter4.jl b/LearnJulia/src/Chapter4.jl index 161a546..b22ea43 100644 --- a/LearnJulia/src/Chapter4.jl +++ b/LearnJulia/src/Chapter4.jl @@ -1,7 +1,10 @@ using DataFrames using CSV +using Dates +using CategoricalArrays +using DataFramesMeta -# Julid for Data Sciences makes use of functions to keep variable scooping under control +# Julia for Data Sciences makes use of functions to keep variable scoping under control function grades_2020() name = ["Sally", "Bob", "Alice", "Hank"] @@ -14,6 +17,7 @@ function write_grades() CSV.write(path, grades_2020()) end +# Chapter 4.3 filter(:name => ==("Alice"), grades_2020()) @@ -31,4 +35,37 @@ function salaries() DataFrame(; names, salary) end -subset(salaries(), :salary => ByRow(>(2_000)); skipmissing = true) \ No newline at end of file +subset(salaries(), :salary => ByRow(>(2_000)); skipmissing = true) + +# Chapter 4.4 +function responses() + id = [1, 2] + q1 = [28, 61] + q2 = [:us, :fr] + q3 = ["F", "B"] + q4 = ["B", "C"] + q5 = ["A", "E"] + DataFrame(; id, q1, q2, q3, q4, q5) +end + +renames = (1 => "particiapant", :q1 =>"age", :q2 => "nationality") +select(responses(), renames...) + +# Chapter 4.5 +function wrong_types() + id = 1:4 + date = ["28-01-2018", "03-04-2019", "01-08-2018", "22-11-2020"] + age = ["adolescent", "adult", "infant", "adult"] + DataFrame(; id, date, age) +end + +# Trying out DataFramesMeta +df = wrong_types() +ages_levels = ["infant", "adolescent", "adult"] +strings2dates(dates::Vector) = Date.(dates, dateformat"dd-mm-yyyy") + + +@transform df begin + :date = strings2dates(:date) + :age = categorical(:age; levels = ages_levels, ordered = true) +end