updates to Chap 4

This commit is contained in:
Kyle Belanger 2024-08-05 16:53:16 -04:00
parent e98ba8ad6b
commit 7ce256e541

View file

@ -1,7 +1,10 @@
using DataFrames using DataFrames
using CSV using CSV
using Dates
using CategoricalArrays
using DataFramesMeta
# Julid for Data Sciences makes use of functions to keep variable scooping under control # Julia for Data Sciences makes use of functions to keep variable scoping under control
function grades_2020() function grades_2020()
name = ["Sally", "Bob", "Alice", "Hank"] name = ["Sally", "Bob", "Alice", "Hank"]
@ -14,6 +17,7 @@ function write_grades()
CSV.write(path, grades_2020()) CSV.write(path, grades_2020())
end end
# Chapter 4.3
filter(:name => ==("Alice"), grades_2020()) filter(:name => ==("Alice"), grades_2020())
@ -32,3 +36,36 @@ function salaries()
end end
subset(salaries(), :salary => ByRow(>(2_000)); skipmissing = true) subset(salaries(), :salary => ByRow(>(2_000)); skipmissing = true)
# Chapter 4.4
"""
    responses()

Build a two-row `DataFrame` of example questionnaire answers with the columns
`id`, `q1`, `q2`, `q3`, `q4` and `q5`, in that order.
"""
function responses()
    return DataFrame(;
        id = [1, 2],
        q1 = [28, 61],
        q2 = [:us, :fr],
        q3 = ["F", "B"],
        q4 = ["B", "C"],
        q5 = ["A", "E"],
    )
end
# Rename while selecting: the first column is addressed by position, q1 and q2 by name.
# `select` keeps only the columns listed here, so q3, q4 and q5 are not in the result.
renames = (1 => "participant", :q1 => "age", :q2 => "nationality")
select(responses(), renames...)
# Chapter 4.5
"""
    wrong_types()

Build a four-row `DataFrame` with columns `id`, `date` and `age`, where `date` and
`age` are deliberately stored as plain strings.
"""
function wrong_types()
    return DataFrame(;
        id = 1:4,
        date = ["28-01-2018", "03-04-2019", "01-08-2018", "22-11-2020"],
        age = ["adolescent", "adult", "infant", "adult"],
    )
end
# Trying out DataFramesMeta
# Table whose `date` and `age` columns are still plain strings.
df = wrong_types()
# Level order for the `age` categories, listed as infant, adolescent, adult;
# passed as `levels` to `categorical` in the `@transform` call below.
ages_levels = ["infant", "adolescent", "adult"]
"""
    strings2dates(dates::AbstractVector)

Parse every element of `dates` as a `dd-mm-yyyy` string (for example `"28-01-2018"`)
and return the resulting collection of `Date` values.

Accepts any `AbstractVector` (plain vectors, views, ...), not only `Vector`.
"""
strings2dates(dates::AbstractVector) = Date.(dates, dateformat"dd-mm-yyyy")
# Produce a new table from `df` in which `date` holds parsed dates and `age` is an
# ordered categorical column using the level order given by `ages_levels`.
@transform(
    df,
    :date = strings2dates(:date),
    :age = categorical(:age; levels = ages_levels, ordered = true)
)