diff --git a/LearnJulia/src/JFDA.jl b/LearnJulia/src/JFDA.jl
index f6724ee..a9efa09 100644
--- a/LearnJulia/src/JFDA.jl
+++ b/LearnJulia/src/JFDA.jl
@@ -3,6 +3,7 @@
 using CSV
 using DataFrames
 using Plots
+using SQLite
 
 puzzlespath = joinpath("data", "puzzles.csv");
 
@@ -13,4 +14,33 @@ show(describe(puzzles))# use to show summary stats
 
 # 8.3.4 Quick Histogram
 plot([histogram(puzzles[!,col]; label = col) for
-    col in ["Rating", "RatingDeviation", "Popularity", "NbPlays"]]...)
\ No newline at end of file
+    col in ["Rating", "RatingDeviation", "Popularity", "NbPlays"]]...)
+
+
+# 8.4.2 SQLite
+db = SQLite.DB(joinpath("data", "puzzles.db"));
+# NOTE(review): confirm load! behavior on re-runs, when table "puzzles" already exists.
+SQLite.load!(puzzles, db, "puzzles") # store `puzzles` in a database table named "puzzles"
+
+query = DBInterface.execute(db, "SELECT * FROM puzzles") # select every row and column
+
+puzzles_db = DataFrame(query); # build a DataFrame from the query result
+
+close(db)
+
+
+# Chapter 9
+
+using Statistics
+
+plays_lo = median(puzzles.NbPlays); # lower bound: median of NbPlays
+rating_lo = 1500;
+rating_hi = quantile(puzzles.Rating, 0.99); # upper bound: 0.99 quantile of Rating
+
+# true for rows with NbPlays above plays_lo and Rating strictly between the two bounds
+row_selector = (puzzles.NbPlays .> plays_lo) .&&
+    (rating_lo .< puzzles.Rating .< rating_hi);
+
+good = puzzles[row_selector, ["Rating", "Popularity"]]
+
+plot(histogram(good.Rating; label = "Rating"), histogram(good.Popularity; label = "Popularity"))
\ No newline at end of file
diff --git a/Project.toml b/Project.toml
index 9c9938b..cb0b823 100644
--- a/Project.toml
+++ b/Project.toml
@@ -2,3 +2,4 @@
 CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
+SQLite = "0aa819cd-b072-5ff4-a722-6bc24af294d9"