Utility functions
Select Iterator
In many cases our model is updating some state as an iterator. I want to be able to select arbitrary slices from that iterator. The Select object contains the main iterable and a selection iterable that should yield integers. When iterated upon, we repeatedly use Iterators.dropwhile to get to the next index.
file:src/Utility.jl
module Utility
export select, in_units_of
using Unitful
struct Select
iter
selection
end
function select(it, sel)
Select(enumerate(it), sel)
end
function Base.iterate(s::Select)
x = iterate(s.selection)
if x !== nothing
(idx, selstate) = x
((_, value), rest) = Iterators.peel(Iterators.dropwhile(((i, y),) -> i != idx, s.iter))
return (value, (selstate, rest))
else
return nothing
end
end
function Base.iterate(s::Select, state)
(selstate, rest) = state
x = iterate(s.selection, selstate)
if x !== nothing
(idx, selstate) = x
((_, value), rest) = Iterators.peel(Iterators.dropwhile(((i, y),) -> i != idx, s.iter))
return (value, (selstate, rest))
else
return nothing
end
end
<<utility>>
endTagging a sequence of vectors
The enumerate_seq iterator is also used in the algorithm to trace hiatus in sediment accumulation. Here the task is to enumerate a nested sequence while preserving the nested structure.
⪡utility-spec⪢≣
a = [[:a, :b], [:c]]
@test collect(enumerate_seq(a)) == [[(1, :a), (2, :b)], [(3, :c)]]⪡utility⪢≣
struct TagVectors{T}
vectors::T
end
function Base.iterate(tv::TagVectors{T}) where T
tag = 1
x = iterate(tv.vectors)
isnothing(x) && return nothing
(v_, nit) = x
v = collect(v_)
n = length(v)
return zip(tag:tag+n-1, v) |> collect, (tag+n, nit)
end
function Base.iterate(tv::TagVectors{T}, st::Tuple{Int,U}) where {T, U}
(tag, it) = st
isnothing(it) && return nothing
x = iterate(tv.vectors, it)
isnothing(x) && return nothing
(v_, nit) = x
v = collect(v_)
n = length(v)
return zip(tag:tag+n-1, v) |> collect, (tag+n, nit)
end
Base.IteratorSize(::Type{TagVectors{T}}) where T = Base.IteratorSize(T)
Base.size(r::TagVectors{T}) where T = size(r.vectors)
Base.length(r::TagVectors{T}) where T = length(r.vectors)
Base.eltype(::Type{TagVectors{T}}) where T = Any
# This should be:
# Iterators.Zip{Tuple{UnitRange{Int},Vector{eltype(eltype(T))}}}
# But that doesn't work
"""
enumerate_seq(s)
Enumerates an iterator of sequences, such that the following equivalence
holds:
enumerate(flatten(s)) == flatten(enumerate_seq(s))
"""
enumerate_seq(s::T) where T = TagVectors{T}(s)
export enumerate_seqfile:test/UtilitySpec.jl
@testset "CarboKitten.Utility" begin
using CarboKitten.Utility
<<utility-spec>>
endReading data files
file:src/DataSets.jl
module DataSets
export read_tsv, read_csv
using DelimitedFiles: readdlm
using DataFrames
using CategoricalArrays
using CSV
using Pkg.Artifacts
using Unitful
function read_tsv(filename)
data, header = readdlm(filename, '\t', header=true)
return DataFrame(data, vec(header))
end
function read_csv(filename)
return DataFrame(CSV.File(filename))
end
function artifact_dir()
subfolder = first(readdir(artifact"data"))
return joinpath(artifact"data", subfolder)
end
function bosscher_schlager_1992()
dir = artifact_dir()
filename = joinpath(dir, "Bosscher1992", "bs92-sealevel-curve.csv")
df = read_csv(filename)
return DataFrame(time=df.time * u"yr", sealevel=df.depth * u"m")
end
function miller_2020()
dir = artifact_dir()
filename = joinpath(dir, "Miller2020", "Cenozoic_sea_level_reconstruction.tab")
df = read_tsv(filename)
return DataFrame(
time=-df[!,4] * u"kyr",
sealevel=df[!,7] * u"m",
refkey=categorical(df[!,2]),
reference=categorical(df[!,3]))
end
end