add solutions to block 3

Datseris · Datseris · commit 7f21b1b6afde · 2022-07-20T17:33:51.000+02:00
diff --git a/block3_softwaredev/plot_timeseries_functions.py b/block3_softwaredev/plot_timeseries_functions.py
@@ -0,0 +1,75 @@
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+import numpy as np
+import requests
+
+
+def download_data(url):
+    """Download a text table and parse it into a NumPy array.
+
+    Parameters:
+        url (str): Address of the text file to fetch.
+
+    Returns:
+        ndarray: The array `np.genfromtxt` builds from the response body.
+
+    Raises:
+        requests.HTTPError: If the server answers with an error status.
+    """
+    from io import BytesIO
+
+    response = requests.get(url)
+    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
+    response.raise_for_status()
+
+    # Parse straight from memory: no scratch file in the working directory
+    # that could overwrite an existing "temp" or be left behind on failure.
+    return np.genfromtxt(BytesIO(response.content))
+
+
+def moving_average(t, x, w=12):
+    """Compute a moving average of `x` over windows of `w` samples.
+
+    Entry `i` of the result is the mean of `x[i : i + w]`. Only the first
+    `len(x) - w` windows are returned so that the result lines up with
+    `t[:-w]`.
+
+    Parameters:
+        t (ndarray): Time axis belonging to `x`.
+        x (ndarray): Input data.
+        w (int): Window size in samples.
+
+    Returns:
+        tuple: `(t[:-w], m)` with `m` the array of window means.
+
+    Raises:
+        ValueError: If `w` is not in the range `1 <= w < len(x)`.
+    """
+    n = len(x)
+    if w < 1 or n <= w:
+        raise ValueError(f"need 1 <= w < len(x), got w={w} and len(x)={n}")
+
+    m = np.zeros(n - w)
+    m[0] = np.sum(x[:w]) / w
+    for i in range(1, n - w):
+        # Slide the window one step: x[i + w - 1] enters, x[i - 1] leaves.
+        m[i] = m[i - 1] + (x[i + w - 1] - x[i - 1]) / w
+
+    return t[:-w], m
+
+
+def determine_trend(y):
+    """Fit a straight line to `y` by least squares.
+
+    The samples are assigned the positions `1, 2, ..., y.size`.
+
+    Parameters:
+        y (ndarray): One-dimensional input data.
+
+    Returns:
+        ndarray: Values of the fitted line at every sample position.
+    """
+    x = np.arange(y.size) + 1
+    mx = np.mean(x)
+    my = np.mean(y)
+    # `bias=True` normalises the covariance by N, matching `np.var` (which
+    # also divides by N), so the ratio is the least-squares slope.
+    b = np.cov(y, x, bias=True)[0, 1] / np.var(x)
+    a = my - b * mx
+    trend = a + b * x
+
+    return trend
+
+
+def nrmse(y, z):
+    """Compute the normalized root-mean-square error of `z` relative to `y`.
+
+    The mean squared difference between `y` and `z` is divided by the mean
+    squared deviation of `y` from its own mean before taking the root.
+
+    Parameters:
+        y (ndarray): Reference data.
+        z (ndarray): Approximation of `y` (same shape).
+
+    Returns:
+        float: `sqrt(mean((y - z)**2) / mean((y - mean(y))**2))`.
+    """
+    # Squared (not absolute) deviations, as the names "mse"/"rmse" imply.
+    mse = np.mean((y - z) ** 2)
+    msemean = np.mean((y - np.mean(y)) ** 2)
+
+    return np.sqrt(mse / msemean)
+
+
+def plot_timeseries(x, ax=None):
+    """Draw a timeseries together with its moving average and linear trend.
+
+    Parameters:
+        x (ndarray): One-dimensional data to plot against its sample index.
+        ax (matplotlib axes, optional): Axes to draw on. The current axes
+            (`plt.gca()`) are used when omitted.
+    """
+    axis = plt.gca() if ax is None else ax
+    steps = np.arange(x.size)
+
+    # Raw data as a thin line.
+    axis.plot(steps, x, linewidth=1)
+
+    # Smoothed curve, using the default window of `moving_average`.
+    avg_steps, avg_values = moving_average(steps, x)
+    axis.plot(avg_steps, avg_values, linewidth=2)
+
+    # Dotted trend line; the legend entry reports how well it fits the data.
+    fitted = determine_trend(x)
+    score = nrmse(x, fitted)
+    axis.plot(steps, fitted, linewidth=2, linestyle=":", label=f"nrmse={score:.3f}")
+    axis.legend()
+
+
+def main():
+    """Download the example dataset and plot each column in its own panel."""
+    source = "https://raw.githubusercontent.com/JuliaDynamics/NonlinearDynamicsTextbook/master/exercise_data/11.csv"
+    data = download_data(source)
+
+    # One row of the figure per column of the downloaded table.
+    _, panels = plt.subplots(nrows=2)
+    for column, panel in zip(data.T, panels.flatten()):
+        plot_timeseries(column, ax=panel)
+    plt.show()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/block3_softwaredev/plot_timeseries_monolithic_script.jl b/block3_softwaredev/plot_timeseries_monolithic_script.jl
@@ -0,0 +1,22 @@
+# plot timeseries
+# Here is a description for the script.
+# NOTE(review): the source file added alongside this script is named
+# "plot_timeseries_monolithic_source.jl" — confirm the include path below.
+include("plot_timeseries_source.jl")
+url = raw"https://raw.githubusercontent.com/JuliaDynamics/NonlinearDynamicsTextbook/master/exercise_data/11.csv"
+w = 12
+
+timeseries = download_timeseries(url)
+moving_averaged = moving_average.(timeseries, w)
+# `fit_trend` returns a `(trend, nrmse)` tuple per timeseries; split the
+# tuples into one vector of trends and one vector of errors.
+fits = fit_trend.(timeseries)
+trends = first.(fits)
+nrmses = last.(fits)
+
+fig = figure()
+for (i, x) in enumerate(timeseries)
+    subplot(2, 1, i)
+    t = 1:length(x)
+    plot(t, x; linewidth = 1)
+    # The moving average has `w` fewer entries than the raw data.
+    plot(t[1:end-w], moving_averaged[i]; linewidth = 2)
+    # Each panel shows the trend of its own timeseries.
+    plot(t, trends[i]; linewidth = 2, linestyle = ":", label = "nrmse=$(nrmses[i])")
+    ylabel("quantity $i")
+    legend()
+end
diff --git a/block3_softwaredev/plot_timeseries_monolithic_source.jl b/block3_softwaredev/plot_timeseries_monolithic_source.jl
@@ -0,0 +1,42 @@
+using DelimitedFiles
+using Downloads
+"""
+    download_timeseries(url)
+
+Download the delimited text file at `url`, parse it with `readdlm`, and return
+its columns as a vector of timeseries.
+
+# Throws
+- `ArgumentError` if the response status is not 200, or if the downloaded file
+  cannot be parsed by `readdlm`.
+"""
+function download_timeseries(url)
+    # A unique scratch path avoids overwriting an existing "temp" file.
+    path = tempname()
+    X = try
+        # A single request both fetches the body and reports the HTTP status.
+        response = Downloads.request(url; output = path)
+        response.status == 200 || throw(ArgumentError("URL doesn't exist!"))
+        try
+            readdlm(path)
+        catch
+            throw(ArgumentError("Downloaded file isn't tabular text format!"))
+        end
+    finally
+        # Remove the scratch file even when the download or the parsing failed.
+        rm(path; force = true)
+    end
+    # `collect` makes the columns indexable on every Julia version.
+    return collect(eachcol(X))
+end
+
+"""
+    moving_average(x, w)
+
+Return the means of the first `length(x) - w` windows of `w` consecutive
+entries of `x`; entry `i` of the result is the mean of `x[i:i+w-1]`.
+
+# Throws
+- `ArgumentError` unless `1 <= w < length(x)`.
+"""
+function moving_average(x, w)
+    n = length(x)
+    1 <= w < n ||
+        throw(ArgumentError("need 1 <= w < length(x), got w=$w and length(x)=$n"))
+    m = zeros(n - w)
+    m[1] = sum(x[1:w]) / w
+    for i in 2:(n - w)
+        # Slide the window one step: x[i+w-1] enters, x[i-1] leaves.
+        m[i] = m[i-1] + (x[i+w-1] - x[i-1]) / w
+    end
+    return m
+end
+
+using Statistics: mean, covm, varm
+"""
+    fit_trend(y, x = 1:length(y))
+
+Fit the straight line `a + b*x` to the data `y` by least squares.
+
+Return the tuple `(trend, err)` where `trend` holds the fitted values at `x`
+and `err` is `nrmse(y, trend, mean(y))`.
+
+# Throws
+- `DimensionMismatch` if `x` and `y` differ in length.
+"""
+function fit_trend(y, x = 1:length(y))
+    # Use the caller's `x`; it only defaults to the sample numbers.
+    length(x) == length(y) ||
+        throw(DimensionMismatch("x and y must have equal length"))
+    mx = mean(x)
+    my = mean(y)
+    # Slope = cov(x, y) / var(x), reusing the means computed above.
+    b = covm(x, mx, y, my) / varm(x, mx)
+    a = my - b * mx
+    trend = @. a + b * x
+    return trend, nrmse(y, trend, my)
+end
+
+"""
+    nrmse(y, z, my = mean(y))
+
+Return the root of the ratio between the mean squared difference of `z` and `y`
+and the mean squared deviation of `y` from `my`.
+"""
+function nrmse(y, z, my = mean(y))
+    len = length(y)
+    idxs = 1:len
+    residual_power = sum(abs2(z[k] - y[k]) for k in idxs) / len
+    spread_power = sum(abs2(y[k] - my) for k in idxs) / len
+    return sqrt(residual_power / spread_power)
+end
diff --git a/block3_softwaredev/plot_timeseries_numpy.py b/block3_softwaredev/plot_timeseries_numpy.py
@@ -0,0 +1,23 @@
+import matplotlib.pyplot as plt
+import numpy as np
+import requests
+from io import StringIO
+
+url = "https://raw.githubusercontent.com/JuliaDynamics/NonlinearDynamicsTextbook/master/exercise_data/11.csv"
+w = 12
+
+response = requests.get(url)
+# Stop early on an HTTP error instead of parsing an error page as data.
+response.raise_for_status()
+timeseries = np.genfromtxt(StringIO(response.text))
+
+fig, axes = plt.subplots(nrows=2)
+for ts, ax in zip(timeseries.T, axes):
+    x = np.arange(ts.shape[0])
+    # "valid" keeps only windows lying fully inside the data; "same" would
+    # zero-pad both ends and drag the averaged curve towards zero there.
+    moving_average = np.convolve(ts, np.ones(w) / w, mode="valid")
+    # Linear fit, evaluated once and reused for the error and the plot.
+    popt = np.polyfit(x, ts, deg=1)
+    trend = np.polyval(popt, x)
+    rmse = np.sqrt(np.mean((trend - ts) ** 2))
+    # Normalise by the spread of the data so the value matches its label.
+    nrmse = rmse / np.std(ts)
+
+    ax.plot(x, ts)
+    ax.plot(x[: moving_average.size], moving_average)
+    ax.plot(x, trend, ls=":", label=f"nrmse={nrmse:.3f}")
+    ax.legend()
+plt.show()
diff --git a/block3_softwaredev/plot_timeseries_xarray.py b/block3_softwaredev/plot_timeseries_xarray.py
@@ -0,0 +1,24 @@
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import xarray as xr
+
+
+url = "https://raw.githubusercontent.com/JuliaDynamics/NonlinearDynamicsTextbook/master/exercise_data/11.csv"
+w = 12
+
+# Two tab-separated columns, converted to an xarray Dataset over "index".
+ds = pd.read_csv(url, delimiter="\t", names=["var1", "var2"]).to_xarray()
+
+fig, axes = plt.subplots(nrows=2)
+for var, ax in zip(ds, axes):
+    ds[var].plot(ax=ax)
+    ds[var].rolling(index=w).mean().plot(ax=ax)
+    p = ds[var].polyfit(deg=1, dim="index")
+    # Evaluate the fitted line once and reuse it for the error and the plot.
+    trend = xr.polyval(ds.index, p.polyfit_coefficients)
+    rmse = np.sqrt(np.mean((trend - ds[var]) ** 2))
+    # Normalise by the spread of the data so the value matches its label.
+    nrmse = float(rmse / ds[var].std())
+    trend.plot(ls=":", label=f"nrmse={nrmse:.3f}", ax=ax)
+    ax.legend()
+plt.show()
diff --git a/block3_softwaredev/running_mean.py b/block3_softwaredev/running_mean.py
@@ -0,0 +1,95 @@
+import numpy as np
+import matplotlib.pyplot as plt
+
+
+def running_mean(x, win_size):
+    """Compute a running mean with a given window size.
+
+    Parameters:
+        x (ndarray): Input data.
+        win_size (int): Window size.
+
+    Returns:
+        ndarray: Smoothened data. Only windows that fit entirely inside `x`
+            are evaluated (`mode="valid"`).
+    """
+    # Equal weights that sum to one turn the convolution into a plain mean.
+    return np.convolve(x, np.ones(win_size) / win_size, mode="valid")
+
+
+def running_mean(x, win_size, win_type=np.ones):
+    """Smooth `x` with a weighted running mean.
+
+    Parameters:
+        x (ndarray): Input data.
+        win_size (int): Window size.
+        win_type (callable): Called as `win_type(win_size)`; must return the
+            array of window weights.
+
+    Returns:
+        ndarray: Smoothened data, evaluated only where the window fits
+            entirely inside `x`.
+    """
+    weights = win_type(win_size)
+    # Scale the weights to unit sum so the result is a weighted mean.
+    kernel = weights / weights.sum()
+    smoothed = np.convolve(x, kernel, mode="valid")
+    return smoothed
+
+
+def running_mean(x, win_size, win_type=np.ones, aggregation=None):
+    """Compute a running mean with a given window size.
+
+    Parameters:
+        x (ndarray): Input data.
+        win_size (int): Window size.
+        win_type (callable): A callable object that when passed a `win_size`
+            will return an array of weights.
+        aggregation (callable): A callable object which aggregates the data
+            within the window region. It is called with the weighted windows
+            and `axis=1`. By default, the function computes a running mean.
+
+    Note:
+        In the default configuration, i.e. a running mean, the function makes
+        use of a sliding dot product which NumPy computes without building
+        the individual windows. When passing the `aggregation` keyword this
+        approach is no longer feasible because the data has to be explicitly
+        "grouped".
+
+    Returns:
+        ndarray: Smoothened data.
+    """
+    w = win_type(win_size)
+
+    if aggregation is None:
+        # Fast path: a weighted mean is a sliding dot product. Unlike
+        # `np.convolve`, `np.correlate` does not flip the kernel, so an
+        # asymmetric window is applied exactly as in the generic branch below.
+        return np.correlate(x, w / w.sum(), mode="valid")
+    else:
+        # In the generic case, we have to group our array which is slower.
+        # Therefore, it is good to have a separate API (i.e. keyword) for this case.
+        return aggregation(
+            np.lib.stride_tricks.sliding_window_view(x, win_size) * w,
+            axis=1,
+        ) / w.mean()
+
+
+def main():
+    """Plot a noisy signal with its running mean for several window types."""
+    # Fixed seed so that every run shows the same random signal.
+    np.random.seed(1)
+    signal = np.random.randn(256) + 2
+
+    window_functions = (np.ones, np.bartlett, np.blackman, np.hamming)
+
+    _, axis = plt.subplots(figsize=(10, 6))
+    axis.plot(signal, c="grey")
+    for make_window in window_functions:
+        smoothed = running_mean(signal, win_size=16, win_type=make_window)
+        axis.plot(smoothed, linewidth=2, label=make_window.__name__.capitalize())
+    axis.legend()
+    # Clip the view to the 5th-95th percentile of the raw signal.
+    axis.set_ylim(np.percentile(signal, [5, 95]))
+
+    plt.show()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/block3_softwaredev/temporal_means_generalized.jl b/block3_softwaredev/temporal_means_generalized.jl
@@ -0,0 +1,50 @@
+#=
+This version shows code that is generalizable; it is practically a simplification
+of the code surrounding `monthlyagg` and co. in ClimateBase.jl.
+=#
+using Dates
+using Statistics
+
+# Monthly means: `monthlyagg` with `mean` as the aggregating function.
+monthlymeans(t, x) = monthlyagg(t, x; agg = mean)
+
+# Aggregate `x` with `agg` over runs of consecutive dates sharing `Dates.month`.
+monthlyagg(t, x; agg = mean) = temporal_aggregation(t, x; agg = agg, info = Dates.month)
+
+"""
+    temporal_aggregation(t, x; agg = mean, info = Dates.month)
+
+Split the sorted time vector `t` into runs of consecutive entries for which
+`info` returns the same value, and apply `agg` to the matching entries of `x`.
+
+Return `(coarse_t, y)`: `coarse_t[k]` is the date halfway between the first and
+last time of run `k`, and `y[k]` is the aggregated value of that run.
+
+# Throws
+- `DimensionMismatch` if `t` and `x` differ in length.
+"""
+function temporal_aggregation(t::AbstractVector{<:TimeType}, x::AbstractVector;
+        agg = mean, info = Dates.month
+    )
+    length(t) == length(x) ||
+        throw(DimensionMismatch("t and x must have equal length"))
+    tranges = temporal_ranges(t, info)
+    # `view` avoids copying each chunk of `x` before aggregating it.
+    y = [agg(view(x, r)) for r in tranges]
+    coarse_t = [middle_date(t[first(r)], t[last(r)]) for r in tranges]
+    # TODO: We can have a `prettify_coarse_t` function to make
+    # the time vector better in cases where it is possible,
+    # e.g. like t[1]:Month(1):t[end]
+    return coarse_t, y
+end
+
+"""
+    middle_date(t0, t1)
+
+Return the `DateTime` halfway between `t0` and `t1`. Both arguments are
+converted with `DateTime`; for a span with an odd number of milliseconds the
+half is truncated to a whole millisecond.
+"""
+function middle_date(t0, t1)
+    d0, d1 = DateTime(t0), DateTime(t1)
+    # Integer division: `Millisecond / 2` throws `InexactError` for odd spans.
+    return d0 + div(d1 - d0, 2)
+end
+
+"""
+    temporal_ranges(t, info = Dates.month)
+
+Return the index ranges of the runs of consecutive entries of the sorted time
+array `t` for which `info` returns the same value. An empty `t` gives an empty
+vector of ranges.
+
+# Throws
+- `ArgumentError` if `t` is not sorted.
+"""
+function temporal_ranges(t::AbstractArray{<:TimeType}, info = Dates.month)
+    issorted(t) || throw(ArgumentError("Sorted time required."))
+    r = Vector{UnitRange{Int}}()
+    isempty(t) && return r # nothing to group
+    L = length(t)
+    i, x = 1, info(t[1]) # start index and `info` value of the current run
+    for j in 2:L
+        y = info(t[j])
+        x == y && continue
+        # `info` changed: the current run ends just before `j`.
+        push!(r, i:(j-1))
+        i, x = j, y
+    end
+    push!(r, i:L) # final range not included in for loop
+    return r
+end
+
+# Example data: six years of daily dates, each valued by its month number.
+t = Date(2015, 1, 1):Day(1):Date(2020, 12, 31)
+x = [float(month(d)) for d in t]
+m, y = monthlymeans(t, x)
+
+# Same aggregation, grouped by a Boolean "month is March through August" flag
+# (labelled summer vs. winter here) instead of by calendar month.
+summer(x) = 3 <= month(x) <= 8
+m, y = temporal_aggregation(t, x; info = summer)
diff --git a/block3_softwaredev/temporal_means_generalized_tests.jl b/block3_softwaredev/temporal_means_generalized_tests.jl
diff --git a/block3_softwaredev/temporal_means_monthly_specific.jl b/block3_softwaredev/temporal_means_monthly_specific.jl