Gaussian Process for Event likelihoods

Preliminary steps

Loading necessary packages

using Plots
using AugmentedGaussianProcesses
using Distributions

Creating some random data

n_data = 200
X = (rand(n_data) .- 0.5) * 40
r = 5.0
Y = rand.(NegativeBinomial.(r, AGP.logistic.(sin.(X))))
scatter(X, Y)

Run GP model with negative binomial likelihood to learn p

kernel = SqExponentialKernel() ∘ ScaleTransform(1.0)
m_negbinomial = VGP(
    X, Y, kernel, NegBinomialLikelihood(r), AnalyticVI(); optimiser=false, verbose=2
)
@time train!(m_negbinomial, 20)
(Variational Gaussian Process with a Negative Binomial Likelihood (r = 5.0) infered by Analytic Variational Inference , (local_vars = (c = [0.16858106590299093, 0.2497245318513108, 0.41192553679858457, 0.17323607126526613, 0.2822046362176117, 0.18764512302546785, 0.381999318116893, 0.36887155567598423, 0.6078771890579732, 0.3260096694911288  …  0.3042805727489006, 0.5384600713896752, 0.43533162411389004, 0.5219762880657483, 0.5036106787907947, 0.31489252598745876, 0.32200659703972934, 0.41395766155390323, 0.32258839698066655, 0.46482108053807764], θ = [7.981107306563304, 8.456100422288753, 11.340100138600157, 5.985039527681864, 4.967079048871656, 3.9883042823076056, 12.350182190430216, 5.438473348249442, 3.396063798507829, 6.443035770784334  …  5.457953694180219, 6.835632782803655, 8.368257324807097, 8.801073743121407, 14.20111613147339, 5.454999067322923, 5.452963868694997, 3.4508615931472693, 7.93133890550231, 8.841382217134937]), opt_state = (NamedTuple(),), hyperopt_state = (NamedTuple(),), kernel_matrices = ((K = LinearAlgebra.Cholesky{Float64, Matrix{Float64}}([1.0000499987500624 4.194648514321859e-270 … 0.0007707672521754288 0.00024218944904762083; 4.194858241504526e-270 1.0000499987500624 … 2.669008102698157e-215 2.6636580508691252e-211; … ; 0.0007708057895746266 2.6691415497671985e-215 … 0.012044464678818851 0.0015687729296333837; 0.00024220155821735151 2.663791230442262e-211 … 0.9576147146314625 0.011050062816313546], 'U', 0),),)))

Running the same model but with a Poisson likelihood

kernel = SqExponentialKernel() ∘ ScaleTransform(1.0)
m_poisson = VGP(
    X, Y, kernel, PoissonLikelihood(r), AnalyticVI(); optimiser=false, verbose=2
)
@time train!(m_poisson, 20)
(Variational Gaussian Process with a Poisson Likelihood (λ = 14.558168422148544) infered by Analytic Variational Inference , (local_vars = (c = [0.2764754074866244, 0.1749161544481216, 0.3170936799368449, 0.26113024247180583, 0.5221144989434324, 0.18014151267700879, 0.27442762608890475, 0.2621562941429809, 0.7941580636736743, 0.21017626702709488  …  0.1844174595233179, 0.4974199961225127, 0.35189794884569287, 0.47154079300071067, 0.4403636808139311, 0.19084929825261526, 0.5482429451914524, 0.6343333787194637, 0.1997723499157573, 0.38932210388195865], θ = [9.487450303968286, 9.62335042239263, 12.00545335449778, 7.466495681558742, 6.877550058166185, 5.314263561503111, 13.10606681648127, 6.187021214463446, 5.613904555875322, 7.373176241711151  …  6.398490435650874, 7.135526183213348, 8.946101932367048, 9.151097385199511, 14.643173703321844, 6.354584288453534, 7.385743667508284, 5.511070561684602, 8.824185508519664, 9.351306091967281], γ = [8.095614888243098, 7.295747967770698, 6.211758552991208, 8.017750565975197, 9.066163669644844, 7.6572537529343805, 6.376431643122372, 6.4448295103840145, 9.811800772406846, 6.800596418674112  …  6.83322889249774, 5.564099262389634, 6.076459784330134, 5.640070154797517, 5.758092624465646, 6.747721164787685, 9.139636268704058, 9.389275740715796, 6.70702607159126, 5.938249788598529]), opt_state = (NamedTuple(),), hyperopt_state = (NamedTuple(),), kernel_matrices = ((K = LinearAlgebra.Cholesky{Float64, Matrix{Float64}}([1.0000499987500624 4.194648514321859e-270 … 0.0007707672521754288 0.00024218944904762083; 4.194858241504526e-270 1.0000499987500624 … 2.669008102698157e-215 2.6636580508691252e-211; … ; 0.0007708057895746266 2.6691415497671985e-215 … 0.012044464678818851 0.0015687729296333837; 0.00024220155821735151 2.663791230442262e-211 … 0.9576147146314625 0.011050062816313546], 'U', 0),),)))

Prediction and plot function on a grid Create a grid and compute prediction on it

function compute_grid(model, n_grid=50)
    mins = -20
    maxs = 20
    x_grid = range(mins, maxs; length=n_grid) # Create a grid
    y_grid, sig_y_grid = proba_y(model, reshape(x_grid, :, 1)) # Predict the mean and variance on the grid
    return y_grid, sig_y_grid, x_grid
end
compute_grid (generic function with 2 methods)

Plot the data as a scatter plot

function plot_data(X, Y)
    return Plots.scatter(X, Y; alpha=0.33, msw=0.0, lab="", size=(800, 500))
end

function plot_model(model, X, Y, title=nothing)
    n_grid = 100
    y_grid, sig_y_grid, x_grid = compute_grid(model, n_grid)
    p = plot_data(X, Y)
    Plots.plot!(
        p,
        x_grid,
        y_grid;
        ribbon=2 * sqrt.(sig_y_grid), # Plot 2 std deviations
        title=title,
        color="red",
        lab="",
        linewidth=3.0,
    )
    return p
end;

Comparison between the two likelihoods

Plots.plot(
    plot_model.(
        [m_negbinomial, m_poisson], Ref(X), Ref(Y), ["Neg. Binomial", "Poisson"]
    )...;
    layout=(1, 2),
)

This page was generated using Literate.jl.