Test the Linear Gaussian Model: A comparison with the Kalman filter algorithm

Testing pypomp 0.0.6 on 2025-05-15 at run level 2 (0 is for debugging, 2 is full-length).

Model Setup

We consider a linear Gaussian dynamic system, \[\begin{align} X_n &= A X_{n-1} + W_n, \end{align}\] where \(X_n\) is the state vector at time \(n\), \(A\) is the state transition matrix, and \(W_n \sim \mathcal{N}(0, Q)\) is the process noise, normally distributed with mean \(0\) and covariance \(Q\). The measurement model is \[\begin{align} Y_n &= C X_n + V_n, \end{align}\] where \(Y_n\) is the observation vector at time \(n\), \(C\) is the observation matrix, and \(V_n \sim \mathcal{N}(0, R)\) is the observation noise, normally distributed with mean \(0\) and covariance \(R\).

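For reference, the Kalman filter computes the exact log-likelihood of this model through the prediction decomposition, and this exact value is the benchmark used throughout: \[\begin{align} \ell(\theta) &= \sum_{n=1}^{N} \log p(y_n \mid y_{1:n-1}; \theta), & Y_n \mid y_{1:n-1} &\sim \mathcal{N}\big(C \hat{x}_{n|n-1},\; C P_{n|n-1} C^\top + R\big), \end{align}\] where \(\hat{x}_{n|n-1} = A \hat{x}_{n-1|n-1}\) and \(P_{n|n-1} = A P_{n-1|n-1} A^\top + Q\) are the one-step prediction mean and covariance from the standard Kalman recursions.
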
Here we build the linear Gaussian POMP model explicitly rather than using pypomp.LG, in part because the full functionality is not yet in place. For example, we cannot simulate data from a POMP until the class has an rmeasure, which is not needed for filtering and so has not yet been fully incorporated; a sketch of what such an rmeasure could look like is given below.

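For illustration, an rmeasure for this model would simply draw from the measurement equation. The following sketch shows what it might look like; the name custom_rmeas and its signature are our assumptions (mirroring custom_rproc below), not an existing pypomp interface:

Code
def custom_rmeas(state, theta, key, covars=None):
    # hypothetical measurement simulator: draw Y_n ~ N(C x_n, R)
    A, C, Q, R = get_thetas(theta)  # get_thetas is defined in the next chunk
    return jax.random.multivariate_normal(key=key, mean=C @ state, cov=R)
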
Set N (length of time series) to be 100 and generate linear Gaussian states and observations:

Code
# imports for this chunk; the pypomp helpers and run parameters (N, J,
# N_reps, out_dir, N_values, J_values, ...) come from the setup not shown here
import jax
import jax.numpy as jnp

# po = pypomp.LG(T=10)

def get_thetas(theta):
    # unpack the flat parameter vector into the four 2x2 model matrices
    A = theta[0:4].reshape(2, 2)
    C = theta[4:8].reshape(2, 2)
    Q = theta[8:12].reshape(2, 2)
    R = theta[12:16].reshape(2, 2)
    return A, C, Q, R

def transform_thetas(A, C, Q, R):
    # flatten the model matrices back into a single parameter vector
    return jnp.concatenate([A.flatten(), C.flatten(), Q.flatten(), R.flatten()])


fixed = False
key = jax.random.PRNGKey(111)
angle = 0.2
angle2 = angle if fixed else -0.5
A = jnp.array([[jnp.cos(angle2), -jnp.sin(angle)],
               [jnp.sin(angle), jnp.cos(angle2)]])
C = jnp.eye(2)
Q = jnp.array([[1, 1e-4],
               [1e-4, 1]])
R = jnp.array([[1, .1],
               [.1, 1]])

theta = transform_thetas(A, C, Q, R)

def generate_data(N, key):
    # simulate N steps of the linear Gaussian state and observation processes
    xs = []
    ys = []
    x = jnp.zeros(2)
    for i in range(N):
        key, subkey = jax.random.split(key)
        x = jax.random.multivariate_normal(key=subkey, mean=A @ x, cov=Q)
        key, subkey = jax.random.split(key)
        y = jax.random.multivariate_normal(key=subkey, mean=C @ x, cov=R)
        xs.append(x)
        ys.append(y)
    xs = jnp.array(xs)
    ys = jnp.array(ys)
    return xs, ys, key


def custom_rinit(theta, J, covars=None):
    # initialize all J particles at the origin
    return jnp.zeros((J, 2))


def custom_rproc(state, theta, key, covars=None):
    # one step of the state process: X_n ~ N(A x_{n-1}, Q)
    A, C, Q, R = get_thetas(theta)
    key, subkey = jax.random.split(key)
    return jax.random.multivariate_normal(key=subkey,
                                          mean=A @ state, cov=Q)


def custom_dmeas(y, preds, theta):
    # measurement log-density log N(y; C x_n, R); preds holds the particle
    # state (here C is the identity, but we apply it for generality)
    A, C, Q, R = get_thetas(theta)
    return jax.scipy.stats.multivariate_normal.logpdf(y, C @ preds, R)


rinit = custom_rinit
rproc = custom_rproc
dmeas = custom_dmeas
# vmap over particles (axis 0 of states and keys); rprocesses/dmeasures also
# vmap over per-particle parameter vectors, as used by perturbed filters
rprocess = jax.vmap(custom_rproc, (0, None, 0, None))
dmeasure = jax.vmap(custom_dmeas, (None, 0, None))
rprocesses = jax.vmap(custom_rproc, (0, 0, 0, None))
dmeasures = jax.vmap(custom_dmeas, (None, 0, 0))

Set J = 10000 particles and compare the exact Kalman filter log-likelihood against the log-mean-exp of 100 replicated estimates from several methods, including the classical particle filter and MOP. The helpers logmeanexp and logmeanexp_se are used below; a sketch of what we assume they compute follows.

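The definitions of logmeanexp and logmeanexp_se are not shown in this document. A minimal sketch of the quantities we assume they compute (a stabilized log of the mean of the exponentiated values, and a jackknife standard error; the _sketch suffix marks these as our assumptions, not pypomp's exact code):

Code
from jax.scipy.special import logsumexp

def logmeanexp_sketch(x):
    # stable log of the mean of exp(x)
    x = jnp.asarray(x)
    return logsumexp(x) - jnp.log(x.size)

def logmeanexp_se_sketch(x):
    # jackknife standard error of logmeanexp
    x = jnp.asarray(x)
    n = x.size
    loo = jnp.array([logsumexp(jnp.delete(x, i)) - jnp.log(n - 1)
                     for i in range(n)])
    return jnp.sqrt((n - 1) * jnp.var(loo))
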
xs, ys, key = generate_data(N, key)
kf = pykalman.KalmanFilter(
    transition_matrices=A, observation_matrices=C, 
    transition_covariance=Q, observation_covariance=R,
    initial_state_covariance=Q)
print("kf loglik =", kf.loglikelihood(ys))
kf loglik = -378.402440173952
pickle_file = out_dir + "/pfilter-test.pkl"
if os.path.exists(pickle_file):
    with open(pickle_file, 'rb') as f:
        loglike = pickle.load(f)
else:
    loglike = []
    for i in range(N_reps):
        key, subkey = jax.random.split(key)
        pfilter_val = -_pfilter_internal(
            theta, ys, J=J, rinit=rinit,
            rprocess=rprocess, dmeasure=dmeasure,
            covars=None, key=subkey, thresh=-1)
        loglike.append(pfilter_val)
    with open(pickle_file, 'wb') as f:
        pickle.dump(loglike, f)

loglike_ = jnp.array(loglike)

print("Logmeanexp of Particle Filtering =", logmeanexp(loglike_))
print("SE on logmeanexp =", logmeanexp_se(loglike_))
print("difference between Kalman-Filtering and logmeanexp of Particle Filtering =",
    kf.loglikelihood(ys) - logmeanexp(loglike_))
Logmeanexp of Particle Filtering = -378.45117
SE on logmeanexp = 0.03881513
difference between Kalman-Filtering and logmeanexp of Particle Filtering = 0.048736572

We check that the difference between the Kalman filter log-likelihood and the particle filter estimate is comparable to the Monte Carlo standard error, and that this error is small because we use many particles and many replications; a quick way to quantify the comparison is sketched below.

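As a quick check not printed in the original output, the discrepancy can be expressed in units of the Monte Carlo standard error; from the values above, 0.0487 / 0.0388 ≈ 1.26, well within ordinary Monte Carlo variation:

Code
diff = kf.loglikelihood(ys) - logmeanexp(loglike_)
print("discrepancy in SE units =", diff / logmeanexp_se(loglike_))  # ~1.26 here
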
Next, we test the output of the MOP algorithm. The pypomp MOP algorithm is run with \(\phi = \theta\), in which case MOP should be equivalent to the particle filter and produce identical output when given the same random key. A minimal sanity check of this equivalence follows, before the full comparison over a range of \(\alpha\) values.

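This sketch reuses the _pfilter_internal and _mop_internal calls from elsewhere in this document; the key and the choice \(\alpha = 1\) are ours, and the expected agreement is the same equivalence tabulated below:

Code
key_check = jax.random.PRNGKey(1)
pf_ll = -_pfilter_internal(theta, ys, J=J, rinit=rinit, rprocess=rprocess,
                           dmeasure=dmeasure, covars=None, key=key_check,
                           thresh=-1)
mop_ll = -_mop_internal(theta, ys, J=J, rinit=rinit, rprocess=rprocess,
                        dmeasure=dmeasure, covars=None, key=key_check,
                        alpha=1)
# with phi = theta and a shared key, the two estimates should coincide
assert jnp.allclose(pf_ll, mop_ll)
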
alphas = [0, 0.1, 0.3, 0.6, 0.9, 1]

key = jax.random.PRNGKey(0)  # fixed seed for reproducibility
subkeys = jax.random.split(key, 100)  # pre-generate one key per replication

pickle_file = out_dir + "/test-alpha.pkl"
if os.path.exists(pickle_file):
    with open(pickle_file, 'rb') as f:
        results = pickle.load(f)
        loglike_pf = pickle.load(f)
else:
    results = []
    for alpha in alphas:
        loglike_mop = []
        for i, subkey in enumerate(subkeys):
            mop_val = -_mop_internal(theta, ys, J=J, rinit=rinit,
                rprocess=rprocess, dmeasure=dmeasure,
                covars=None, key=subkey, alpha=alpha)
            loglike_mop.append(mop_val)
        loglike_mop = jnp.array(loglike_mop)
        logmeanexp_val = logmeanexp(loglike_mop)
        difference = kf.loglikelihood(ys) - logmeanexp_val
        results.append((alpha, logmeanexp_val, difference))
    # Rerun the particle filter with the same keys for comparison
    loglike_pf = []
    for i, subkey in enumerate(subkeys):
        pfilter_val = -_pfilter_internal(
            theta, ys, J=J, rinit=rinit,
            rprocess=rprocess, dmeasure=dmeasure,
            covars=None, key=subkey, thresh=-1)
        loglike_pf.append(pfilter_val)
    with open(pickle_file, 'wb') as f:
        pickle.dump(results, f)
        pickle.dump(loglike_pf, f)


alpha_table = tabulate(results, tablefmt="grid",
                       headers=("alpha", "mop_logmeanexp", "kalman-mop"))
print(alpha_table)
+---------+------------------+--------------+
|   alpha |   mop_logmeanexp |   kalman-mop |
+=========+==================+==============+
|     0   |         -378.452 |    0.0491028 |
+---------+------------------+--------------+
|     0.1 |         -378.452 |    0.0491028 |
+---------+------------------+--------------+
|     0.3 |         -378.452 |    0.0491028 |
+---------+------------------+--------------+
|     0.6 |         -378.452 |    0.0491028 |
+---------+------------------+--------------+
|     0.9 |         -378.452 |    0.0491333 |
+---------+------------------+--------------+
|     1   |         -378.452 |    0.0491028 |
+---------+------------------+--------------+

Consistency: for \(\alpha = 0\) and \(\alpha = 1\), the MOP logmeanexp is -378.452, with a gap of 0.0491028 from the Kalman filter log-likelihood, coinciding with the particle filter output computed with the same keys, as expected for MOP with \(\phi = \theta\). The intermediate values \(\alpha = 0.1, 0.3, 0.6\) agree to the displayed precision, and only \(\alpha = 0.9\) deviates slightly (gap 0.0491333).

Test the Linear Gaussian Model: How the estimated log-likelihood difference and running time vary with N and J

pickle_file = out_dir + "/N-J-test.pkl"
if os.path.exists(pickle_file):
    with open(pickle_file, 'rb') as f:
        NJresults = pickle.load(f)
else:
    NJresults = []
    key = jax.random.PRNGKey(112)
    # N_values and J_values come from the setup; at run level 2 the table
    # below uses N in {100, 500} and J in {10, 100, 1000, 10000}
    for N_val in N_values:
        for J_val in J_values:
            print(f"Running with N={N_val}, J={J_val}...")

            xs, ys, key = generate_data(N_val, key)
            pf_loglik_arr = []
            mop_loglik_arr = []
            elapsed_time1_arr = []
            elapsed_time2_arr = []

            for i in range(N_reps):
                # note: the same key is passed to pfilter and mop in every
                # replication, so both methods see identical randomness
                start_time = time.time()
                pf_val = -pfilter(J=J_val,
                                  rinit=rinit, rprocess=rprocess,
                                  dmeasure=dmeasure, theta=theta, ys=ys,
                                  thresh=0, key=key)
                pf_loglik_arr.append(pf_val)
                elapsed_time1_arr.append(time.time() - start_time)

                start_time2 = time.time()
                mop_val = -mop(J=J_val,
                               rinit=rinit, rprocess=rprocess,
                               dmeasure=dmeasure, theta=theta, ys=ys,
                               alpha=0.9, key=key)
                mop_loglik_arr.append(mop_val)
                elapsed_time2_arr.append(time.time() - start_time2)

            pf_loglik_arr = jnp.array(pf_loglik_arr)
            mop_loglik_arr = jnp.array(mop_loglik_arr)
            elapsed_time1_arr = jnp.array(elapsed_time1_arr)
            elapsed_time2_arr = jnp.array(elapsed_time2_arr)

            NJresults.append({
                'N': N_val,
                'J': J_val,
                'pf_loglik': logmeanexp(pf_loglik_arr),
                'time_pfilter': jnp.mean(elapsed_time1_arr),
                'mop_loglik': logmeanexp(mop_loglik_arr),
                'time_mop': jnp.mean(elapsed_time2_arr),
            })
    with open(pickle_file, 'wb') as f:
        pickle.dump(NJresults, f)
table = tabulate(NJresults, tablefmt="grid", headers="keys")
print(table)
+-----+-------+-------------+----------------+--------------+------------+
|   N |     J |   pf_loglik |   time_pfilter |   mop_loglik |   time_mop |
+=====+=======+=============+================+==============+============+
| 100 |    10 |    -426.772 |     0.00331634 |     -426.772 | 0.00347745 |
+-----+-------+-------------+----------------+--------------+------------+
| 100 |   100 |    -400.311 |     0.00592121 |     -400.311 | 0.00622481 |
+-----+-------+-------------+----------------+--------------+------------+
| 100 |  1000 |    -354.355 |     0.0264283  |     -354.399 | 0.0313027  |
+-----+-------+-------------+----------------+--------------+------------+
| 100 | 10000 |    -364.488 |     0.152326   |     -364.479 | 0.164319   |
+-----+-------+-------------+----------------+--------------+------------+
| 500 |    10 |   -2068.8   |     0.00650719 |    -2068.8   | 0.00674063 |
+-----+-------+-------------+----------------+--------------+------------+
| 500 |   100 |   -1916.43  |     0.0150236  |    -1916.43  | 0.0178326  |
+-----+-------+-------------+----------------+--------------+------------+
| 500 |  1000 |   -1875.09  |     0.106222   |    -1875.69  | 0.111074   |
+-----+-------+-------------+----------------+--------------+------------+
| 500 | 10000 |   -1856.49  |     0.616038   |    -1858.5   | 0.641503   |
+-----+-------+-------------+----------------+--------------+------------+

Log-Likelihood vs. Particle Number J for Particle Filter and MOP

Code
df = pd.DataFrame(NJresults)
unique_N = df['N'].unique()
num_panels = len(unique_N)

# Set up the subplots
fig, axes = plt.subplots(nrows=1, ncols=num_panels, figsize=(5 * num_panels, 5), sharey=False)

if num_panels == 1:
    axes = [axes]  # Ensure axes is iterable if there's only one panel

for ax, N_val in zip(axes, unique_N):
    sub_df = df[df['N'] == N_val]
    sub_df = sub_df.sort_values('J')  # Ensure lines connect in correct order

    ax.plot(sub_df['J'], sub_df['pf_loglik'], label='pf_loglik', marker='o')
    ax.plot(sub_df['J'], sub_df['mop_loglik'], label='mop_loglik', marker='s')

    ax.set_title(f'N = {N_val}')
    ax.set_xlabel('J')
    ax.set_ylabel('Log-likelihood')
    ax.legend()
    ax.grid(True)

plt.tight_layout()
plt.show()

Runtime vs. Particle Number J for Particle Filter and MOP

Code
plt.figure(figsize=(8, 6))
for N_val in unique_N:
    sub_df = df[df['N'] == N_val]
    plt.plot(sub_df['J'], sub_df['time_pfilter'], label=f'Particle Filter (N={N_val})', marker='o')
    plt.plot(sub_df['J'], sub_df['time_mop'], label=f'MOP (N={N_val})', marker='x')

plt.xscale('log')
plt.yscale('log')
plt.xlabel('Number of Particles (J)', fontsize=12)
plt.ylabel('Runtime (seconds)', fontsize=12)
plt.title('Runtime vs Particle Number for Different N', fontsize=14)
plt.legend(fontsize=10)
plt.grid(True, which="both", linestyle="--", linewidth=0.5)
plt.show()

Open question: how does the memory requirement scale with \((J, N)\) when the filter is differentiated by autodiff, i.e., how much RAM do we use?
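
We have not measured this here. One way to probe it (a sketch; it assumes _mop_internal is differentiable in theta, which is the point of MOP, and uses JAX's device-memory profiler) is:

Code
import jax.profiler

# hypothetical probe: differentiate the MOP log-likelihood once, then save
# the device memory profile for inspection with pprof
mop_loglik = lambda th: -_mop_internal(th, ys, J=J, rinit=rinit,
    rprocess=rprocess, dmeasure=dmeasure, covars=None,
    key=jax.random.PRNGKey(2), alpha=0.9)
_ = jax.grad(mop_loglik)(theta)
jax.profiler.save_device_memory_profile("memory.prof")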

Comparison of the log-likelihood distribution of pfilter in pypomp and in R-pomp when J is small.

J = 10
nrep = N_reps
pypf_loglik_arr = []
xs, ys, key = generate_data(N, key)
for i in range(nrep):
    key, subkey = jax.random.split(key)
    pf_val = -pfilter(J=J,
                      rinit=rinit, rprocess=rprocess, dmeasure=dmeasure,
                      theta=theta, ys=ys, thresh=2, key=subkey)
    pypf_loglik_arr.append(pf_val)

plt.hist(pypf_loglik_arr, bins=30, edgecolor='black')
plt.title("pypomp pfilter log-likelihood estimates, J = 10")
plt.xlabel("Log-likelihood estimate")
plt.ylabel("Frequency")
plt.show()