Source code for pyace.validate

import os
import numpy as np
import matplotlib as mpl
import warnings

# TODO: check if running in interactive mode
mpl.use('Agg')
import matplotlib.pyplot as plt

from scipy import stats


[docs]def plot_analyse_error_distributions(df_pred, fig_prefix='train_', fig_path='.', imagetype="png"):
    if not os.path.isdir(fig_path):
        os.makedirs(fig_path)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        df_pred["energy_pred"] = df_pred["energy_pred"].astype(float)
        df_pred["energy_corrected_per_atom"] = df_pred["energy_corrected_per_atom"].astype(float)
        df_pred["energy_corrected"] = df_pred["energy_corrected"].astype(float)

        # energies
        df_pred["energy_pred_per_atom"] = df_pred["energy_pred"] / df_pred["NUMBER_OF_ATOMS"]

        df_pred["dE"] = df_pred["energy_pred"] - df_pred["energy_corrected"]
        df_pred["dEpa"] = df_pred["dE"] / df_pred["NUMBER_OF_ATOMS"]

        # forces
        forces = np.vstack(df_pred["forces"])
        forces_pred = np.vstack(df_pred["forces_pred"])
        forces_norm = np.linalg.norm(forces, axis=1)
        dforces = forces_pred - forces
        dforces_norm = np.linalg.norm(dforces, axis=1)
        dforces_flatten = dforces.flatten()
        forces_flatten = forces.flatten()

        energy_corrected_per_atom = df_pred["energy_corrected_per_atom"].values
        energy_pred_per_atom = df_pred["energy_pred_per_atom"].values
        dEpa = df_pred["dEpa"].values

        #####################################
        # 1. E/F pair plots
        #####################################

        fig, (ax1, ax2) = plt.subplots(1, 2, dpi=150, figsize=(8, 4))

        ax1.scatter(energy_corrected_per_atom, energy_pred_per_atom, s=3)
        ax1.set_xlabel('E(DFT), eV/at')
        ax1.set_ylabel('E, eV/at')
        ax1.set_title('Energy')

        xlim = ax1.get_ylim()
        ylim = ax1.get_ylim()
        lim = (min(xlim[0], ylim[0]), max(xlim[1], ylim[1]))
        ax1.set_xlim(lim)
        ax1.set_ylim(lim)
        ax1.set_aspect(1)

        ax2.scatter(forces, forces_pred, s=3)
        ax2.set_xlabel('F$_i$(DFT), eV/$\\AA$')
        ax2.set_ylabel('F$_i$, eV/$\\AA$')
        ax2.set_title('Force components')
        xlim = ax2.get_ylim()
        ylim = ax2.get_ylim()
        lim = (min(xlim[0], ylim[0]), max(xlim[1], ylim[1]))
        ax2.set_xlim(lim)
        ax2.set_ylim(lim)
        ax2.set_aspect(1)
        plt.tight_layout()
        plt.savefig(os.path.join(fig_path, fig_prefix + "EF-pairplots."+imagetype))

        ########################################
        ## 2. E-error dist plot
        ########################################
        fig, ((ax0, ax_inv), (ax1, ax2)) = plt.subplots(2, 2, dpi=150, figsize=(7, 4),
                                                        gridspec_kw={'width_ratios': [7, 1],
                                                                     'height_ratios': [1, 3]
                                                                     })

        ax_inv.set_visible(False)

        energy_hist_res = ax0.hist(energy_corrected_per_atom, bins=100, color="gray")
        ax0.get_xaxis().set_visible(False)
        ax0.get_yaxis().set_visible(False)
        yl = ax0.get_ylim()
        lim = (yl[0] - (yl[1] - yl[0]) * 0.05, yl[1])
        ax0.set_ylim(lim)

        ##########
        # samples
        ax1.scatter(energy_corrected_per_atom, dEpa * 1e3, s=3, label="samples", color="lightsteelblue")
        # aggregated over bins
        bins = energy_hist_res[1]
        bins_inds = np.digitize(energy_corrected_per_atom, bins=bins) - 1

        dEpa_per_bin = np.zeros_like(bins)
        for bin_ind in range(min(bins_inds), max(bins_inds) + 1):
            dEpa_per_bin[bin_ind] = np.sqrt(np.mean(dEpa[bins_inds == bin_ind] ** 2))

        dEpa_per_bin = np.nan_to_num(dEpa_per_bin, copy=False, nan=0)

        ax1.plot(bins, dEpa_per_bin * 1e3, color="red", label="RMSE", lw=2)

        ax1.set_xlabel('E(DFT), eV/atom')
        ax1.set_ylabel('dE, meV/atom')

        ax1.legend()
        ##########
        ax2.get_xaxis().set_visible(False)
        ax2.get_yaxis().set_visible(False)

        hist_res = ax2.hist(dEpa * 1e3, bins=100, density=True, color="gray", orientation="horizontal");
        dE_std = np.std(dEpa)
        dE_mean = np.mean(dEpa)
        dExx = hist_res[1]
        ax2.plot(stats.norm.pdf(dExx, loc=dE_mean * 1e3, scale=dE_std * 1e3), dExx, color='orange', ls='--')

        yl = ax2.get_xlim()
        lim = (yl[0] - (yl[1] - yl[0]) * 0.05, yl[1])
        ax2.set_xlim(lim)
        ax2.set_ylim(ax1.get_ylim())
        plt.subplots_adjust(wspace=0.0, hspace=0.0)

        plt.savefig(os.path.join(fig_path, fig_prefix + "E-dE-dist."+imagetype))

        ##############################
        # 3. Force component dist plot
        ##############################

        fig, ((ax0, ax_inv), (ax1, ax2)) = plt.subplots(2, 2, dpi=150, figsize=(7, 4),
                                                        gridspec_kw={'width_ratios': [7, 1],
                                                                     'height_ratios': [1, 3]
                                                                     })

        ax_inv.set_visible(False)
        force_hist_res = ax0.hist(forces_flatten, bins=150, color="gray")
        ax0.get_xaxis().set_visible(False)
        ax0.get_yaxis().set_visible(False)
        yl = ax0.get_ylim()
        lim = (yl[0] - (yl[1] - yl[0]) * 0.05, yl[1])
        ax0.set_ylim(lim)

        ##############3

        ax1.scatter(forces_flatten, dforces_flatten * 1e3, s=3, label="sample", color="lightsteelblue")
        ax1.set_xlabel('F$_i$, eV/$\\AA$')
        ax1.set_ylabel('dF$_i$, meV/$\\AA$')

        # aggregated over bins
        bins = force_hist_res[1]
        bins_inds = np.digitize(forces_flatten, bins=bins) - 1

        de_per_bin = np.zeros_like(bins)

        for bin_ind in range(min(bins_inds), max(bins_inds) + 1):
            de_per_bin[bin_ind] = np.sqrt(np.mean(dforces_flatten[bins_inds == bin_ind] ** 2))

        de_per_bin = np.nan_to_num(de_per_bin, copy=False, nan=0)
        ax1.plot(bins, de_per_bin * 1e3, color="red", lw=2, label="RMSE")

        ax1.legend()
        #################
        ax2.get_xaxis().set_visible(False)
        ax2.get_yaxis().set_visible(False)
        hist_res = ax2.hist(dforces_flatten * 1e3,
                            orientation='horizontal',
                            bins=100,
                            density=True, color="gray")

        df_mean = np.mean(dforces_flatten)
        df_std = np.std(dforces_flatten)
        dFxx = hist_res[1]
        ax2.plot(stats.norm.pdf(dFxx, loc=df_mean * 1e3, scale=df_std * 1e3), dFxx, color='orange', ls='--',
                 label="N(mean={:.1f}/std={:.1f})".format(df_mean * 1e3, df_std * 1e3)
                 )

        yl = ax2.get_xlim()
        lim = (yl[0] - (yl[1] - yl[0]) * 0.05, yl[1])
        ax2.set_xlim(lim)
        ax2.set_ylim(ax1.get_ylim())
        plt.subplots_adjust(wspace=0.0, hspace=0.0)
        plt.savefig(os.path.join(fig_path, fig_prefix + "Fi-dFi-dist."+imagetype))

        #######################################################################
        # 4. Force-norm error dist plot
        #######################################################################
        fig, ((ax0, ax_inv), (ax1, ax2)) = plt.subplots(2, 2, dpi=150, figsize=(7, 4),
                                                        gridspec_kw={'width_ratios': [7, 1],
                                                                     'height_ratios': [1, 3]
                                                                     })

        ax_inv.set_visible(False)

        force_hist_res = ax0.hist(forces_norm, bins=150, color="gray")
        ax0.get_xaxis().set_visible(False)
        ax0.get_yaxis().set_visible(False)
        yl = ax0.get_ylim()
        lim = (yl[0] - (yl[1] - yl[0]) * 0.05, yl[1])
        ax0.set_ylim(lim)

        ##############3

        ax1.scatter(forces_norm, dforces_norm * 1e3, s=3, label="sample", color="lightsteelblue")
        ax1.set_xlabel('|F|, eV/$\\AA$')
        ax1.set_ylabel('|dF|, meV/$\\AA$')

        # aggregated over bins
        bins = force_hist_res[1]
        bins_inds = np.digitize(forces_norm, bins=bins) - 1

        de_per_bin = np.zeros_like(bins)

        for bin_ind in range(min(bins_inds), max(bins_inds) + 1):
            de_per_bin[bin_ind] = np.sqrt(np.mean(dforces_norm[bins_inds == bin_ind] ** 2))
        de_per_bin = np.nan_to_num(de_per_bin, copy=False, nan=0)
        ax1.plot(bins, de_per_bin * 1e3, color="red", lw=2, label="RMSE")

        ax1.legend()
        #################
        ax2.get_xaxis().set_visible(False)
        ax2.get_yaxis().set_visible(False)
        hist_res = ax2.hist(dforces_norm * 1e3,
                            orientation='horizontal',
                            bins=100,
                            density=True, color="gray")

        df_mean = np.mean(dforces_norm)
        df_std = np.std(dforces_norm)
        dFxx = hist_res[1]
        ax2.plot(stats.norm.pdf(dFxx, loc=df_mean * 1e3, scale=df_std * 1e3), dFxx, color='orange', ls='--'
                 )

        yl = ax2.get_xlim()
        lim = (yl[0] - (yl[1] - yl[0]) * 0.05, yl[1])
        ax2.set_xlim(lim)
        ax2.set_ylim(ax1.get_ylim())

        plt.subplots_adjust(wspace=0.0, hspace=0.0)
        plt.savefig(os.path.join(fig_path, fig_prefix + "F-dF-dist."+imagetype))

        #######################################################################
        # 5. Force-norm error dist plot
        #######################################################################
        fig, (ax1, ax2) = plt.subplots(2, 1, dpi=150, figsize=(7, 4), sharex=True)

        ax1.scatter(df_pred["nn_min"], df_pred["energy_corrected_per_atom"], s=3)
        ax1.set_ylabel('E(DFT), eV/atom')

        ax2.scatter(df_pred["nn_min"], df_pred["dEpa"] * 1e3, s=3)
        ax2.set_xlabel('d$_{NN}$, $\\AA$')
        ax2.set_ylabel('dE, meV/atom')

        fig.tight_layout()
        fig.savefig(os.path.join(fig_path, fig_prefix + "E-dE-nn."+imagetype))