Source code for zmiopc

"""Classes and Functions for the ziopcpy module."""
import numpy as np

# ZiOPC model converges extremely
# faster with import * rather than import as np.
import pandas as pd
from numpy import *
from scipy.optimize import minimize
from scipy.stats import mvn, norm


[docs]class OpModel:
    """Store model results from :py:func:`opmod`."""

    def __init__(self, llik, coef, aic, vcov, data, xs, ts, x_, yx_, yncat,
                 xstr, ystr):
        """Store model results, goodness-of-fit tests, and other information.

        :param llik: Log-Likelihood.
        :param coef: Model coefficients.
        :param aic: Model Akaike information criterion.
        :param vcov: Variance-Covariance matrix.
            (optimized as inverted Hessian matrix)
        :param data: Full dataset.
        :param ts: Cutpoints for ordered probit.
        :param xs: Ordered probit estimates (Betas).
        :param yncat: Number of categories in the outcome variable.
        :param x_: X (Covariates) Data.
        :param yx_: Y (Dependent Variable) data.
        :param xstr: List of strings for variable(s) in the outcome stage (x).
        :param ystr: List of strings for outcome variable name (y).
        """
        self.llik = llik
        self.coefs = coef
        self.AIC = aic
        self.vcov = vcov
        self.data = data
        self.cutpoints = ts
        self.ordered = xs
        self.ycat = yncat
        self.X = x_
        self.Y = yx_
        self.xstr = xstr
        self.ystr = ystr


[docs]class IopModel:
    """Store model results from :py:func:`iopmod`."""

    def __init__(
            self,
            modeltype,
            llik,
            coef,
            aic,
            vcov,
            data,
            xs,
            zs,
            ts,
            x_,
            yx_,
            z_,
            yncat,
            xstr,
            ystr,
            zstr,
    ):
        """Store model results, goodness-of-fit tests, and other information.

        :param modeltype: Type of Iop Model (ziop or miop).
        :param llik: Log-Likelihood.
        :param coef: Model coefficients
        :param aic: Model Akaike information criterion.
        :param vcov: Variance-Covariance matrix.
            (optimized as inverted Hessian matrix)
        :param data: Full dataset.
        :param ts: Cutpoints for ordered probit.
        :param zs: Inflation stage estimates (Gammas).
        :param xs: Ordered probit estimates (Betas).
        :param yncat: Number of categories in the Dependent Variable (DV).
        :param x_: X Data.
        :param yx_: Y (DV) data.
        :param z_: Z Data.
        :param xstr: List of strings for x names.
        :param ystr: List of strings for y names.
        :param zstr: List of strings for z names.

        """
        self.modeltype = modeltype
        self.llik = llik
        self.coefs = coef
        self.AIC = aic
        self.vcov = vcov
        self.data = data
        self.cutpoints = ts
        self.inflate = zs
        self.ordered = xs
        self.ycat = yncat
        self.X = x_
        self.Y = yx_
        self.Z = z_
        self.xstr = xstr
        self.ystr = ystr
        self.zstr = zstr


[docs]class IopCModel:
    """Store model results from :py:func:`iopcmod`."""

    def __init__(
            self,
            modeltype,
            llik,
            coef,
            aic,
            vcov,
            data,
            xs,
            zs,
            ts,
            x_,
            yx_,
            z_,
            rho,
            yncat,
            xstr,
            ystr,
            zstr,
    ):
        """Store model results, goodness-of-fit tests, and other information.

        :param modeltype: Type of IopC Model (ziopc or miopc).
        :param llik: Log-Likelihood.
        :param coef: Model coefficients.
        :param AIC: Model Akaike information criterion.
        :param vcov: Variance-Covariance matrix.
            (optimized as inverted Hessian matrix)
        :param data: Full dataset.
        :param ts: Cutpoints for ordered probit.
        :param zs: Inflation stage estimates (Gammas).
        :param xs: Ordered probit estimates (Betas).
        :param rho: Rho.
        :param yncat: Number of categories in Dependent Variable (DV).
        :param x_: X Data.
        :param yx_: Y (DV) data.
        :param z_: Z Data.
        :param xstr: List of strings for x names.
        :param ystr: List of strings for y names.
        :param zstr: List of strings for z names.
        """
        self.modeltype = modeltype
        self.llik = llik
        self.coefs = coef
        self.AIC = aic
        self.vcov = vcov
        self.data = data
        self.cutpoints = ts
        self.inflate = zs
        self.ordered = xs
        self.ycat = yncat
        self.X = x_
        self.Y = yx_
        self.Z = z_
        self.r = rho
        self.xstr = xstr
        self.ystr = ystr
        self.zstr = zstr


[docs]class FittedVals:
    """Store fitted values for iOP models."""

    def __init__(self, responsefull, responseordered, responseinflation,
                 linear):
        """Store different type of equation in each attribute.

        :param responsefull: responsefull
        :param responseordered: responseordered
        :param responseinflation: responseinflation
        :param linear: linear
        """
        self.responsefull = responsefull
        self.responseordered = responseordered
        self.responseinflation = responseinflation
        self.linear = linear


[docs]def op(pstart, x, y, data, weights, offsetx):
    """Calculate likelihood function for Ordered Probit Model.

    :param pstart: A list of starting values for the estimation.
        Length of the number of parameters to be estimated.
    :type pstart: list
    :param x: Covariates for the ordered stage.
    :type x: pandas.DataFrame
    :param y: The dependent variable (DV).
    :type y: pandas.DataFrame
    :param data: Dataset.
    :type data: pandas.DataFrame
    :param weights: Weights.
    :param offsetx: Offset for covariates in the ordered stage.
    """
    n = len(data)
    ycat = y.astype("category")
    ycatu = np.unique(ycat)
    yncat = len(ycatu)
    y0 = np.sort(ycatu)
    v = np.zeros((len(data), yncat))
    for j in range(yncat):
        v[:, j] = y == y0[j]
    tau = np.repeat(1.0, yncat)
    for j in range(yncat - 1):
        if j == 0:
            tau[j] = pstart[j]
        else:
            tau[j] = tau[j - 1] + np.exp(pstart[j])
    beta = pstart[(yncat - 1): len(pstart)]
    xb = x.dot(beta) + offsetx
    cprobs = np.zeros((n, yncat))
    probs = np.zeros((n, yncat))
    for i in range(yncat - 1):
        cprobs[:, i] = norm.cdf(tau[i] - xb)
    probs[:, 0] = cprobs[:, 0]
    for i in range(1, yncat - 1):
        probs[:, i] = cprobs[:, i] - cprobs[:, (i - 1)]
    probs[:, yncat - 1] = 1 - cprobs[:, (yncat - 2)]
    lik = np.zeros((n, yncat))
    for k in range(n):
        for j in range(yncat):
            lik[k, j] = v[k, j] * probs[k, j]
    likk = np.log(lik[lik != 0])
    llik = -1 * sum(likk * weights)
    return llik


[docs]def ziop(pstart, x, y, z, data, weights, offsetx, offsetz):
    """Calculate likelihood function for Zero-inflated Model.

    :param pstart: A list of starting values for the estimation.
        Length of the number of parameters to be estimated.
    :type pstart: list
    :param x: Covariates for the ordered probit stage. 
    :type x: pandas.DataFrame
    :param y: The ordinal dependent Variable (DV). 
    :type y: pandas.DataFrame
    :param z: Covariates for the inflation stage. 
    :type z: pandas.DataFrame
    :param data: Dataset with missing values listwise deleted.
    :type data: pandas.DataFrame
    :param weights: weights.
    :type weights: float
    :param offsetx: Offset for the ordered stage covariates (X). 
    :type offsetx: float
    :param offsetz: Offset for the inflation stage covariates (Z).
    :type offsetz: float
    """
    n = len(data)
    ycat = y.astype("category")
    ycatu = np.unique(ycat)
    yncat = len(ycatu)
    y0 = np.sort(ycatu)
    v = np.zeros((len(data), yncat))
    for j in range(yncat):
        v[:, j] = y == y0[j]
    tau = np.repeat(1.0, yncat)
    for j in range(yncat - 1):
        if j == 0:
            tau[j] = pstart[j]
        else:
            tau[j] = tau[j - 1] + np.exp(pstart[j])
    beta = pstart[(yncat + len(z.columns) - 1): len(pstart)]
    gamma = pstart[(yncat - 1): (yncat + len(z.columns) - 1)]
    zg = z.dot(gamma) + offsetz
    xb = x.dot(beta) + offsetx
    cprobs = np.zeros((n, yncat))
    probs = np.zeros((n, yncat))
    for i in range(yncat - 1):
        cprobs[:, i] = norm.cdf(tau[i] - xb)
    probs[:, 0] = (cprobs[:, 0]) * norm.cdf(zg) + (1 - norm.cdf(zg))
    for i in range(1, yncat - 1):
        probs[:, i] = (cprobs[:, i] - cprobs[:, (i - 1)]) * norm.cdf(zg)
    probs[:, yncat - 1] = (1 - cprobs[:, (yncat - 2)]) * norm.cdf(zg)
    lik = np.zeros((n, yncat))
    for k in range(n):
        for j in range(yncat):
            lik[k, j] = v[k, j] * probs[k, j]
    likk = np.log(lik[lik != 0])
    llik = -1 * sum(likk * weights)
    return llik


[docs]def ziopc(pstart, x, y, z, data, weights, offsetx, offsetz):
    """Calculate likelihood function for Zero-inflated Correlated-Errors Model.

    :param pstart: A list of starting values for the estimation.
        Length of the number of parameters to be estimated.
    :type pstart: list
    :param x: Covariates for the ordered probit stage. 
    :type x: pandas.DataFrame
    :param y: The dependent variable (DV).
    :type y: pandas.DataFrame
    :param z: Covariates for the inflation stage. 
    :type z: pandas.DataFrame
    :param data: Dataset with missing values listwise deleted.
    :type data: pandas.DataFrame
    :param weights: Weights.
    :type weights: float
    :param offsetx: Offset for the ordered probit stage covariates (X).
    :type offsetx: float
    :param offsetz: Offset for the inflation stage (Z).
    :type offsetz: float
    """
    n = len(data)
    ycat = y.astype("category")
    ycatu = np.unique(ycat)
    yncat = len(ycatu)
    y0 = np.sort(ycatu)
    v = np.zeros((len(data), yncat))
    for j in range(yncat):
        v[:, j] = y == y0[j]
    tau = np.repeat(1.0, yncat)
    for j in range(yncat - 1):
        if j == 0:
            tau[j] = pstart[j]
        else:
            tau[j] = tau[j - 1] + np.exp(pstart[j])
    beta = pstart[(yncat + len(z.columns) - 1): len(pstart) - 1]
    gamma = pstart[(yncat - 1): (yncat + len(z.columns) - 1)]
    x_beta = x.dot(beta)
    # correlation
    rho = pstart[len(pstart) - 1]
    cprobs = np.zeros((len(x_beta), yncat))
    probs = np.zeros((len(x_beta), yncat))
    cut = np.zeros((len(x_beta), yncat))
    cutb = np.zeros((len(x_beta), yncat))
    zg = z.dot(gamma) + offsetz
    xb = x.dot(beta) + offsetx
    means = np.array([0, 0])
    lower = np.array([-inf, -inf])
    sigma = np.array([[1, rho], [rho, 1]])
    nsigma = np.array([[1, -rho], [-rho, 1]])
    for i in range(yncat - 1):
        cprobs[:, i] = norm.cdf(tau[i] - xb)
        cut[:, i] = tau[i] - xb
        cutb[:, i] = xb - tau[i]
    upperb = np.zeros((len(x_beta), 2))
    upper = np.zeros((len(x_beta), 2))
    for j in range(n):
        upperb[j, :] = [zg[j], cutb[j, yncat - 2]]
        upper[j, :] = [zg[j], cut[j, 0]]
        probs[j, yncat - 1] = mvn.mvnun(lower, upperb[j], means, sigma)[0]
        probs[j, 0] = (1 - norm.cdf(zg[j])) + mvn.mvnun(lower, upper[j],
                                                        means, nsigma)[0]
    for j in range(n):
        for i in range(1, yncat - 1):
            probs[j, i] = (
                    mvn.mvnun(lower, [zg[j],
                                      cut[j, i]], means, nsigma)[0]
                    - mvn.mvnun(lower, [zg[j],
                                        cut[j, i - 1]], means, nsigma)[0])
    lik = np.zeros((n, yncat))
    for k in range(n):
        for j in range(yncat):
            lik[k, j] = v[k, j] * probs[k, j]
    likk = np.log(lik[lik != 0])
    llik = -1 * sum(likk * weights)
    return llik


[docs]def miop(pstart, x, y, z, data, weights, offsetx, offsetz):
    """
    Likelihood function for Middle-inflated Ordered Probit Model
    "without" correlated errors.

    The number of categories in the dependent
    variable must be odd.

    :param pstart: A list of starting values for the estimation.
        Length of the number of parameters to be estimated.
    :type pstart: list
    :param x: Covariates in the ordered probit stage. 
    :type x: pandas.DataFrame
    :param y: The dependent variable (DV).
    :type y: pandas.DataFrame
    :param z: Covariates in the inflation stage. 
    :type z: pandas.DataFrame
    :param data: Dataset with missing values listwise deleted.
    :type data: pandas.DataFrame
    :param weights: Weights.
    :type weights: float
    :param offsetx: Offset for covariates in the ordered probit stage (X).
    :type offsetx: float
    :param offsetz: Offset for covariates in the inflation stage.
    :type offsetz: float
    """
    n = len(data)
    ycat = y.astype("category")
    ycatu = np.unique(ycat)
    yncat = len(ycatu)
    y0 = np.sort(ycatu)
    v = np.zeros((len(data), yncat))
    for j in range(yncat):
        v[:, j] = y == y0[j]
    tau = np.repeat(1.0, yncat)
    for j in range(yncat - 1):
        if j == 0:
            tau[j] = pstart[j]
        else:
            tau[j] = tau[j - 1] + np.exp(pstart[j])
    beta = pstart[(yncat + len(z.columns) - 1): len(pstart)]
    gamma = pstart[(yncat - 1): (yncat + len(z.columns) - 1)]
    zg = z.dot(gamma) + offsetz
    xb = x.dot(beta) + offsetx
    cprobs = np.zeros((n, yncat))
    probs = np.zeros((n, yncat))
    for i in range(yncat - 1):
        cprobs[:, i] = norm.cdf(tau[i] - xb)
    probs[:, 0] = (cprobs[:, 0]) * norm.cdf(zg)
    for i in range(1, yncat - 1):
        if i == np.median(range(yncat)):
            probs[:, i] = (1 - norm.cdf(zg)) + (
                    norm.cdf(zg) * (cprobs[:, i] - cprobs[:, (i - 1)])
            )
        else:
            probs[:, i] = norm.cdf(zg) * (cprobs[:, i] - cprobs[:, (i - 1)])
    probs[:, yncat - 1] = (1 - cprobs[:, (yncat - 2)]) * norm.cdf(zg)
    lik = np.zeros((n, yncat))
    for k in range(n):
        for j in range(yncat):
            lik[k, j] = v[k, j] * probs[k, j]
    likk = np.log(lik[lik != 0])
    llik = -1 * sum(likk * weights)
    return llik


[docs]def miopc(pstart, x, y, z, data, weights, offsetx, offsetz):
    """
    Likelihood function for Middle-inflated Correlated-Errors Model.
    
    The number of categories in the dependent variable must be odd.

    :param pstart: A list of starting values for the estimation.
        Length of the number of parameters to be estimated.
    :type pstart: list
    :param x: Covariates in the ordered probit stage.
    :type x: pandas.DataFrame
    :param y: The dependent variable (DV). 
    :type y: pandas.DataFrame
    :param z: Covariates in the inflation stage.
    :type z: pandas.DataFrame
    :param data: Dataset with missing values listwise deleted.
    :type data: pandas.DataFrame
    :param weights: Weights.
    :type weights: float
    :param offsetx: Offset for the ordered probit stage covariates (X).
    :type offsetx: float
    :param offsetz: Offset for the covariates in the inflation stage (Z).
    :type offsetz: float
    """
    n = len(data)
    ycat = y.astype("category")
    ycatu = np.unique(ycat)
    yncat = len(ycatu)
    y0 = np.sort(ycatu)
    v = np.zeros((len(data), yncat))
    for j in range(yncat):
        v[:, j] = y == y0[j]
    tau = np.repeat(1.0, yncat)
    for i in range(yncat - 1):
        if i == 0:
            tau[i] = pstart[i]
        else:
            tau[i] = tau[i - 1] + np.exp(pstart[i])
    beta = pstart[(yncat + len(z.columns) - 1): len(pstart) - 1]
    gamma = pstart[(yncat - 1): (yncat + len(z.columns) - 1)]
    x_beta = x.dot(beta)
    rho = pstart[len(pstart) - 1]
    cprobs = np.zeros((len(x_beta), yncat))
    probs = np.zeros((len(x_beta), yncat))
    cutpoint = np.zeros((len(x_beta), yncat))
    cutpointb = np.zeros((len(x_beta), yncat))
    zg = z.dot(gamma) + offsetz
    xb = x.dot(beta) + offsetx
    means = np.array([0, 0])
    lower = np.array([-inf, -inf])
    sigma = np.array([[1, rho], [rho, 1]])
    nsigma = np.array([[1, -rho], [-rho, 1]])
    for i in range(yncat - 1):
        cprobs[:, i] = norm.cdf(tau[i] - xb)
        cutpoint[:, i] = tau[i] - xb
        cutpointb[:, i] = xb - tau[i]
    upperb = np.zeros((len(x_beta), 2))
    upper = np.zeros((len(x_beta), 2))
    for j in range(n):
        upperb[j, :] = [zg[j], cutpointb[j, yncat - 2]]
        upper[j, :] = [zg[j], cutpoint[j, 0]]
        probs[j, yncat - 1] = mvn.mvnun(lower, upperb[j], means, sigma)[0]
        probs[j, 0] = mvn.mvnun(lower, upper[j], means, nsigma)[0]
    for i in range(n):
        for j in range(1, yncat - 1):
            if j == np.median(range(yncat)):
                probs[i, j] = (
                        (1 - norm.cdf(zg[i]))
                        + mvn.mvnun(lower, [zg[i], cutpoint[i, j]], means,
                                    nsigma)[0]
                        - mvn.mvnun(lower, [zg[i], cutpoint[i, j - 1]], means,
                                    nsigma)[0]
                )
            else:
                probs[i, j] = (
                        mvn.mvnun(lower, [zg[i], cutpoint[i, j]], means,
                                  nsigma)[0]
                        - mvn.mvnun(lower, [zg[i], cutpoint[i, j - 1]], means,
                                    nsigma)[0]
                )
    lik = np.zeros((n, yncat))
    for k in range(n):
        for j in range(yncat):
            lik[k, j] = v[k, j] * probs[k, j]
    likk = np.log(lik[lik != 0])
    llik = -1 * sum(likk * weights)
    return llik


[docs]def opresults(model, data, x, y):
    """Produces estimation results, part of :py:func:`opmod`.

    :param model: Model estimation results obtained from minimization.
    :param data: Dataset.
    :type data: pandas.DataFrame
    :param x: List of names for independent variables matching column names
        in data.
    :type x: list of str
    :param y: : List of name for dependent Variablematching column names
        in data.
    """
    varlist = np.unique(y + x)
    dataset = data[varlist]
    datasetnew = dataset.dropna(how="any")
    datasetnew = datasetnew.reset_index(drop=True)
    x_ = datasetnew[x]
    y_ = datasetnew[y]
    yx_ = y_.iloc[:, 0]
    yncat = len(np.unique(yx_))
    names = list()
    for s in range(1, yncat):
        names.append("cut" + str(s))
    for s in range(x_.shape[1]):
        names.append(x_.columns[s])
    ts = model.x[0: yncat - 1]
    xs = model.x[(yncat - 1): (yncat + x_.shape[1] - 1)]
    ses = np.sqrt(np.diag(model.hess_inv))
    tscore = model.x / ses
    pval = (1 - (norm.cdf(abs(tscore)))) * 2
    lci = model.x - 1.96 * ses
    uci = model.x + 1.96 * ses
    coef = pd.DataFrame(
        {
            "Coef": model.x,
            "SE": ses,
            "tscore": tscore,
            "p": pval,
            "2.5%": lci,
            "97.5%": uci,
        },
        names,
    )
    aic = -2 * (-model.fun) + 2 * (len(coef))
    results = OpModel(
        model.fun, coef, aic, model.hess_inv, datasetnew, xs, ts, x_, yx_,
        yncat, x, y
    )
    return results


[docs]def opmod(data, x, y, pstart=None, method="BFGS", weights=1, offsetx=0):
    """Estimates Ordered Probit model and returns :class:`OpModel` class object.

    :param pstart: A list of starting values for the estimation. 
        Length of the number of parameters to be estimated.  
    :type pstart: list
    :param data: Full dataset used for estimation.
    :type data: pandas.DataFrame
    :type x: list of str
    :param y: Dependent Variable (DV).
    :type y: list of str
    :param method: Method for optimization, default is 'BFGS'. For other
        available methods, see scipy.optimize.minimize documentation.
    :param weights: Weights.
    :param offsetx: Offset for covariates (X).
    :return: OpModel
    """
    varlist = np.unique(y + x)
    dataset = data[varlist]
    datasetnew = dataset.dropna(how="any")
    x_ = datasetnew[x]
    y_ = datasetnew[y]
    yx_ = y_.iloc[:, 0]
    yncat = len(np.unique(yx_))
    if pstart is None:
        pstart = np.repeat(0.01, ((yncat - 1) + len(x_.columns)))
    model = minimize(
        op,
        pstart,
        args=(x_, yx_, datasetnew, weights, offsetx),
        method=method,
        options={"disp": True, "maxiter": 500},
    )
    results = opresults(model, data, x, y)
    return results


[docs]def iopresults(model, data, x, y, z, modeltype):
    """Produce estimation results, part of :py:func:`iopmod`.

    :param model: Model estimation results obtained from minimization.
    :param data: Full dataset used for estimation.
    :type data: pandas.DataFrame
    :param x: List of names for covariates in the ordered probit stage.
    :type x: list of str
    :param y: : List of names for dependent cariable.
    :type y: list of str
    :param z: : List of names for covariates in the inflation stage.
    :type z: list of str
    :param modeltype: : Type of model. Options are: 'ziop' or 'miop'.
    """
    varlist = np.unique(y + z + x)
    dataset = data[varlist]
    datasetnew = dataset.dropna(how="any")
    datasetnew = datasetnew.reset_index(drop=True)
    x_ = datasetnew[x]
    y_ = datasetnew[y]
    yx_ = y_.iloc[:, 0]
    yncat = len(np.unique(yx_))
    z_ = datasetnew[z]
    z_.insert(0, "int", np.repeat(1, len(z_)))
    names = list()
    for s in range(1, yncat):
        names.append("cut" + str(s))
    for s in range(z_.shape[1]):
        names.append("Inflation: " + z_.columns[s])
    for s in range(x_.shape[1]):
        names.append("Ordered: " + x_.columns[s])
    ts = model.x[0: yncat - 1]
    zs = model.x[yncat - 1: (yncat + z_.shape[1] - 1)]
    xs = model.x[
         (yncat + z_.shape[1] - 1): (yncat + z_.shape[1] + x_.shape[1] - 1)]
    ses = np.sqrt(np.diag(model.hess_inv))
    tscore = model.x / ses
    pval = (1 - (norm.cdf(abs(tscore)))) * 2
    lci = model.x - 1.96 * ses
    uci = model.x + 1.96 * ses
    coef = pd.DataFrame(
        {
            "Coef": model.x,
            "SE": ses,
            "tscore": tscore,
            "p": pval,
            "2.5%": lci,
            "97.5%": uci,
        },
        names,
    )
    aic = -2 * (-model.fun) + 2 * (len(coef))
    results = IopModel(
        modeltype,
        model.fun,
        coef,
        aic,
        model.hess_inv,
        datasetnew,
        xs,
        zs,
        ts,
        x_,
        yx_,
        z_,
        yncat,
        x,
        y,
        z,
    )
    return results


[docs]def iopcresults(model, data, x, y, z, modeltype):
    """Produce estimation results, part of :py:func:`ziopc  mod`.

    :param model: Model estimation results obtained from minimization.
    :param data: Full dataset used for estimation.
    :type data: pandas.DataFrame
    :param x: List of names for covariates in the ordered probit stage.
    :type x: list of str
    :param y: : List of names for dependent cariable.
    :type y: list of str
    :param z: : List of names for covariates in the inflation stage.
    :type z: list of str
    :param modeltype: : Type of model. Options are: 'ziopc' or 'miopc'
    """
    varlist = np.unique(y + z + x)
    dataset = data[varlist]
    datasetnew = dataset.dropna(how="any")
    datasetnew = datasetnew.reset_index(drop=True)
    x_ = datasetnew[x]
    y_ = datasetnew[y]
    yx_ = y_.iloc[:, 0]
    yncat = len(np.unique(yx_))
    z_ = datasetnew[z]
    z_.insert(0, "int", np.repeat(1, len(z_)))
    names = list()
    for s in range(1, yncat):
        names.append("cut" + str(s))
    for s in range(z_.shape[1]):
        names.append("Inflation: " + z_.columns[s])
    for s in range(x_.shape[1]):
        names.append("Ordered: " + x_.columns[s])
    names.append("rho")
    ts = model.x[0: yncat - 1]
    zs = model.x[yncat - 1: (yncat + z_.shape[1] - 1)]
    xs = model.x[
         (yncat + z_.shape[1] - 1): (yncat + z_.shape[1] + x_.shape[1] - 1)]
    rho = model.x[-1]
    ses = np.sqrt(np.diag(model.hess_inv))
    tscore = model.x / ses
    pval = (1 - (norm.cdf(abs(tscore)))) * 2
    lci = model.x - 1.96 * ses
    uci = model.x + 1.96 * ses
    coef = pd.DataFrame(
        {
            "Coef": model.x,
            "SE": ses,
            "tscore": tscore,
            "p": pval,
            "2.5%": lci,
            "97.5%": uci,
        },
        names,
    )
    aic = -2 * (-model.fun) + 2 * (len(coef))
    results = IopCModel(
        modeltype,
        model.fun,
        coef,
        aic,
        model.hess_inv,
        datasetnew,
        xs,
        zs,
        ts,
        x_,
        yx_,
        z_,
        rho,
        yncat,
        x,
        y,
        z,
    )
    return results


[docs]def iopmod(
        modeltype,
        data,
        x,
        y,
        z,
        pstart=None,
        method="BFGS",
        weights=1,
        offsetx=0,
        offsetz=0,
):
    """Estimate ZiOP model and return :class:`IopModel` class object as output.

    :param pstart: A list of starting values for the estimation.
        Length of the number of parameters to be estimated.
    :type pstart: list
    :param data: Full dataset used for estimation.
    :type data: pandas.DataFrame
    :param x: Covariates in the ordered probit stage.
        Elements must match column names of ``data``.
    :type x: list of str
    :param y: Dependent variable (DV). Element must match column
        names of ``data``.
    :type y: list of str
    :param z: Inflation stage variable. Elements must match column
        names of ``data``.
    :type z: list of str
    :param modeltype: Type of model to be estimated.
        Options are: "ziop" or 'miop'.
    :param method: Method for optimization, default 'BFGS'.  For other
        available methods, see scipy.optimize.minimize documentation.
    :param weights: Weights.
    :param offsetx: Offset for ordered probit stage covariates (X).
    :param offsetz: Offset for inflation stage covariates (Z).
    :return: IopModel
    """
    types = ["ziop", "miop"]
    if modeltype in types:
        varlist = np.unique(y + z + x)
        dataset = data[varlist]
        datasetnew = dataset.dropna(how="any")
        x_ = datasetnew[x]
        y_ = datasetnew[y]
        yx_ = y_.iloc[:, 0]
        yncat = len(np.unique(yx_))
        z_ = datasetnew[z]
        z_.insert(0, "ones", np.repeat(1, len(z_)))
        if pstart is None:
            pstart = np.repeat(0.01, (
                    (yncat - 1) + len(x_.columns) + len(z_.columns)))
        if modeltype == "ziop":
            model = minimize(
                ziop,
                pstart,
                args=(x_, yx_, z_, datasetnew, weights, offsetx, offsetz),
                method=method,
                options={"disp": True, "maxiter": 500},
            )
        elif modeltype == "miop":
            if len(np.unique(y_.astype("category").iloc[:, 0])) % 2 == 1:
                model = minimize(
                    miop,
                    pstart,
                    args=(x_, yx_, z_, datasetnew, weights, offsetx, offsetz),
                    method=method,
                    options={"disp": True, "maxiter": 500},
                )
            else:
                raise Exception("miop requires odd number of categories.")
        results = iopresults(model, data, x, y, z, modeltype)
        return results
    else:
        raise Exception("type must be ziop or miop")


[docs]def iopcmod(
        modeltype,
        data,
        x,
        y,
        z,
        pstart=None,
        method="BFGS",
        weights=1,
        offsetx=0,
        offsetz=0,
):
    """Estimate an iOP model (ZiOP or MiOP) and return :class:`IopcModel`.

    :param pstart: A list of starting values for the estimation.
        Length of the number of parameters to be estimated.
    :type pstart: list
    :param data: Dataset used for estimation.
    :type data: pandas.DataFrame
    :param x: Covariates for the ordered probit stage.
        Elements must match column names of ``data``.
    :type x: list of str
    :param y: The dependent variable (DV). Element must match column
        names of ``data``.
    :type y: list of str
    :param z: Covariates for the inflation stage. Elements must match column
        names of ``data``.
    :type z: list of str
    :param modeltype: Type of model to be estimated.
        Options are: 'ziopc' or 'miopc'.
    :param method: Method for optimization, default is 'BFGS'. For other
        available methods, see scipy.optimize.minimize documentation.
    :param weights: Weights.
    :param offsetx: Offset for variables in ordered probit stage (X).
    :param offsetz: Offset for variables in inflation stage (Z).
    :return: IopCModel
    """
    types = ["ziopc", "miopc"]
    if modeltype in types:
        varlist = np.unique(y + z + x)
        dataset = data[varlist]
        datasetnew = dataset.dropna(how="any")
        datasetnew = datasetnew.reset_index(drop=True)
        x_ = datasetnew[x]
        y_ = datasetnew[y]
        yx_ = y_.iloc[:, 0]
        yncat = len(np.unique(yx_))
        z_ = datasetnew[z]
        z_.insert(0, "ones", np.repeat(1, len(z_)))
        if pstart is None:
            pstart = np.repeat(
                0.01, ((yncat - 1) + len(x_.columns) + len(z_.columns) + 1)
            )
        if modeltype == "ziopc":
            model = minimize(
                ziopc,
                pstart,
                args=(x_, yx_, z_, datasetnew, weights, offsetx, offsetz),
                method=method,
                options={"disp": True, "maxiter": 500},
            )
        elif modeltype == "miopc":
            if len(np.unique(y_.astype("category").iloc[:, 0])) % 2 == 1:
                model = minimize(
                    miopc,
                    pstart,
                    args=(x_, yx_, z_, datasetnew, weights, offsetx, offsetz),
                    method=method,
                    options={"disp": True, "maxiter": 500},
                )
            else:
                raise Exception("miopc requires odd number of categories.")
                return
        results = iopcresults(model, data, x, y, z, modeltype)
        return results
    else:
        raise Exception("type must be ziopc or miopc")


[docs]def iopfit(model):
    """Calculate probabilities from :py:func:`iopmod`.

    :param model: :class:IopModel object from :py:func:`iopmod`.
    :return: :class:FittedVals object with fitted values.
    """
    zg = model.Z.dot(model.inflate)
    xb = model.X.dot(model.ordered)
    cprobs = np.zeros((model.ycat - 1, 1))
    n = len(model.data)
    probs = np.zeros((n, model.ycat))
    cprobs[0, 0] = model.cutpoints[0]
    if model.modeltype == "ziop":
        for j in range(1, model.ycat - 1):
            cprobs[j, 0] = cprobs[j - 1, 0] + np.exp(model.cutpoints[j])
        probs[:, model.ycat - 1] = (norm.cdf(zg)) * (
                1 - norm.cdf(cprobs[model.ycat - 2, 0] - xb)
        )
        probs[:, 0] = (1 - norm.cdf(zg)) + (norm.cdf(zg)) * (
            norm.cdf(cprobs[0, 0] - xb)
        )
        for j in range(1, model.ycat - 1):
            probs[:, j] = (norm.cdf(zg)) * ((norm.cdf(cprobs[j, 0] - xb))
                                            - (norm.cdf(cprobs[j - 1, 0]
                                                        - xb)))
    elif model.modeltype == "miop":
        for j in range(1, model.ycat - 1):
            cprobs[j, 0] = cprobs[j - 1, 0] + np.exp(model.cutpoints[j])
        probs[:, model.ycat - 1] = (norm.cdf(zg)) * (
                1 - norm.cdf(cprobs[model.ycat - 2, 0] - xb)
        )
        probs[:, 0] = norm.cdf(zg) * norm.cdf(cprobs[0, 0] - xb)

        for i in range(1, model.ycat - 1):
            if i == np.median(range(model.ycat)):
                probs[:, i] = (1 - norm.cdf(zg)) + (
                        norm.cdf(zg)
                        * (norm.cdf(cprobs[j, 0] - xb)
                           - norm.cdf(cprobs[j - 1, 0] - xb)))
            else:
                probs[:, i] = norm.cdf(zg) * (
                        norm.cdf(cprobs[:, i] - xb)
                        - norm.cdf(cprobs[j - 1, 0] - xb))
    probsordered = np.zeros((n, model.ycat))
    probsordered[:, model.ycat - 1] = 1 - norm.cdf(
        cprobs[model.ycat - 2, 0] - xb)
    probsordered[:, 0] = norm.cdf(cprobs[0, 0] - xb)
    for j in range(1, model.ycat - 1):
        probsordered[:, j] = (norm.cdf(cprobs[j, 0] - xb)) - (
            norm.cdf(cprobs[j - 1, 0] - xb)
        )
    probsinfl = np.zeros((n, 1))
    probsinfl[:, 0] = 1 - norm.cdf(zg)
    probslin = pd.DataFrame({"ZG": zg, "XB": xb})
    fitted = FittedVals(probs, probsordered, probsinfl, probslin)
    return fitted


[docs]def iopcfit(model):
    """Calculate fitted probabilities from :py:func:`iopcmod`.

    :param model: :class:`IopCModel` object from :py:func:`iopcmod`.
    :return: :class:`FittedVals` object with fitted values.
    """
    zg = model.Z.dot(model.inflate)
    xb = model.X.dot(model.ordered)
    cprobs = np.zeros((model.ycat - 1, 1))
    n = len(model.data)
    probs = np.zeros((n, model.ycat))
    cprobs[0, 0] = model.cutpoints[0]
    rho = model.coefs.iloc[-1, 0]
    means = np.array([0, 0])
    lower = np.array([-inf, -inf])
    sigma = np.array([[1, rho], [rho, 1]])
    nsigma = np.array([[1, -rho], [-rho, 1]])
    if model.modeltype == "ziopc":
        for j in range(1, model.ycat - 1):
            cprobs[j, 0] = cprobs[j - 1, 0] + np.exp(model.cutpoints[j])
        for i in range(n):
            probs[i, model.ycat - 1] = mvn.mvnun(
                lower, [zg[i], (xb[i] - cprobs[model.ycat - 2][0])], means,
                sigma
            )[0]
            probs[i, 0] = (1 - norm.cdf(zg[i])) + mvn.mvnun(
                lower, [zg[i], (cprobs[0][0] - xb[i])], means, nsigma
            )[0]
        for i in range(n):
            for j in range(1, model.ycat - 1):
                probs[i, j] = (
                                  mvn.mvnun(lower,
                                            [zg[i], (cprobs[j][0] - xb[i])],
                                            means, nsigma)[0]
                              ) - (
                                  mvn.mvnun(
                                      lower,
                                      [zg[i], (cprobs[j - 1][0] - xb[i])],
                                      means, nsigma
                                  )[0]
                              )
    elif model.modeltype == "miopc":
        for j in range(1, model.ycat - 1):
            cprobs[j, 0] = cprobs[j - 1, 0] + np.exp(model.cutpoints[j])
        for i in range(n):
            probs[i, model.ycat - 1] = mvn.mvnun(
                lower, [zg[i], (xb[i] - cprobs[model.ycat - 2][0])], means,
                sigma
            )[0]
            probs[i, 0] = mvn.mvnun(
                lower, [zg[i], (cprobs[0][0] - xb[i])], means, nsigma
            )[0]
        for i in range(n):
            for j in range(1, model.ycat - 1):
                if j == np.median(range(model.ycat)):
                    probs[i, j] = (1 - norm.cdf(zg[i])) + (
                            (mvn.mvnun(
                                lower, [zg[i], (cprobs[j][0] - xb[i])],
                                means, nsigma)[0])
                            - (mvn.mvnun(lower,
                                         [zg[i], (cprobs[j - 1][0] -
                                                  xb[i])],
                                         means, nsigma)[0]))
                else:
                    probs[i, j] = (mvn.mvnun(lower,
                                             [zg[i], (cprobs[j][0] - xb[i])],
                                             means, nsigma)[0]) - (
                                      mvn.mvnun(lower, [zg[i],
                                                        (cprobs[j - 1][0] -
                                                         xb[i])],
                                                means, nsigma)[0])

    # ordered
    probsordered = np.zeros((n, model.ycat))
    probsordered[:, model.ycat - 1] = 1 - norm.cdf(
        cprobs[model.ycat - 2, 0] - xb)
    probsordered[:, 0] = norm.cdf(cprobs[0, 0] - xb)
    for j in range(1, model.ycat - 1):
        probsordered[:, j] = norm.cdf(cprobs[j, 0] - xb) - (
            norm.cdf(cprobs[j - 1, 0] - xb)
        )

    probsinfl = np.zeros((n, 1))
    probsinfl[:, 0] = 1 - norm.cdf(zg)
    probslin = pd.DataFrame({"zg": zg, "xb": xb})
    fitted = FittedVals(probs, probsordered, probsinfl, probslin)
    return fitted


[docs]def vuong_opiop(opmodel, iopmodel):
    """Run the Vuong test to compare the performance of the OP and iOP model.

    :param opmodel: The OP model from :class:`OpModel`.
    :param iopmodel: The ZiOP model from :class:`IopModel`.
    :return: vuongopiop: Result of the Vuong test.
    """
    n1 = len(opmodel.data)
    y = iopmodel.Y
    # can also y = opmodel.Y when 2 models have the same length
    x = iopmodel.X
    # can also x = opmodel.X when 2 models have the same length
    cuts_op = np.repeat(0, len(opmodel.cutpoints)).astype(float)
    xop = opmodel.ordered
    cuts_op[0] = opmodel.cutpoints[0]
    for i in range(1, len(opmodel.cutpoints)):
        cuts_op[i] = cuts_op[i - 1] + np.exp(opmodel.cutpoints[i])
    xbop = pd.DataFrame(index=np.arange(n1), columns=np.arange(len(x.columns)))
    for j in range(len(x.columns)):
        xbop.iloc[:, j] = xop[j] * x.iloc[:, j]
    xbop_sum = xbop.sum(axis=1)
    fitttediop = iopfit(iopmodel).responsefull
    ycat = y.astype("category")
    ycatu = np.unique(ycat)
    yncat = len(ycatu)
    y0 = np.sort(ycatu)
    v = np.zeros((n1, yncat))
    for j in range(yncat):
        v[:, j] = y == y0[j]
    m = np.zeros(n1)
    probs = np.zeros((n1, yncat))
    probs[:, 0] = norm.cdf(cuts_op[0] - xbop_sum) / fitttediop[:, 0]
    probs[:, yncat - 1] = (1 - norm.cdf(
        cuts_op[yncat - 2] - xbop_sum)) / fitttediop[:, yncat - 1]
    for i in range(1, yncat - 1):
        probs[:, i] = (norm.cdf(cuts_op[i] - xbop_sum)
                       - norm.cdf(cuts_op[i - 1]
                                  - xbop_sum)) / fitttediop[:, i]
    m = np.zeros((n1, yncat))
    for k in range(n1):
        for j in range(yncat):
            m[k, j] = v[k, j] * probs[k, j]
    m2 = m[m != 0]
    mlog = np.log(m2)
    diffmsq = (mlog - np.mean(mlog)) ** 2
    sumdms = sum(diffmsq)
    vuongopiop = (np.sqrt(n1) * (1 / n1) * sum(mlog)) / (
        np.sqrt((1 / n1) * sumdms))
    return vuongopiop


[docs]def vuong_opiopc(opmodel, iopcmodel):
    """Run the Vuong test to compare the performance of the OP and iOPC model.

    :param opmodel: The OP model from :class:`OpModel`.
    :param iopcmodel: The iOPC model from :class:`IopCModel`.
    :return: vuongopiopc: Result of the Vuong test.
    """
    n1 = len(opmodel.data)
    y = iopcmodel.Y
    x = iopcmodel.X
    cuts_op = np.repeat(0, len(opmodel.cutpoints)).astype(float)
    xop = opmodel.ordered
    cuts_op[0] = opmodel.cutpoints[0]
    for i in range(1, len(opmodel.cutpoints)):
        cuts_op[i] = cuts_op[i - 1] + np.exp(opmodel.cutpoints[i])
    xbop = pd.DataFrame(index=np.arange(n1), columns=np.arange(len(x.columns)))
    for j in range(len(x.columns)):
        xbop.iloc[:, j] = xop[j] * x.iloc[:, j]
    xbop_sum = xbop.sum(axis=1)
    fitttedziopc = iopcfit(iopcmodel).responsefull
    ycat = y.astype("category")
    ycatu = np.unique(ycat)
    yncat = len(ycatu)
    y0 = np.sort(ycatu)
    v = np.zeros((n1, yncat))
    for j in range(yncat):
        v[:, j] = y == y0[j]
    probs = np.zeros((n1, yncat))
    probs[:, 0] = norm.cdf(cuts_op[0] - xbop_sum) / fitttedziopc[:, 0]
    probs[:, yncat - 1] = (1 - norm.cdf(
        cuts_op[yncat - 2] - xbop_sum)) / fitttedziopc[:, yncat - 1]
    for i in range(1, yncat - 1):
        probs[:, i] = (norm.cdf(cuts_op[i] - xbop_sum) - norm.cdf(
            cuts_op[i - 1] - xbop_sum)) / fitttedziopc[:, i]
    m = np.zeros((n1, yncat))
    for k in range(n1):
        for j in range(yncat):
            m[k, j] = v[k, j] * probs[k, j]
    m2 = m[m != 0]
    mlog = np.log(m2)
    diffmsq = (mlog - np.mean(mlog)) ** 2
    sumdms = sum(diffmsq)
    vuongopiopc = (np.sqrt(n1) * (1 / n1) * sum(mlog)) / (
        np.sqrt((1 / n1) * sumdms))
    return vuongopiopc


[docs]def split_effects(model, inflvar, nsims=10000):
    """Calculate change in probability of being 0 in the split-probit stage.

    This function calculates the predicted probabilities
    when there is change in value of a variable in the split-probit equation.
    The chosen dummy variable is changed from 0 to 1,
    and chosen numerical variable is mean value + 1 standard deviation.
    Other variables are kept at 0 or mean value
    (Note: the current version of the function
    recognizes ordinal variables as numerical).

    :param model: :class:`IopModel` or :class:`IopCModel`.
    :param inflvar: Number representing the location of variable
        in the split-probit equation.
        (attribute .inflate of :class:`IopModel` or :class:`IopCModel`)
    :type inflvar: int
    :param nsims: Number of simulated observations, default is 10000.
    :type nsims: int
    :return: changeprobs: A dataframe of the predicted
        probabilities when there is change in the variable (1)
        versus original values (0).
    """
    estimate = model.coefs.iloc[:, 0]
    vcov = model.vcov
    model_z = model.Z
    zsim1 = np.zeros(len(model_z.columns))
    zsim1[0] = 1
    zsima = np.zeros(len(model_z.columns))
    zsima[0] = 1
    for j in range(1, len(model_z.columns)):
        if (
                max(model_z.iloc[:, j]) == 1
                and min(model_z.iloc[:, j]) == 0
                and len(np.unique(model_z.iloc[:, j])) == 2
        ):
            zsim1[j] = 0
        else:
            zsim1[j] = np.mean(model_z.iloc[:, j])
    for j in range(1, len(model_z.columns)):
        if (
                max(model_z.iloc[:, j]) == 1
                and min(model_z.iloc[:, j]) == 0
                and len(np.unique(model_z.iloc[:, j])) == 2
        ):
            zsima[j] = 1
        else:
            zsima[j] = np.mean(model_z.iloc[:, j]) + np.std(model_z.iloc[:, j])
    zsim2 = zsim1.copy()
    zsim2[inflvar] = zsima[inflvar]
    np.random.seed(1)
    probs1 = np.zeros(nsims)
    probs2 = np.zeros(nsims)
    for i in range(nsims):
        gsim = np.random.multivariate_normal(estimate, vcov)
        gsim2 = gsim[model.ycat - 1: model.ycat - 1 + len(model.inflate)]
        zg1 = zsim1.dot(gsim2)
        zg2 = zsim2.dot(gsim2)
        probs1[i] = norm.cdf(zg1)
        probs2[i] = norm.cdf(zg2)
    name = model.coefs.index[model.ycat - 1 + inflvar]
    changeprobs = pd.DataFrame({name.replace("Inflation: ", "") + "= 0": probs1,
                                name.replace("Inflation: ", "") + "= 1": probs2}
                               )
    return changeprobs


[docs]def ordered_effects(model, ordvar, nsims=10000):
    """Calculate the changes in probability in each outcome in OP stage.

    This function calculates predicted probabilities
    when there is a change in the value of a covariate
    in the ordered probit equation.
    The chosen dummy variable is changed from 0 to 1,
    and chosen numerical variable is mean value + 1 standard deviation.
    Other variables are kept at 0 or mean value
    (Note: the current version of the function
    recognizes ordinal variables as numerical).

    :param model: :class:`IopModel` or :class:`IopCModel`.
    :param ordvar: Number representing the location of variable
        in the ordered probit equation.
        (attribute .ordered of :class:`IopModel` or :class:`IopCModel`)
    :type ordvar: int
    :param nsims: Number of simulated observations, default is 10000.
    :type nsims: int
    :return: changeprobs: A dataframe of the predicted
        probabilities when there is change in the variable for each outcome (1)
        versus original values (0).
    """
    estimate = model.coefs.iloc[:, 0]
    vcov = model.vcov
    model_x = model.X
    xsim1 = np.zeros(len(model_x.columns))
    xsima = np.zeros(len(model_x.columns))
    for j in range(len(model_x.columns)):
        if (
                max(model_x.iloc[:, j]) == 1
                and min(model_x.iloc[:, j]) == 0
                and len(np.unique(model_x.iloc[:, j])) == 2
        ):
            xsim1[j] = 0
        else:
            xsim1[j] = np.mean(model_x.iloc[:, j])
    for j in range(len(model_x.columns)):
        if (
                max(model_x.iloc[:, j]) == 1
                and min(model_x.iloc[:, j]) == 0
                and len(np.unique(model_x.iloc[:, j])) == 2
        ):
            xsima[j] = 1
        else:
            xsima[j] = np.mean(model_x.iloc[:, j]) + np.std(model_x.iloc[:, j])
    xsim2 = xsim1.copy()
    xsim2[ordvar] = xsima[ordvar]
    np.random.seed(1)
    probsordered1 = np.zeros(model.ycat)
    probsordered2 = np.zeros(model.ycat)
    cprobs = np.zeros((model.ycat - 1, 1))
    cprobs[0, 0] = model.cutpoints[0]
    for j in range(1, model.ycat - 1):
        cprobs[j, 0] = cprobs[j - 1, 0] + np.exp(model.cutpoints[j])
    probs1 = pd.DataFrame(index=np.arange(nsims),
                          columns=np.arange(model.ycat))
    probs2 = pd.DataFrame(index=np.arange(nsims),
                          columns=np.arange(model.ycat))
    name = model.coefs.index[model.ycat - 1 + len(model.inflate) + ordvar]
    probs1 = probs1.add_suffix(": " + name.replace("Ordered: ", "") + " = 0")
    probs2 = probs2.add_suffix(": " + name.replace("Ordered: ", "") + " = 1")
    for i in range(nsims):
        bsim = np.random.multivariate_normal(estimate, vcov)
        bsim2 = bsim[model.ycat - 1 + len(model.inflate):
                     model.ycat - 1 + len(model.inflate) + len(model.ordered)]
        xb1 = xsim1.dot(bsim2)
        xb2 = xsim2.dot(bsim2)
        probsordered1[model.ycat - 1] = 1 - norm.cdf(
            cprobs[model.ycat - 2, 0] - xb1)
        probsordered1[0] = norm.cdf(cprobs[0, 0] - xb1)
        for j in range(1, model.ycat - 1):
            probsordered1[j] = norm.cdf(cprobs[j, 0] - xb1) - (
                norm.cdf(cprobs[j - 1, 0] - xb1)
            )
        probsordered2[model.ycat - 1] = 1 - norm.cdf(
            cprobs[model.ycat - 2, 0] - xb2)
        probsordered2[0] = norm.cdf(cprobs[0, 0] - xb2)
        for j in range(1, model.ycat - 1):
            probsordered2[j] = norm.cdf(cprobs[j, 0] - xb2) - (
                norm.cdf(cprobs[j - 1, 0] - xb2)
            )
        probs1.iloc[i:, ] = probsordered1
        probs2.iloc[i:, ] = probsordered2
    changeprobs = pd.DataFrame(index=np.arange(nsims),
                               columns=np.arange(2 * model.ycat))
    newnames = list(np.repeat("", model.ycat * 2))
    for j in range(0, 2 * model.ycat, 2):
        changeprobs.iloc[:, j] = probs1.iloc[:, round(j / 2)]
    for j in range(1, 2 * model.ycat, 2):
        changeprobs.iloc[:, j] = probs2.iloc[:, round((j - 1) / 2)]
    for j in range(0, 2 * model.ycat, 2):
        newnames[j] = list(probs1.columns)[round(j / 2)]
    for j in range(1, 2 * model.ycat, 2):
        newnames[j] = list(probs2.columns)[round((j - 1) / 2)]
    changeprobs.columns = newnames
    return changeprobs