.. DO NOT EDIT.
.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
.. "_examples/utils/mic.py"
.. LINE NUMBERS ARE GIVEN BELOW.

.. only:: html

    .. note::
        :class: sphx-glr-download-link-note

        Click :ref:`here <sphx_glr_download__examples_utils_mic.py>`
        to download the full example code

.. rst-class:: sphx-glr-example-title

.. _sphx_glr__examples_utils_mic.py:


MIC package
---------------------------

v1 = [R, R, R, R]
v2 = [R, R, R, R]

.. GENERATED FROM PYTHON SOURCE LINES 10-265

.. code-block:: default
   :lineno-start: 10

    # Libraries
    import numpy as np
    import pandas as pd

    from scipy.stats.contingency import crosstab
    from sklearn.metrics import mutual_info_score


    def mutual_info_matrix_v3(x=None, y=None, ct=None):
        """Compute the component information score.

        .. note: Might be inefficient but good for testing.

        .. note: In order to be able to compute the mutual
                 information score it is necessary to have
                 variation within the variable. Thus, if
                 there is only one class, should we return
                 a result or a warning?

        Parameters
        ----------
        x: list
            List with the classes
        y: list
            List with the classes

        Returns
        -------
        """

        def _check_nparray(obj, param_name):
            if obj is not None:
                if isinstance(obj, np.ndarray):
                    return obj
                elif isinstance(obj, pd.Series):
                    return obj.to_numpy()
                elif isinstance(obj, pd.DataFrame):
                    return obj.to_numpy()
                elif isinstance(obj, list):
                    return np.array(obj)
                else:
                    raise ValueError("""
                           The input parameter '{0}' is of type '{1} which is 
                           not supported. Please ensure it is a np.ndarray."""
                                     .format(param_name, type(obj)))

            # Ensure they are all np arrays

        x = _check_nparray(x, 'x')
        y = _check_nparray(y, 'y')
        ct = _check_nparray(ct, 'ct')

        # Compute contingency
        if ct is None:
            c = crosstab(x,y)
            if isinstance(c, tuple):
                ct = c[-1]   # older scipy
            else:
                ct = c.count # newer scipy

        # Variables
        n = ct.sum()
        pi = np.ravel(ct.sum(axis=1)) / n
        pj = np.ravel(ct.sum(axis=0)) / n

        # Create empty matrix
        m = np.empty(ct.shape)
        m[:] = np.nan

        # Fill with component information score
        with np.errstate(all='ignore'):
            for i in range(m.shape[0]):
                for j in range(m.shape[1]):
                    pxy = ct[i,j] / n
                    m[i,j] = pxy * np.log(pxy / (pi[i] * pj[j]))

        # Fill with na (lim x->0 => 0)
        m[np.isnan(m)] = 0

        # Return
        return m


    def mutual_info_matrix_v2(x=None, y=None, ct=None):
        """Computes the component information.

        The component information is calculated as below where X/Y
        denotes a state (e.g. RR).

            C(X/Y) = P(X/Y) * log(P(X/Y) / P(X)*P(Y))

              c11 c12
        C =   c21 c22

        pi = [pi1, pi2]
        pj = [pj1, pj2]

        ci11 = c11/n * np.log(c11/n / pi1*pj1)
        ci12 = c12/n * np.log(c12/n / pi1*pj2)
        ci21 = c21/n * np.log(c21/n / pi2*pj1)
        ci22 = c22/n * np.log(c22/n / pi2*pj2)

        .. warning: It only works for square contingency matrices; that is, the
                    number of different classes appearing in the vectors x and y
                    must be the same.

                    Enough for susceptibility test with only R and S. Will
                    fail if only one is present or I is introduced.

        Example
        -------
        sex           Female  Male  Total
        lbl
        Not Survived      89   483    572
        Survived         230   112    342
        Total            319   595    914

        # Manual example.
        ci11 = (89  / 914) * np.log((89  / 914) / (572 / 914) * (319 / 914))
        ci12 = (483 / 914) * np.log((483 / 914) / (572 / 914) * (595 / 914))
        ci21 = (230 / 914) * np.log((230 / 914) / (342 / 914) * (319 / 914))
        ci22 = (112 / 914) * np.log((112 / 914) / (342 / 914) * (595 / 914))
        m = np.array([[ci11, ci12], [ci21, ci22]])

        # Compute
        m = component_information_v0(x=data.lbl, y=d.sex)

        Parameters
        ----------

        Returns
        -------

        """

        def _check_nparray(obj, param_name):
            if obj is not None:
                if isinstance(obj, np.ndarray):
                    return obj
                elif isinstance(obj, pd.Series):
                    return obj.to_numpy()
                elif isinstance(obj, pd.DataFrame):
                    return obj.to_numpy()
                elif isinstance(obj, list):
                    return np.array(obj)
                else:
                    raise ValueError("""
                           The input parameter '{0}' is of type '{1} which is 
                           not supported. Please ensure it is a np.ndarray."""
                                     .format(param_name, type(obj)))

            # Ensure they are all np arrays

        x = _check_nparray(x, 'x')
        y = _check_nparray(y, 'y')
        ct = _check_nparray(ct, 'ct')

        # Compute contingency
        if ct is None:
            c = crosstab(x, y)
            if isinstance(c, tuple):
                ct = c[-1]  # older scipy
            else:
                ct = c.count  # newer scipy

        with np.errstate(divide='ignore'):
            # Variables
            n = ct.sum()
            pi = np.ravel(ct.sum(axis=1))
            pj = np.ravel(ct.sum(axis=0))

            # Compute matrix
            b = np.repeat(pi.reshape(-1,1), len(pi), axis=1)
            c = np.repeat(pj.reshape(1,-1), len(pj), axis=0)
            m = (ct/n) * np.log((ct/n) / ((b/n) * (c/n)))

        # Fill with na (lim x->0 => 0)
        m[np.isnan(m)] = 0

        # Return
        return m


    def mutual_info_matrix_v1(x=None, y=None, *, ct=None):
        """Computes the mutual information matrix

        The component information is calculated as below where X/Y
        denotes a state (e.g. RR).

            C(X/Y) = P(XY) * log(P(XY) / P(X)*P(Y))
                   = P(XY) * [log(P(XY)) - log(P(X)*P(Y))]
                   = P(XY) * [log(P(XY)) - (log(P(X)) + log(P(Y)))]
                   = P(XY) * [log(P(XY)) - log(P(X)) - log(P(Y))]

        .. note:: It is inspired by the code from sklearn.metrics.mutual_info_score.

        Notes
        -----
        The logarithm used is the natural logarithm (base-e).
        """
        from math import log
        from scipy import sparse as sp
        from sklearn.metrics.cluster._supervised import check_clusterings
        from sklearn.metrics.cluster._supervised import check_array
        from sklearn.metrics.cluster._supervised import contingency_matrix

        labels_true, labels_pred, contingency = x, y, ct

        if contingency is None:
            labels_true, labels_pred = check_clusterings(labels_true, labels_pred)
            contingency = contingency_matrix(labels_true, labels_pred, sparse=True)
        else:
            contingency = check_array(
                contingency,
                accept_sparse=["csr", "csc", "coo"],
                dtype=[int, np.int32, np.int64],
            )

        if isinstance(contingency, np.ndarray):
            # For an array
            nzx, nzy = np.nonzero(contingency)
            nz_val = contingency[nzx, nzy]
        elif sp.issparse(contingency):
            # For a sparse matrix
            nzx, nzy, nz_val = sp.find(contingency)
        else:
            raise ValueError("Unsupported type for 'contingency': %s" % type(contingency))

        contingency_sum = contingency.sum()
        pi = np.ravel(contingency.sum(axis=1))
        pj = np.ravel(contingency.sum(axis=0))

        # Since MI <= min(H(X), H(Y)), any labelling with zero entropy, i.e. containing a
        # single cluster, implies MI = 0
        if pi.size == 1 or pj.size == 1:
            return 0.0

        log_contingency_nm = np.log(nz_val)
        contingency_nm = nz_val / contingency_sum
        # Don't need to calculate the full outer product, just for non-zeroes
        outer = pi.take(nzx).astype(np.int64, copy=False) * \
                pj.take(nzy).astype(np.int64, copy=False) # b*c
        log_outer = -np.log(outer) + np.log(pi.sum()) + np.log(pj.sum())

        mi = (
            contingency_nm * (log_contingency_nm - np.log(contingency_sum))
            + contingency_nm * log_outer
        )
        mi = np.where(np.abs(mi) < np.finfo(mi.dtype).eps, 0.0, mi)

        #return np.clip(mi.sum(), 0.0, None), mi
        try:
            return np.array(mi).reshape(contingency.shape).T
        except:
            return mi
            #return mutual_info_matrix_v3(x=x, y=y, ct=ct)

.. rst-class:: sphx-glr-timing

   **Total running time of the script:** ( 0 minutes  0.000 seconds)


.. _sphx_glr_download__examples_utils_mic.py:


.. only :: html

 .. container:: sphx-glr-footer
    :class: sphx-glr-footer-example


  .. container:: sphx-glr-download sphx-glr-download-python

     :download:`Download Python source code: mic.py <mic.py>`


  .. container:: sphx-glr-download sphx-glr-download-jupyter

     :download:`Download Jupyter notebook: mic.ipynb <mic.ipynb>`


.. only:: html

 .. rst-class:: sphx-glr-signature

    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_