The Algorithms logo
The Algorithms
AboutDonate

Naive Bayes

C
H
E
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn import datasets\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "iris = datasets.load_iris()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df = pd.DataFrame(iris.data)\n",
    "df.columns = [\"sl\", \"sw\", 'pl', 'pw']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def abc(k, *val):\n",
    "    if k < val[0]:\n",
    "        return 0\n",
    "    else:\n",
    "        return 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0      1\n",
       "1      0\n",
       "2      0\n",
       "3      0\n",
       "4      1\n",
       "5      1\n",
       "6      0\n",
       "7      1\n",
       "8      0\n",
       "9      0\n",
       "10     1\n",
       "11     0\n",
       "12     0\n",
       "13     0\n",
       "14     1\n",
       "15     1\n",
       "16     1\n",
       "17     1\n",
       "18     1\n",
       "19     1\n",
       "20     1\n",
       "21     1\n",
       "22     0\n",
       "23     1\n",
       "24     0\n",
       "25     1\n",
       "26     1\n",
       "27     1\n",
       "28     1\n",
       "29     0\n",
       "      ..\n",
       "120    1\n",
       "121    1\n",
       "122    1\n",
       "123    1\n",
       "124    1\n",
       "125    1\n",
       "126    1\n",
       "127    1\n",
       "128    1\n",
       "129    1\n",
       "130    1\n",
       "131    1\n",
       "132    1\n",
       "133    1\n",
       "134    1\n",
       "135    1\n",
       "136    1\n",
       "137    1\n",
       "138    1\n",
       "139    1\n",
       "140    1\n",
       "141    1\n",
       "142    1\n",
       "143    1\n",
       "144    1\n",
       "145    1\n",
       "146    1\n",
       "147    1\n",
       "148    1\n",
       "149    1\n",
       "Name: sl, dtype: int64"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.sl.apply(abc, args=(5,))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def label(val, *boundaries):\n",
    "    if (val < boundaries[0]):\n",
    "        return 'a'\n",
    "    elif (val < boundaries[1]):\n",
    "        return 'b'\n",
    "    elif (val < boundaries[2]):\n",
    "        return 'c'\n",
    "    else:\n",
    "        return 'd'\n",
    "\n",
    "def toLabel(df, old_feature_name):\n",
    "    second = df[old_feature_name].mean()\n",
    "    minimum = df[old_feature_name].min()\n",
    "    first = (minimum + second)/2\n",
    "    maximum = df[old_feature_name].max()\n",
    "    third = (maximum + second)/2\n",
    "    return df[old_feature_name].apply(label, args= (first, second, third))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sl</th>\n",
       "      <th>sw</th>\n",
       "      <th>pl</th>\n",
       "      <th>pw</th>\n",
       "      <th>sl_labeled</th>\n",
       "      <th>sw_labeled</th>\n",
       "      <th>pl_labeled</th>\n",
       "      <th>pw_labeled</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5.1</td>\n",
       "      <td>3.5</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>b</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4.9</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>a</td>\n",
       "      <td>b</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4.7</td>\n",
       "      <td>3.2</td>\n",
       "      <td>1.3</td>\n",
       "      <td>0.2</td>\n",
       "      <td>a</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4.6</td>\n",
       "      <td>3.1</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.2</td>\n",
       "      <td>a</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5.0</td>\n",
       "      <td>3.6</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>a</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5.4</td>\n",
       "      <td>3.9</td>\n",
       "      <td>1.7</td>\n",
       "      <td>0.4</td>\n",
       "      <td>b</td>\n",
       "      <td>d</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>4.6</td>\n",
       "      <td>3.4</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.3</td>\n",
       "      <td>a</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>5.0</td>\n",
       "      <td>3.4</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.2</td>\n",
       "      <td>a</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>4.4</td>\n",
       "      <td>2.9</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>a</td>\n",
       "      <td>b</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>4.9</td>\n",
       "      <td>3.1</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.1</td>\n",
       "      <td>a</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>5.4</td>\n",
       "      <td>3.7</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.2</td>\n",
       "      <td>b</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>4.8</td>\n",
       "      <td>3.4</td>\n",
       "      <td>1.6</td>\n",
       "      <td>0.2</td>\n",
       "      <td>a</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>4.8</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.1</td>\n",
       "      <td>a</td>\n",
       "      <td>b</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>4.3</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.1</td>\n",
       "      <td>0.1</td>\n",
       "      <td>a</td>\n",
       "      <td>b</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>5.8</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.2</td>\n",
       "      <td>0.2</td>\n",
       "      <td>b</td>\n",
       "      <td>d</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>5.7</td>\n",
       "      <td>4.4</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.4</td>\n",
       "      <td>b</td>\n",
       "      <td>d</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>5.4</td>\n",
       "      <td>3.9</td>\n",
       "      <td>1.3</td>\n",
       "      <td>0.4</td>\n",
       "      <td>b</td>\n",
       "      <td>d</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>5.1</td>\n",
       "      <td>3.5</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.3</td>\n",
       "      <td>b</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>5.7</td>\n",
       "      <td>3.8</td>\n",
       "      <td>1.7</td>\n",
       "      <td>0.3</td>\n",
       "      <td>b</td>\n",
       "      <td>d</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>5.1</td>\n",
       "      <td>3.8</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.3</td>\n",
       "      <td>b</td>\n",
       "      <td>d</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>5.4</td>\n",
       "      <td>3.4</td>\n",
       "      <td>1.7</td>\n",
       "      <td>0.2</td>\n",
       "      <td>b</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>5.1</td>\n",
       "      <td>3.7</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.4</td>\n",
       "      <td>b</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>4.6</td>\n",
       "      <td>3.6</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>a</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>5.1</td>\n",
       "      <td>3.3</td>\n",
       "      <td>1.7</td>\n",
       "      <td>0.5</td>\n",
       "      <td>b</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>4.8</td>\n",
       "      <td>3.4</td>\n",
       "      <td>1.9</td>\n",
       "      <td>0.2</td>\n",
       "      <td>a</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>5.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.6</td>\n",
       "      <td>0.2</td>\n",
       "      <td>a</td>\n",
       "      <td>b</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>5.0</td>\n",
       "      <td>3.4</td>\n",
       "      <td>1.6</td>\n",
       "      <td>0.4</td>\n",
       "      <td>a</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>5.2</td>\n",
       "      <td>3.5</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.2</td>\n",
       "      <td>b</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>5.2</td>\n",
       "      <td>3.4</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>b</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>4.7</td>\n",
       "      <td>3.2</td>\n",
       "      <td>1.6</td>\n",
       "      <td>0.2</td>\n",
       "      <td>a</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>120</th>\n",
       "      <td>6.9</td>\n",
       "      <td>3.2</td>\n",
       "      <td>5.7</td>\n",
       "      <td>2.3</td>\n",
       "      <td>d</td>\n",
       "      <td>c</td>\n",
       "      <td>d</td>\n",
       "      <td>d</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>121</th>\n",
       "      <td>5.6</td>\n",
       "      <td>2.8</td>\n",
       "      <td>4.9</td>\n",
       "      <td>2.0</td>\n",
       "      <td>b</td>\n",
       "      <td>b</td>\n",
       "      <td>c</td>\n",
       "      <td>d</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>122</th>\n",
       "      <td>7.7</td>\n",
       "      <td>2.8</td>\n",
       "      <td>6.7</td>\n",
       "      <td>2.0</td>\n",
       "      <td>d</td>\n",
       "      <td>b</td>\n",
       "      <td>d</td>\n",
       "      <td>d</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>123</th>\n",
       "      <td>6.3</td>\n",
       "      <td>2.7</td>\n",
       "      <td>4.9</td>\n",
       "      <td>1.8</td>\n",
       "      <td>c</td>\n",
       "      <td>b</td>\n",
       "      <td>c</td>\n",
       "      <td>c</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>124</th>\n",
       "      <td>6.7</td>\n",
       "      <td>3.3</td>\n",
       "      <td>5.7</td>\n",
       "      <td>2.1</td>\n",
       "      <td>c</td>\n",
       "      <td>c</td>\n",
       "      <td>d</td>\n",
       "      <td>d</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>125</th>\n",
       "      <td>7.2</td>\n",
       "      <td>3.2</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1.8</td>\n",
       "      <td>d</td>\n",
       "      <td>c</td>\n",
       "      <td>d</td>\n",
       "      <td>c</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>126</th>\n",
       "      <td>6.2</td>\n",
       "      <td>2.8</td>\n",
       "      <td>4.8</td>\n",
       "      <td>1.8</td>\n",
       "      <td>c</td>\n",
       "      <td>b</td>\n",
       "      <td>c</td>\n",
       "      <td>c</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>127</th>\n",
       "      <td>6.1</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4.9</td>\n",
       "      <td>1.8</td>\n",
       "      <td>c</td>\n",
       "      <td>b</td>\n",
       "      <td>c</td>\n",
       "      <td>c</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>128</th>\n",
       "      <td>6.4</td>\n",
       "      <td>2.8</td>\n",
       "      <td>5.6</td>\n",
       "      <td>2.1</td>\n",
       "      <td>c</td>\n",
       "      <td>b</td>\n",
       "      <td>d</td>\n",
       "      <td>d</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>129</th>\n",
       "      <td>7.2</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.8</td>\n",
       "      <td>1.6</td>\n",
       "      <td>d</td>\n",
       "      <td>b</td>\n",
       "      <td>d</td>\n",
       "      <td>c</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>130</th>\n",
       "      <td>7.4</td>\n",
       "      <td>2.8</td>\n",
       "      <td>6.1</td>\n",
       "      <td>1.9</td>\n",
       "      <td>d</td>\n",
       "      <td>b</td>\n",
       "      <td>d</td>\n",
       "      <td>d</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>131</th>\n",
       "      <td>7.9</td>\n",
       "      <td>3.8</td>\n",
       "      <td>6.4</td>\n",
       "      <td>2.0</td>\n",
       "      <td>d</td>\n",
       "      <td>d</td>\n",
       "      <td>d</td>\n",
       "      <td>d</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>132</th>\n",
       "      <td>6.4</td>\n",
       "      <td>2.8</td>\n",
       "      <td>5.6</td>\n",
       "      <td>2.2</td>\n",
       "      <td>c</td>\n",
       "      <td>b</td>\n",
       "      <td>d</td>\n",
       "      <td>d</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>133</th>\n",
       "      <td>6.3</td>\n",
       "      <td>2.8</td>\n",
       "      <td>5.1</td>\n",
       "      <td>1.5</td>\n",
       "      <td>c</td>\n",
       "      <td>b</td>\n",
       "      <td>c</td>\n",
       "      <td>c</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>134</th>\n",
       "      <td>6.1</td>\n",
       "      <td>2.6</td>\n",
       "      <td>5.6</td>\n",
       "      <td>1.4</td>\n",
       "      <td>c</td>\n",
       "      <td>b</td>\n",
       "      <td>d</td>\n",
       "      <td>c</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135</th>\n",
       "      <td>7.7</td>\n",
       "      <td>3.0</td>\n",
       "      <td>6.1</td>\n",
       "      <td>2.3</td>\n",
       "      <td>d</td>\n",
       "      <td>b</td>\n",
       "      <td>d</td>\n",
       "      <td>d</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>136</th>\n",
       "      <td>6.3</td>\n",
       "      <td>3.4</td>\n",
       "      <td>5.6</td>\n",
       "      <td>2.4</td>\n",
       "      <td>c</td>\n",
       "      <td>c</td>\n",
       "      <td>d</td>\n",
       "      <td>d</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>137</th>\n",
       "      <td>6.4</td>\n",
       "      <td>3.1</td>\n",
       "      <td>5.5</td>\n",
       "      <td>1.8</td>\n",
       "      <td>c</td>\n",
       "      <td>c</td>\n",
       "      <td>d</td>\n",
       "      <td>c</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>138</th>\n",
       "      <td>6.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4.8</td>\n",
       "      <td>1.8</td>\n",
       "      <td>c</td>\n",
       "      <td>b</td>\n",
       "      <td>c</td>\n",
       "      <td>c</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>139</th>\n",
       "      <td>6.9</td>\n",
       "      <td>3.1</td>\n",
       "      <td>5.4</td>\n",
       "      <td>2.1</td>\n",
       "      <td>d</td>\n",
       "      <td>c</td>\n",
       "      <td>d</td>\n",
       "      <td>d</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>140</th>\n",
       "      <td>6.7</td>\n",
       "      <td>3.1</td>\n",
       "      <td>5.6</td>\n",
       "      <td>2.4</td>\n",
       "      <td>c</td>\n",
       "      <td>c</td>\n",
       "      <td>d</td>\n",
       "      <td>d</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>141</th>\n",
       "      <td>6.9</td>\n",
       "      <td>3.1</td>\n",
       "      <td>5.1</td>\n",
       "      <td>2.3</td>\n",
       "      <td>d</td>\n",
       "      <td>c</td>\n",
       "      <td>c</td>\n",
       "      <td>d</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>142</th>\n",
       "      <td>5.8</td>\n",
       "      <td>2.7</td>\n",
       "      <td>5.1</td>\n",
       "      <td>1.9</td>\n",
       "      <td>b</td>\n",
       "      <td>b</td>\n",
       "      <td>c</td>\n",
       "      <td>d</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>143</th>\n",
       "      <td>6.8</td>\n",
       "      <td>3.2</td>\n",
       "      <td>5.9</td>\n",
       "      <td>2.3</td>\n",
       "      <td>c</td>\n",
       "      <td>c</td>\n",
       "      <td>d</td>\n",
       "      <td>d</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>144</th>\n",
       "      <td>6.7</td>\n",
       "      <td>3.3</td>\n",
       "      <td>5.7</td>\n",
       "      <td>2.5</td>\n",
       "      <td>c</td>\n",
       "      <td>c</td>\n",
       "      <td>d</td>\n",
       "      <td>d</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>145</th>\n",
       "      <td>6.7</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.2</td>\n",
       "      <td>2.3</td>\n",
       "      <td>c</td>\n",
       "      <td>b</td>\n",
       "      <td>c</td>\n",
       "      <td>d</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>146</th>\n",
       "      <td>6.3</td>\n",
       "      <td>2.5</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1.9</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>c</td>\n",
       "      <td>d</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>147</th>\n",
       "      <td>6.5</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.2</td>\n",
       "      <td>2.0</td>\n",
       "      <td>c</td>\n",
       "      <td>b</td>\n",
       "      <td>c</td>\n",
       "      <td>d</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>148</th>\n",
       "      <td>6.2</td>\n",
       "      <td>3.4</td>\n",
       "      <td>5.4</td>\n",
       "      <td>2.3</td>\n",
       "      <td>c</td>\n",
       "      <td>c</td>\n",
       "      <td>d</td>\n",
       "      <td>d</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149</th>\n",
       "      <td>5.9</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.1</td>\n",
       "      <td>1.8</td>\n",
       "      <td>c</td>\n",
       "      <td>b</td>\n",
       "      <td>c</td>\n",
       "      <td>c</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>150 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      sl   sw   pl   pw sl_labeled sw_labeled pl_labeled pw_labeled\n",
       "0    5.1  3.5  1.4  0.2          b          c          a          a\n",
       "1    4.9  3.0  1.4  0.2          a          b          a          a\n",
       "2    4.7  3.2  1.3  0.2          a          c          a          a\n",
       "3    4.6  3.1  1.5  0.2          a          c          a          a\n",
       "4    5.0  3.6  1.4  0.2          a          c          a          a\n",
       "5    5.4  3.9  1.7  0.4          b          d          a          a\n",
       "6    4.6  3.4  1.4  0.3          a          c          a          a\n",
       "7    5.0  3.4  1.5  0.2          a          c          a          a\n",
       "8    4.4  2.9  1.4  0.2          a          b          a          a\n",
       "9    4.9  3.1  1.5  0.1          a          c          a          a\n",
       "10   5.4  3.7  1.5  0.2          b          c          a          a\n",
       "11   4.8  3.4  1.6  0.2          a          c          a          a\n",
       "12   4.8  3.0  1.4  0.1          a          b          a          a\n",
       "13   4.3  3.0  1.1  0.1          a          b          a          a\n",
       "14   5.8  4.0  1.2  0.2          b          d          a          a\n",
       "15   5.7  4.4  1.5  0.4          b          d          a          a\n",
       "16   5.4  3.9  1.3  0.4          b          d          a          a\n",
       "17   5.1  3.5  1.4  0.3          b          c          a          a\n",
       "18   5.7  3.8  1.7  0.3          b          d          a          a\n",
       "19   5.1  3.8  1.5  0.3          b          d          a          a\n",
       "20   5.4  3.4  1.7  0.2          b          c          a          a\n",
       "21   5.1  3.7  1.5  0.4          b          c          a          a\n",
       "22   4.6  3.6  1.0  0.2          a          c          a          a\n",
       "23   5.1  3.3  1.7  0.5          b          c          a          a\n",
       "24   4.8  3.4  1.9  0.2          a          c          a          a\n",
       "25   5.0  3.0  1.6  0.2          a          b          a          a\n",
       "26   5.0  3.4  1.6  0.4          a          c          a          a\n",
       "27   5.2  3.5  1.5  0.2          b          c          a          a\n",
       "28   5.2  3.4  1.4  0.2          b          c          a          a\n",
       "29   4.7  3.2  1.6  0.2          a          c          a          a\n",
       "..   ...  ...  ...  ...        ...        ...        ...        ...\n",
       "120  6.9  3.2  5.7  2.3          d          c          d          d\n",
       "121  5.6  2.8  4.9  2.0          b          b          c          d\n",
       "122  7.7  2.8  6.7  2.0          d          b          d          d\n",
       "123  6.3  2.7  4.9  1.8          c          b          c          c\n",
       "124  6.7  3.3  5.7  2.1          c          c          d          d\n",
       "125  7.2  3.2  6.0  1.8          d          c          d          c\n",
       "126  6.2  2.8  4.8  1.8          c          b          c          c\n",
       "127  6.1  3.0  4.9  1.8          c          b          c          c\n",
       "128  6.4  2.8  5.6  2.1          c          b          d          d\n",
       "129  7.2  3.0  5.8  1.6          d          b          d          c\n",
       "130  7.4  2.8  6.1  1.9          d          b          d          d\n",
       "131  7.9  3.8  6.4  2.0          d          d          d          d\n",
       "132  6.4  2.8  5.6  2.2          c          b          d          d\n",
       "133  6.3  2.8  5.1  1.5          c          b          c          c\n",
       "134  6.1  2.6  5.6  1.4          c          b          d          c\n",
       "135  7.7  3.0  6.1  2.3          d          b          d          d\n",
       "136  6.3  3.4  5.6  2.4          c          c          d          d\n",
       "137  6.4  3.1  5.5  1.8          c          c          d          c\n",
       "138  6.0  3.0  4.8  1.8          c          b          c          c\n",
       "139  6.9  3.1  5.4  2.1          d          c          d          d\n",
       "140  6.7  3.1  5.6  2.4          c          c          d          d\n",
       "141  6.9  3.1  5.1  2.3          d          c          c          d\n",
       "142  5.8  2.7  5.1  1.9          b          b          c          d\n",
       "143  6.8  3.2  5.9  2.3          c          c          d          d\n",
       "144  6.7  3.3  5.7  2.5          c          c          d          d\n",
       "145  6.7  3.0  5.2  2.3          c          b          c          d\n",
       "146  6.3  2.5  5.0  1.9          c          a          c          d\n",
       "147  6.5  3.0  5.2  2.0          c          b          c          d\n",
       "148  6.2  3.4  5.4  2.3          c          c          d          d\n",
       "149  5.9  3.0  5.1  1.8          c          b          c          c\n",
       "\n",
       "[150 rows x 8 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['sl_labeled'] = toLabel(df, 'sl')\n",
    "df['sw_labeled'] = toLabel(df, 'sw')\n",
    "df['pl_labeled'] = toLabel(df, 'pl')\n",
    "df['pw_labeled'] = toLabel(df, 'pw')\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.drop(['sl', 'sw', 'pl', 'pw'], axis = 1, inplace = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'a', 'b', 'c', 'd'}"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "set(df['sl_labeled'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "df[\"output\"] = iris.target"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sl_labeled</th>\n",
       "      <th>sw_labeled</th>\n",
       "      <th>pl_labeled</th>\n",
       "      <th>pw_labeled</th>\n",
       "      <th>output</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>b</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>a</td>\n",
       "      <td>b</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>a</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>a</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>a</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>b</td>\n",
       "      <td>d</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>a</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>a</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>a</td>\n",
       "      <td>b</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>a</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>b</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>a</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>a</td>\n",
       "      <td>b</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>a</td>\n",
       "      <td>b</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>b</td>\n",
       "      <td>d</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>b</td>\n",
       "      <td>d</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>b</td>\n",
       "      <td>d</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>b</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>b</td>\n",
       "      <td>d</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>b</td>\n",
       "      <td>d</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>b</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>b</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>a</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>b</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>a</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>a</td>\n",
       "      <td>b</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>a</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>b</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>b</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>a</td>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>a</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>120</th>\n",
       "      <td>d</td>\n",
       "      <td>c</td>\n",
       "      <td>d</td>\n",
       "      <td>d</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>121</th>\n",
       "      <td>b</td>\n",
       "      <td>b</td>\n",
       "      <td>c</td>\n",
       "      <td>d</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>122</th>\n",
       "      <td>d</td>\n",
       "      <td>b</td>\n",
       "      <td>d</td>\n",
       "      <td>d</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>123</th>\n",
       "      <td>c</td>\n",
       "      <td>b</td>\n",
       "      <td>c</td>\n",
       "      <td>c</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>124</th>\n",
       "      <td>c</td>\n",
       "      <td>c</td>\n",
       "      <td>d</td>\n",
       "      <td>d</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>125</th>\n",
       "      <td>d</td>\n",
       "      <td>c</td>\n",
       "      <td>d</td>\n",
       "      <td>c</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>126</th>\n",
       "      <td>c</td>\n",
       "      <td>b</td>\n",
       "      <td>c</td>\n",
       "      <td>c</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>127</th>\n",
       "      <td>c</td>\n",
       "      <td>b</td>\n",
       "      <td>c</td>\n",
       "      <td>c</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>128</th>\n",
       "      <td>c</td>\n",
       "      <td>b</td>\n",
       "      <td>d</td>\n",
       "      <td>d</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>129</th>\n",
       "      <td>d</td>\n",
       "      <td>b</td>\n",
       "      <td>d</td>\n",
       "      <td>c</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>130</th>\n",
       "      <td>d</td>\n",
       "      <td>b</td>\n",
       "      <td>d</td>\n",
       "      <td>d</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>131</th>\n",
       "      <td>d</td>\n",
       "      <td>d</td>\n",
       "      <td>d</td>\n",
       "      <td>d</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>132</th>\n",
       "      <td>c</td>\n",
       "      <td>b</td>\n",
       "      <td>d</td>\n",
       "      <td>d</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>133</th>\n",
       "      <td>c</td>\n",
       "      <td>b</td>\n",
       "      <td>c</td>\n",
       "      <td>c</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>134</th>\n",
       "      <td>c</td>\n",
       "      <td>b</td>\n",
       "      <td>d</td>\n",
       "      <td>c</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135</th>\n",
       "      <td>d</td>\n",
       "      <td>b</td>\n",
       "      <td>d</td>\n",
       "      <td>d</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>136</th>\n",
       "      <td>c</td>\n",
       "      <td>c</td>\n",
       "      <td>d</td>\n",
       "      <td>d</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>137</th>\n",
       "      <td>c</td>\n",
       "      <td>c</td>\n",
       "      <td>d</td>\n",
       "      <td>c</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>138</th>\n",
       "      <td>c</td>\n",
       "      <td>b</td>\n",
       "      <td>c</td>\n",
       "      <td>c</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>139</th>\n",
       "      <td>d</td>\n",
       "      <td>c</td>\n",
       "      <td>d</td>\n",
       "      <td>d</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>140</th>\n",
       "      <td>c</td>\n",
       "      <td>c</td>\n",
       "      <td>d</td>\n",
       "      <td>d</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>141</th>\n",
       "      <td>d</td>\n",
       "      <td>c</td>\n",
       "      <td>c</td>\n",
       "      <td>d</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>142</th>\n",
       "      <td>b</td>\n",
       "      <td>b</td>\n",
       "      <td>c</td>\n",
       "      <td>d</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>143</th>\n",
       "      <td>c</td>\n",
       "      <td>c</td>\n",
       "      <td>d</td>\n",
       "      <td>d</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>144</th>\n",
       "      <td>c</td>\n",
       "      <td>c</td>\n",
       "      <td>d</td>\n",
       "      <td>d</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>145</th>\n",
       "      <td>c</td>\n",
       "      <td>b</td>\n",
       "      <td>c</td>\n",
       "      <td>d</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>146</th>\n",
       "      <td>c</td>\n",
       "      <td>a</td>\n",
       "      <td>c</td>\n",
       "      <td>d</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>147</th>\n",
       "      <td>c</td>\n",
       "      <td>b</td>\n",
       "      <td>c</td>\n",
       "      <td>d</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>148</th>\n",
       "      <td>c</td>\n",
       "      <td>c</td>\n",
       "      <td>d</td>\n",
       "      <td>d</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149</th>\n",
       "      <td>c</td>\n",
       "      <td>b</td>\n",
       "      <td>c</td>\n",
       "      <td>c</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>150 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    sl_labeled sw_labeled pl_labeled pw_labeled  output\n",
       "0            b          c          a          a       0\n",
       "1            a          b          a          a       0\n",
       "2            a          c          a          a       0\n",
       "3            a          c          a          a       0\n",
       "4            a          c          a          a       0\n",
       "5            b          d          a          a       0\n",
       "6            a          c          a          a       0\n",
       "7            a          c          a          a       0\n",
       "8            a          b          a          a       0\n",
       "9            a          c          a          a       0\n",
       "10           b          c          a          a       0\n",
       "11           a          c          a          a       0\n",
       "12           a          b          a          a       0\n",
       "13           a          b          a          a       0\n",
       "14           b          d          a          a       0\n",
       "15           b          d          a          a       0\n",
       "16           b          d          a          a       0\n",
       "17           b          c          a          a       0\n",
       "18           b          d          a          a       0\n",
       "19           b          d          a          a       0\n",
       "20           b          c          a          a       0\n",
       "21           b          c          a          a       0\n",
       "22           a          c          a          a       0\n",
       "23           b          c          a          a       0\n",
       "24           a          c          a          a       0\n",
       "25           a          b          a          a       0\n",
       "26           a          c          a          a       0\n",
       "27           b          c          a          a       0\n",
       "28           b          c          a          a       0\n",
       "29           a          c          a          a       0\n",
       "..         ...        ...        ...        ...     ...\n",
       "120          d          c          d          d       2\n",
       "121          b          b          c          d       2\n",
       "122          d          b          d          d       2\n",
       "123          c          b          c          c       2\n",
       "124          c          c          d          d       2\n",
       "125          d          c          d          c       2\n",
       "126          c          b          c          c       2\n",
       "127          c          b          c          c       2\n",
       "128          c          b          d          d       2\n",
       "129          d          b          d          c       2\n",
       "130          d          b          d          d       2\n",
       "131          d          d          d          d       2\n",
       "132          c          b          d          d       2\n",
       "133          c          b          c          c       2\n",
       "134          c          b          d          c       2\n",
       "135          d          b          d          d       2\n",
       "136          c          c          d          d       2\n",
       "137          c          c          d          c       2\n",
       "138          c          b          c          c       2\n",
       "139          d          c          d          d       2\n",
       "140          c          c          d          d       2\n",
       "141          d          c          c          d       2\n",
       "142          b          b          c          d       2\n",
       "143          c          c          d          d       2\n",
       "144          c          c          d          d       2\n",
       "145          c          b          c          d       2\n",
       "146          c          a          c          d       2\n",
       "147          c          b          c          d       2\n",
       "148          c          c          d          d       2\n",
       "149          c          b          c          c       2\n",
       "\n",
       "[150 rows x 5 columns]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def fit(data):\n",
    "    output_name = data.columns[-1]\n",
    "    features = data.columns[0:-1]\n",
    "    counts = {}\n",
    "    possible_outputs = set(data[output_name])\n",
    "    for output in possible_outputs:\n",
    "        counts[output] = {}\n",
    "        smallData = data[data[output_name] == output]\n",
    "        counts[output][\"total_count\"] = len(smallData)\n",
    "        for f in features:\n",
    "            counts[output][f] = {}\n",
    "            possible_values = set(smallData[f])\n",
    "            for value in possible_values:\n",
    "                val_count = len(smallData[smallData[f] == value])\n",
    "                counts[output][f][value] = val_count\n",
    "    return counts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{0: {'pl_labeled': {'a': 50},\n",
       "  'pw_labeled': {'a': 50},\n",
       "  'sl_labeled': {'a': 28, 'b': 22},\n",
       "  'sw_labeled': {'a': 1, 'b': 7, 'c': 32, 'd': 10},\n",
       "  'total_count': 50},\n",
       " 1: {'pl_labeled': {'b': 7, 'c': 43},\n",
       "  'pw_labeled': {'b': 10, 'c': 40},\n",
       "  'sl_labeled': {'a': 3, 'b': 21, 'c': 24, 'd': 2},\n",
       "  'sw_labeled': {'a': 13, 'b': 29, 'c': 8},\n",
       "  'total_count': 50},\n",
       " 2: {'pl_labeled': {'c': 20, 'd': 30},\n",
       "  'pw_labeled': {'c': 16, 'd': 34},\n",
       "  'sl_labeled': {'a': 1, 'b': 5, 'c': 29, 'd': 15},\n",
       "  'sw_labeled': {'a': 5, 'b': 28, 'c': 15, 'd': 2},\n",
       "  'total_count': 50}}"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "fit(df)"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
About this Algorithm
from sklearn import datasets
import pandas as pd
iris = datasets.load_iris()
df = pd.DataFrame(iris.data)
df.columns = ["sl", "sw", 'pl', 'pw']
def abc(k, *val):
    if k &lt; val[0]:
        return 0
    else:
        return 1
df.sl.apply(abc, args=(5,))
0      1
1      0
2      0
3      0
4      1
5      1
6      0
7      1
8      0
9      0
10     1
11     0
12     0
13     0
14     1
15     1
16     1
17     1
18     1
19     1
20     1
21     1
22     0
23     1
24     0
25     1
26     1
27     1
28     1
29     0
      ..
120    1
121    1
122    1
123    1
124    1
125    1
126    1
127    1
128    1
129    1
130    1
131    1
132    1
133    1
134    1
135    1
136    1
137    1
138    1
139    1
140    1
141    1
142    1
143    1
144    1
145    1
146    1
147    1
148    1
149    1
Name: sl, dtype: int64
def label(val, *boundaries):
    if (val &lt; boundaries[0]):
        return 'a'
    elif (val &lt; boundaries[1]):
        return 'b'
    elif (val &lt; boundaries[2]):
        return 'c'
    else:
        return 'd'

def toLabel(df, old_feature_name):
    second = df[old_feature_name].mean()
    minimum = df[old_feature_name].min()
    first = (minimum + second)/2
    maximum = df[old_feature_name].max()
    third = (maximum + second)/2
    return df[old_feature_name].apply(label, args= (first, second, third))
df['sl_labeled'] = toLabel(df, 'sl')
df['sw_labeled'] = toLabel(df, 'sw')
df['pl_labeled'] = toLabel(df, 'pl')
df['pw_labeled'] = toLabel(df, 'pw')
df
sl sw pl pw sl_labeled sw_labeled pl_labeled pw_labeled
0 5.1 3.5 1.4 0.2 b c a a
1 4.9 3.0 1.4 0.2 a b a a
2 4.7 3.2 1.3 0.2 a c a a
3 4.6 3.1 1.5 0.2 a c a a
4 5.0 3.6 1.4 0.2 a c a a
5 5.4 3.9 1.7 0.4 b d a a
6 4.6 3.4 1.4 0.3 a c a a
7 5.0 3.4 1.5 0.2 a c a a
8 4.4 2.9 1.4 0.2 a b a a
9 4.9 3.1 1.5 0.1 a c a a
10 5.4 3.7 1.5 0.2 b c a a
11 4.8 3.4 1.6 0.2 a c a a
12 4.8 3.0 1.4 0.1 a b a a
13 4.3 3.0 1.1 0.1 a b a a
14 5.8 4.0 1.2 0.2 b d a a
15 5.7 4.4 1.5 0.4 b d a a
16 5.4 3.9 1.3 0.4 b d a a
17 5.1 3.5 1.4 0.3 b c a a
18 5.7 3.8 1.7 0.3 b d a a
19 5.1 3.8 1.5 0.3 b d a a
20 5.4 3.4 1.7 0.2 b c a a
21 5.1 3.7 1.5 0.4 b c a a
22 4.6 3.6 1.0 0.2 a c a a
23 5.1 3.3 1.7 0.5 b c a a
24 4.8 3.4 1.9 0.2 a c a a
25 5.0 3.0 1.6 0.2 a b a a
26 5.0 3.4 1.6 0.4 a c a a
27 5.2 3.5 1.5 0.2 b c a a
28 5.2 3.4 1.4 0.2 b c a a
29 4.7 3.2 1.6 0.2 a c a a
... ... ... ... ... ... ... ... ...
120 6.9 3.2 5.7 2.3 d c d d
121 5.6 2.8 4.9 2.0 b b c d
122 7.7 2.8 6.7 2.0 d b d d
123 6.3 2.7 4.9 1.8 c b c c
124 6.7 3.3 5.7 2.1 c c d d
125 7.2 3.2 6.0 1.8 d c d c
126 6.2 2.8 4.8 1.8 c b c c
127 6.1 3.0 4.9 1.8 c b c c
128 6.4 2.8 5.6 2.1 c b d d
129 7.2 3.0 5.8 1.6 d b d c
130 7.4 2.8 6.1 1.9 d b d d
131 7.9 3.8 6.4 2.0 d d d d
132 6.4 2.8 5.6 2.2 c b d d
133 6.3 2.8 5.1 1.5 c b c c
134 6.1 2.6 5.6 1.4 c b d c
135 7.7 3.0 6.1 2.3 d b d d
136 6.3 3.4 5.6 2.4 c c d d
137 6.4 3.1 5.5 1.8 c c d c
138 6.0 3.0 4.8 1.8 c b c c
139 6.9 3.1 5.4 2.1 d c d d
140 6.7 3.1 5.6 2.4 c c d d
141 6.9 3.1 5.1 2.3 d c c d
142 5.8 2.7 5.1 1.9 b b c d
143 6.8 3.2 5.9 2.3 c c d d
144 6.7 3.3 5.7 2.5 c c d d
145 6.7 3.0 5.2 2.3 c b c d
146 6.3 2.5 5.0 1.9 c a c d
147 6.5 3.0 5.2 2.0 c b c d
148 6.2 3.4 5.4 2.3 c c d d
149 5.9 3.0 5.1 1.8 c b c c

150 rows × 8 columns

df.drop(['sl', 'sw', 'pl', 'pw'], axis = 1, inplace = True)
set(df['sl_labeled'])
{&#x27;a&#x27;, &#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;}
df["output"] = iris.target
df
sl_labeled sw_labeled pl_labeled pw_labeled output
0 b c a a 0
1 a b a a 0
2 a c a a 0
3 a c a a 0
4 a c a a 0
5 b d a a 0
6 a c a a 0
7 a c a a 0
8 a b a a 0
9 a c a a 0
10 b c a a 0
11 a c a a 0
12 a b a a 0
13 a b a a 0
14 b d a a 0
15 b d a a 0
16 b d a a 0
17 b c a a 0
18 b d a a 0
19 b d a a 0
20 b c a a 0
21 b c a a 0
22 a c a a 0
23 b c a a 0
24 a c a a 0
25 a b a a 0
26 a c a a 0
27 b c a a 0
28 b c a a 0
29 a c a a 0
... ... ... ... ... ...
120 d c d d 2
121 b b c d 2
122 d b d d 2
123 c b c c 2
124 c c d d 2
125 d c d c 2
126 c b c c 2
127 c b c c 2
128 c b d d 2
129 d b d c 2
130 d b d d 2
131 d d d d 2
132 c b d d 2
133 c b c c 2
134 c b d c 2
135 d b d d 2
136 c c d d 2
137 c c d c 2
138 c b c c 2
139 d c d d 2
140 c c d d 2
141 d c c d 2
142 b b c d 2
143 c c d d 2
144 c c d d 2
145 c b c d 2
146 c a c d 2
147 c b c d 2
148 c c d d 2
149 c b c c 2

150 rows × 5 columns

def fit(data):
    output_name = data.columns[-1]
    features = data.columns[0:-1]
    counts = {}
    possible_outputs = set(data[output_name])
    for output in possible_outputs:
        counts[output] = {}
        smallData = data[data[output_name] == output]
        counts[output]["total_count"] = len(smallData)
        for f in features:
            counts[output][f] = {}
            possible_values = set(smallData[f])
            for value in possible_values:
                val_count = len(smallData[smallData[f] == value])
                counts[output][f][value] = val_count
    return counts
fit(df)
{0: {&#x27;pl_labeled&#x27;: {&#x27;a&#x27;: 50},
  &#x27;pw_labeled&#x27;: {&#x27;a&#x27;: 50},
  &#x27;sl_labeled&#x27;: {&#x27;a&#x27;: 28, &#x27;b&#x27;: 22},
  &#x27;sw_labeled&#x27;: {&#x27;a&#x27;: 1, &#x27;b&#x27;: 7, &#x27;c&#x27;: 32, &#x27;d&#x27;: 10},
  &#x27;total_count&#x27;: 50},
 1: {&#x27;pl_labeled&#x27;: {&#x27;b&#x27;: 7, &#x27;c&#x27;: 43},
  &#x27;pw_labeled&#x27;: {&#x27;b&#x27;: 10, &#x27;c&#x27;: 40},
  &#x27;sl_labeled&#x27;: {&#x27;a&#x27;: 3, &#x27;b&#x27;: 21, &#x27;c&#x27;: 24, &#x27;d&#x27;: 2},
  &#x27;sw_labeled&#x27;: {&#x27;a&#x27;: 13, &#x27;b&#x27;: 29, &#x27;c&#x27;: 8},
  &#x27;total_count&#x27;: 50},
 2: {&#x27;pl_labeled&#x27;: {&#x27;c&#x27;: 20, &#x27;d&#x27;: 30},
  &#x27;pw_labeled&#x27;: {&#x27;c&#x27;: 16, &#x27;d&#x27;: 34},
  &#x27;sl_labeled&#x27;: {&#x27;a&#x27;: 1, &#x27;b&#x27;: 5, &#x27;c&#x27;: 29, &#x27;d&#x27;: 15},
  &#x27;sw_labeled&#x27;: {&#x27;a&#x27;: 5, &#x27;b&#x27;: 28, &#x27;c&#x27;: 15, &#x27;d&#x27;: 2},
  &#x27;total_count&#x27;: 50}}