[
  {
    "path": "Pandas_Cheatsheet.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# What is Pandas?\\n\",\n    \"python library for data manipulation and analysis\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 178,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import pandas as pd\\n\",\n    \"data_frame = pd.read_csv('data/friend_list.csv')\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 179,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    
</tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Julia</td>\\n\",\n       \"      <td>40</td>\\n\",\n       \"      <td>dentist</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>45</td>\\n\",\n       \"      <td>manager</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>Chris</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"      <td>intern</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   John   20    student\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\\n\",\n       \"3  Julia   40    dentist\\n\",\n       \"4  Brian   45    manager\\n\",\n       \"5  Chris   25     intern\"\n      ]\n     },\n     \"execution_count\": 179,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"data_frame\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# What is DataFrame?\\n\",\n    \"dataframe is a 2-dimensional labeled data structure with columns\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 180,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table 
border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Julia</td>\\n\",\n       \"      <td>40</td>\\n\",\n       \"      <td>dentist</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>45</td>\\n\",\n       \"      <td>manager</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   John   20    student\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\\n\",\n       \"3  Julia   40    dentist\\n\",\n       \"4  Brian   45    manager\"\n      ]\n     },\n     \"execution_count\": 180,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"data_frame.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# What is Series?\\n\",\n    \"Every single column in 
dataframe is series\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 181,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"pandas.core.series.Series\"\n      ]\n     },\n     \"execution_count\": 181,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"type(data_frame.job)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 182,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>STUDENT</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>DEVELOPER</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>TEACHER</td>\\n\",\n       \"    </tr>\\n\",\n       \"    
<tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Julia</td>\\n\",\n       \"      <td>40</td>\\n\",\n       \"      <td>DENTIST</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>45</td>\\n\",\n       \"      <td>MANAGER</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   John   20    STUDENT\\n\",\n       \"1  Jenny   30  DEVELOPER\\n\",\n       \"2   Nate   30    TEACHER\\n\",\n       \"3  Julia   40    DENTIST\\n\",\n       \"4  Brian   45    MANAGER\"\n      ]\n     },\n     \"execution_count\": 182,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"data_frame.job = data_frame.job.str.upper()\\n\",\n    \"data_frame.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"**Series** is just wrapper for python list\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 183,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>num</th>\\n\",\n       \"      <th>word</th>\\n\",\n       \"    
</tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>1</td>\\n\",\n       \"      <td>one</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>2</td>\\n\",\n       \"      <td>two</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>3</td>\\n\",\n       \"      <td>three</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   num   word\\n\",\n       \"0    1    one\\n\",\n       \"1    2    two\\n\",\n       \"2    3  three\"\n      ]\n     },\n     \"execution_count\": 183,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"s1 = pd.core.series.Series(['one', 'two', 'three'])\\n\",\n    \"s2 = pd.core.series.Series([1, 2, 3])\\n\",\n    \"pd.DataFrame(data=dict(word=s1, num=s2))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Why Pandas?\\n\",\n    \"\\n\",\n    \"Very similar to Excel spreadsheet view,  \\n\",\n    \"support various functions for data manipulation and analysis.  \\n\",\n    \"Fast based on Numpy.  
\\n\",\n    \"Easy to manipulate data for your purpose\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Read File to DataFrame\\n\",\n    \"A **Data frame** is a two-dimensional data structure, i.e., data is aligned in a tabular fashion in rows and columns.\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"by default, pandas support csv format\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 184,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df = pd.read_csv('data/friend_list.csv')\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 185,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       
\"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Julia</td>\\n\",\n       \"      <td>40</td>\\n\",\n       \"      <td>dentist</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>45</td>\\n\",\n       \"      <td>manager</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>Chris</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"      <td>intern</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   John   20    student\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\\n\",\n       \"3  Julia   40    dentist\\n\",\n       \"4  Brian   45    manager\\n\",\n       \"5  Chris   25     intern\"\n      ]\n     },\n     \"execution_count\": 185,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"you can read txt file like below, if the txt file data are comma separated\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 186,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df = pd.read_csv('data/friend_list.txt')\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 187,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        
vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Julia</td>\\n\",\n       \"      <td>40</td>\\n\",\n       \"      <td>dentist</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>45</td>\\n\",\n       \"      <td>manager</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   John   20    student\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\\n\",\n       \"3  Julia   40    dentist\\n\",\n       \"4  Brian   
45    manager\"\n      ]\n     },\n     \"execution_count\": 187,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"If the txt file's delimiter is not a comma, you can specify it with the delimiter keyword argument\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 188,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df = pd.read_csv('data/friend_list_tab.txt', delimiter = \\\"\\\\t\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 189,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n      
 \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Julia</td>\\n\",\n       \"      <td>40</td>\\n\",\n       \"      <td>dentist</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>45</td>\\n\",\n       \"      <td>manager</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   John   20    student\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\\n\",\n       \"3  Julia   40    dentist\\n\",\n       \"4  Brian   45    manager\"\n      ]\n     },\n     \"execution_count\": 189,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"If the data file doesn't have a header,  \\n\",\n    \"use header = None like below, so that the first row is not used as the column header\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 190,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df = pd.read_csv('data/friend_list_no_head.csv', header = None)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 191,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       
\"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <th>2</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Julia</td>\\n\",\n       \"      <td>40</td>\\n\",\n       \"      <td>dentist</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>45</td>\\n\",\n       \"      <td>manager</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"       0   1          2\\n\",\n       \"0   John  20    student\\n\",\n       \"1  Jenny  30  developer\\n\",\n       \"2   Nate  30    teacher\\n\",\n       \"3  Julia  40    dentist\\n\",\n       \"4  Brian  45    manager\"\n      ]\n     },\n     \"execution_count\": 191,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": 
\"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"you can add column header after you create dataframe\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 192,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df.columns = ['name', 'age', 'job']\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 193,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Julia</td>\\n\",\n       \"      
<td>40</td>\\n\",\n       \"      <td>dentist</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>45</td>\\n\",\n       \"      <td>manager</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   John   20    student\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\\n\",\n       \"3  Julia   40    dentist\\n\",\n       \"4  Brian   45    manager\"\n      ]\n     },\n     \"execution_count\": 193,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"you can create column header for no header data at once\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 194,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df = pd.read_csv('data/friend_list_no_head.csv', header = None, names=['name', 'age', 'job'])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 195,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n      
 \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Julia</td>\\n\",\n       \"      <td>40</td>\\n\",\n       \"      <td>dentist</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>45</td>\\n\",\n       \"      <td>manager</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   John   20    student\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\\n\",\n       \"3  Julia   40    dentist\\n\",\n       \"4  Brian   45    manager\"\n      ]\n     },\n     \"execution_count\": 195,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Create DataFrame\\n\",\n    \"when you want to create dataframe from your python code\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## from dictionary\"\n   ]\n  },\n  {\n   
\"cell_type\": \"code\",\n   \"execution_count\": 196,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"friend_dict_list = [{'name': 'Jone', 'age': 20, 'job': 'student'},\\n\",\n    \"         {'name': 'Jenny', 'age': 30, 'job': 'developer'},\\n\",\n    \"         {'name': 'Nate', 'age': 30, 'job': 'teacher'}]\\n\",\n    \"df = pd.DataFrame(friend_dict_list)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 197,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>Jone</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       
\"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   age        job   name\\n\",\n       \"0   20    student   Jone\\n\",\n       \"1   30  developer  Jenny\\n\",\n       \"2   30    teacher   Nate\"\n      ]\n     },\n     \"execution_count\": 197,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"if you need fixed column order, you can adjust column order like below,\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 198,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df = df[['name', 'age', 'job']]\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 199,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>Jone</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \" 
     <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   Jone   20    student\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\"\n      ]\n     },\n     \"execution_count\": 199,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## from OrderedDict\\n\",\n    \"OrderedDict helps you to have fixed column order at once\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 200,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"from collections import OrderedDict\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 201,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"friend_ordered_dict = OrderedDict([ ('name', ['John', 'Jenny', 'Nate']),\\n\",\n    \"          ('age', [20, 30, 30]),\\n\",\n    \"          ('job', ['student', 'developer', 'teacher']) ] )\\n\",\n    \"df = pd.DataFrame.from_dict(friend_ordered_dict)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 202,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    
}\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   John   20    student\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\"\n      ]\n     },\n     \"execution_count\": 202,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"collapsed\": true\n   },\n   \"source\": [\n    \"## from list\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 203,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"friend_list = [ ['John', 20, 'student'],['Jenny', 30, 'developer'],['Nate', 30, 'teacher'] ]\\n\",\n    \"column_name = ['name', 'age', 'job']\\n\",\n    \"df = 
pd.DataFrame.from_records(friend_list, columns=column_name)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 204,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   John   20    student\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\"\n      ]\n     },\n     \"execution_count\": 204,\n     \"metadata\": {},\n     \"output_type\": 
\"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 205,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"friend_list = [ \\n\",\n    \"                ['name',['John', 'Jenny', 'Nate']],\\n\",\n    \"                ['age',[20,30,30]],\\n\",\n    \"                ['job',['student', 'developer', 'teacher']] \\n\",\n    \"              ]\\n\",\n    \"df = pd.DataFrame.from_items(friend_list)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 206,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      
<td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   John   20    student\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\"\n      ]\n     },\n     \"execution_count\": 206,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Write DataFrame to File\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"here is one dataframe example with header\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 207,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"friend_list = [ \\n\",\n    \"                ['name',['John', 'Jenny', 'nate']],\\n\",\n    \"                ['age',[20,30,30]],\\n\",\n    \"                ['job',['student', 'developer', 'teacher']] \\n\",\n    \"              ]\\n\",\n    \"df = pd.DataFrame.from_items(friend_list)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 208,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       
\"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   John   20    student\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   nate   30    teacher\"\n      ]\n     },\n     \"execution_count\": 208,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"you can create csv file using below command,\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 209,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df.to_csv('friend_list_from_df.csv')\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"below is an example of a dataframe that **doesn't** have a header\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 210,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"friend_list = [ ['John', 20, 'student'],['Jenny', 30, 'developer'],['Nate', 30, 'teacher'] ]\\n\",\n    \"df = 
pd.DataFrame.from_records(friend_list)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 211,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <th>2</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"       0   1          2\\n\",\n       \"0   John  20    student\\n\",\n       \"1  Jenny  30  developer\\n\",\n       \"2   Nate  30    teacher\"\n      ]\n     },\n     \"execution_count\": 211,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   
],\n   \"source\": [\n    \"df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"you can write csv file using below command,\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 212,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df.to_csv('friend_list_from_df.csv')\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"you also can write txt file using same command\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 213,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df.to_csv('friend_list_from_df.txt')\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"by default, header and index are True like below, even if you don't mention it in the command\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 214,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df.to_csv('friend_list_from_df.csv', header = True, index = True)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"**header = False** means you don't want to create column names. no 0,1,2 at column name   \\n\",\n    \"**index = False** means you don't want to create row names.  
no 0,1,2 at row name\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 215,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df.to_csv('friend_list_from_df.csv', header = False, index = False)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"you can specify column names by passing a list to **header**\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 216,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df.to_csv('friend_list_from_df.csv', header = ['name', 'age', 'job'])\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"below is a dataframe that has **None** values\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 217,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"friend_list = [ \\n\",\n    \"                ['name',['John', None, 'nate']],\\n\",\n    \"                ['age',[20,None,30]],\\n\",\n    \"                ['job',['student', 'developer', 'teacher']] \\n\",\n    \"              ]\\n\",\n    \"df = pd.DataFrame.from_items(friend_list)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 218,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n      
 \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20.0</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>None</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>nate</td>\\n\",\n       \"      <td>30.0</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   name   age        job\\n\",\n       \"0  John  20.0    student\\n\",\n       \"1  None   NaN  developer\\n\",\n       \"2  nate  30.0    teacher\"\n      ]\n     },\n     \"execution_count\": 218,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 219,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df.to_csv('friend_list_from_df.csv')\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"**na_rep** replaces **None** (missing values) with the provided value\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 220,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df.to_csv('friend_list_from_df.csv', na_rep = '-')\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Select Row\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## by index\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   
\"execution_count\": 221,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"friend_list = [ \\n\",\n    \"                ['name',['John', 'Jenny', 'Nate']],\\n\",\n    \"                ['age',[20,30,30]],\\n\",\n    \"                ['job',['student', 'developer', 'teacher']] \\n\",\n    \"              ]\\n\",\n    \"df = pd.DataFrame.from_items(friend_list)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 222,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       
\"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   John   20    student\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\"\n      ]\n     },\n     \"execution_count\": 222,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"select rows from index 1 to index 2\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 223,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n     
 \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\"\n      ]\n     },\n     \"execution_count\": 223,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df[1:3]\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"select row index 0 and index 2\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 224,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   name  age      job\\n\",\n       \"0  John   20  student\\n\",\n       
\"2  Nate   30  teacher\"\n      ]\n     },\n     \"execution_count\": 224,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.loc[[0,2]]\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 225,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   John   20    student\\n\",\n       \"1  Jenny   30  
developer\\n\",\n       \"2   Nate   30    teacher\"\n      ]\n     },\n     \"execution_count\": 225,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## by column condition\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 226,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df_filtered = df[df.age > 25]\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 227,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      
\"text/plain\": [\n       \"    name  age        job\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\"\n      ]\n     },\n     \"execution_count\": 227,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df_filtered\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 228,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df_filtered = df.query('age>25')\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 229,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age      
  job\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\"\n      ]\n     },\n     \"execution_count\": 229,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df_filtered\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 230,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df_filtered = df[(df.age >25) & (df.name == 'Nate')]\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 231,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   name  age      job\\n\",\n       \"2  Nate   30  teacher\"\n      ]\n     },\n     \"execution_count\": 231,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df_filtered\"\n   ]\n  
},\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 232,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   John   20    student\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\"\n      ]\n     },\n     \"execution_count\": 232,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  
},\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Filter Column\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## by index\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 233,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <th>2</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"       0   1          2\\n\",\n       \"0   John  20    student\\n\",\n       \"1  Jenny  30  
developer\\n\",\n       \"2   Nate  30    teacher\"\n      ]\n     },\n     \"execution_count\": 233,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"friend_list = [ ['John', 20, 'student'],['Jenny', 30, 'developer'],['Nate', 30, 'teacher'] ]\\n\",\n    \"df = pd.DataFrame.from_records(friend_list)\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"select all rows, from column 0 to column 1\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 234,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <th>1</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       
\"</div>\"\n      ],\n      \"text/plain\": [\n       \"       0   1\\n\",\n       \"0   John  20\\n\",\n       \"1  Jenny  30\\n\",\n       \"2   Nate  30\"\n      ]\n     },\n     \"execution_count\": 234,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.iloc[:, 0:2]\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"select all rows, column 0 and column 2\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 235,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <th>2</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      
\"text/plain\": [\n       \"       0          2\\n\",\n       \"0   John    student\\n\",\n       \"1  Jenny  developer\\n\",\n       \"2   Nate    teacher\"\n      ]\n     },\n     \"execution_count\": 235,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.iloc[:,[0,2]]\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 236,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <th>2</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      
\"text/plain\": [\n       \"       0   1          2\\n\",\n       \"0   John  20    student\\n\",\n       \"1  Jenny  30  developer\\n\",\n       \"2   Nate  30    teacher\"\n      ]\n     },\n     \"execution_count\": 236,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## by column name\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 237,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n 
      \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Julia</td>\\n\",\n       \"      <td>40</td>\\n\",\n       \"      <td>dentist</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>45</td>\\n\",\n       \"      <td>manager</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>Chris</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"      <td>intern</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   John   20    student\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\\n\",\n       \"3  Julia   40    dentist\\n\",\n       \"4  Brian   45    manager\\n\",\n       \"5  Chris   25     intern\"\n      ]\n     },\n     \"execution_count\": 237,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"# you can create column header for no header data at once\\n\",\n    \"df = pd.read_csv('data/friend_list_no_head.csv', header = None, names=['name', 'age', 'job'])\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 238,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" 
class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Julia</td>\\n\",\n       \"      <td>40</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>45</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>Chris</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age\\n\",\n       \"0   John   20\\n\",\n       \"1  Jenny   30\\n\",\n       \"2   Nate   30\\n\",\n       \"3  Julia   40\\n\",\n       \"4  Brian   45\\n\",\n       \"5  Chris   25\"\n      ]\n     },\n     \"execution_count\": 238,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df_filtered = df[['name', 'age']]\\n\",\n    \"df_filtered\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 239,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe 
tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>40</td>\\n\",\n       \"      <td>dentist</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>45</td>\\n\",\n       \"      <td>manager</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>25</td>\\n\",\n       \"      <td>intern</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   age        job\\n\",\n       \"0   20    student\\n\",\n       \"1   30  developer\\n\",\n       \"2   30    teacher\\n\",\n       \"3   40    dentist\\n\",\n       \"4   45    manager\\n\",\n       \"5   25     intern\"\n     
 ]\n     },\n     \"execution_count\": 239,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.filter(items=['age', 'job'])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 240,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Julia</td>\\n\",\n       \"      <td>40</td>\\n\",\n       \"      <td>dentist</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      
<th>4</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>45</td>\\n\",\n       \"      <td>manager</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>Chris</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"      <td>intern</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   John   20    student\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\\n\",\n       \"3  Julia   40    dentist\\n\",\n       \"4  Brian   45    manager\\n\",\n       \"5  Chris   25     intern\"\n      ]\n     },\n     \"execution_count\": 240,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 241,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"    
</tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Julia</td>\\n\",\n       \"      <td>40</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>45</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>Chris</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age\\n\",\n       \"0   John   20\\n\",\n       \"1  Jenny   30\\n\",\n       \"2   Nate   30\\n\",\n       \"3  Julia   40\\n\",\n       \"4  Brian   45\\n\",\n       \"5  Chris   25\"\n      ]\n     },\n     \"execution_count\": 241,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"# select columns containing 'a'\\n\",\n    \"df.filter(like='a',axis=1)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 242,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" 
class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>dentist</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>manager</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>intern</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"         job\\n\",\n       \"0    student\\n\",\n       \"1  developer\\n\",\n       \"2    teacher\\n\",\n       \"3    dentist\\n\",\n       \"4    manager\\n\",\n       \"5     intern\"\n      ]\n     },\n     \"execution_count\": 242,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"# select columns using regex\\n\",\n    \"df.filter(regex='b$',axis=1)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Drop rows\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## by row name (index name)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 243,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"friend_dict_list = [{'age': 20, 'job': 'student'},\\n\",\n    \"         {'age': 30, 
'job': 'developer'},\\n\",\n    \"         {'age': 30, 'job': 'teacher'}]\\n\",\n    \"df = pd.DataFrame(friend_dict_list, index = ['John', 'Jenny', 'Nate'])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 244,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>John</th>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>Jenny</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>Nate</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"       age        job\\n\",\n       \"John    20    student\\n\",\n       \"Jenny   30  developer\\n\",\n       \"Nate    30    teacher\"\n      ]\n     },\n     \"execution_count\": 244,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": 
[\n    \"df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### drop row\\n\",\n    \"drop() returns a new dataframe without the given rows; the original dataframe still contains them\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 245,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>Jenny</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"       age        job\\n\",\n       \"Jenny   30  developer\"\n      ]\n     },\n     \"execution_count\": 245,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.drop(['John', 'Nate'])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 246,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        
vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>John</th>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>Jenny</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>Nate</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"       age        job\\n\",\n       \"John    20    student\\n\",\n       \"Jenny   30  developer\\n\",\n       \"Nate    30    teacher\"\n      ]\n     },\n     \"execution_count\": 246,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"To keep the dropped result, assign it back to the dataframe, as shown below:\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 247,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df = df.drop(['John', 'Nate'])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 248,\n   
\"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>Jenny</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"       age        job\\n\",\n       \"Jenny   30  developer\"\n      ]\n     },\n     \"execution_count\": 248,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### drop row in place\\n\",\n    \"The dropped row will be deleted from dataframe with inplace keyword parameter\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 249,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"friend_dict_list = [{'age': 20, 'job': 'student'},\\n\",\n    \"         {'age': 30, 'job': 'developer'},\\n\",\n    \"         {'age': 30, 'job': 'teacher'}]\\n\",\n    \"df = pd.DataFrame(friend_dict_list, index = ['John', 'Jenny', 'Nate'])\"\n   ]\n  },\n  {\n   
\"cell_type\": \"code\",\n   \"execution_count\": 250,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df.drop(['John', 'Nate'], inplace = True)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 251,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>Jenny</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"       age        job\\n\",\n       \"Jenny   30  developer\"\n      ]\n     },\n     \"execution_count\": 251,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## by row id (index number)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 252,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"friend_dict_list = [{'name': 'Jone', 'age': 20, 'job': 'student'},\\n\",\n    \"         {'name': 
'Jenny', 'age': 30, 'job': 'developer'},\\n\",\n    \"         {'name': 'Nate', 'age': 30, 'job': 'teacher'}]\\n\",\n    \"df = pd.DataFrame(friend_dict_list)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 253,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>Jone</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   age        job   name\\n\",\n       \"0   20    student   Jone\\n\",\n       \"1   30  developer  Jenny\\n\",\n       \"2   30    teacher   
Nate\"\n      ]\n     },\n     \"execution_count\": 253,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"you can drop rows by their index position\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 254,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df = df.drop(df.index[[0,2]])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 255,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   age        job   name\\n\",\n       \"1   30  developer  Jenny\"\n      ]\n     },\n     \"execution_count\": 255,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    
\"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## By Column value\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 256,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>Jone</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   age        job   name\\n\",\n       \"0   20    student   Jone\\n\",\n       \"1   30  developer  Jenny\\n\",\n       \"2   30    teacher   Nate\"\n      ]\n     },\n     
\"execution_count\": 256,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"friend_dict_list = [{'name': 'Jone', 'age': 20, 'job': 'student'},\\n\",\n    \"         {'name': 'Jenny', 'age': 30, 'job': 'developer'},\\n\",\n    \"         {'name': 'Nate', 'age': 30, 'job': 'teacher'}]\\n\",\n    \"df = pd.DataFrame(friend_dict_list)\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 257,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df = df[df.age != 30]\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 258,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>Jone</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   age      job  name\\n\",\n       \"0   20  student  Jone\"\n      ]\n     },\n     \"execution_count\": 258,\n     
\"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Drop column\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 259,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>Jone</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   age        job   name\\n\",\n       \"0   20    student   Jone\\n\",\n       \"1   30  
developer  Jenny\\n\",\n       \"2   30    teacher   Nate\"\n      ]\n     },\n     \"execution_count\": 259,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"friend_dict_list = [{'name': 'Jone', 'age': 20, 'job': 'student'},\\n\",\n    \"         {'name': 'Jenny', 'age': 30, 'job': 'developer'},\\n\",\n    \"         {'name': 'Nate', 'age': 30, 'job': 'teacher'}]\\n\",\n    \"df = pd.DataFrame(friend_dict_list)\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 260,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df = df.drop('age', axis=1)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 261,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>Jone</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      
<th>2</th>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"         job   name\\n\",\n       \"0    student   Jone\\n\",\n       \"1  developer  Jenny\\n\",\n       \"2    teacher   Nate\"\n      ]\n     },\n     \"execution_count\": 261,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"collapsed\": true\n   },\n   \"source\": [\n    \"# Add Column / Update Column\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 262,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>Jone</td>\\n\",\n       \"      <td>15</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n    
   \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   Jone   15    student\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\"\n      ]\n     },\n     \"execution_count\": 262,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"friend_dict_list = [{'name': 'Jone', 'age': 15, 'job': 'student'},\\n\",\n    \"         {'name': 'Jenny', 'age': 30, 'job': 'developer'},\\n\",\n    \"         {'name': 'Nate', 'age': 30, 'job': 'teacher'}]\\n\",\n    \"df = pd.DataFrame(friend_dict_list, columns = ['name', 'age', 'job'])\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Add New Column with default value\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 263,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df['salary'] = 0\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 264,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  
<thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"      <th>salary</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>Jone</td>\\n\",\n       \"      <td>15</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"      <td>0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>0</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job  salary\\n\",\n       \"0   Jone   15    student       0\\n\",\n       \"1  Jenny   30  developer       0\\n\",\n       \"2   Nate   30    teacher       0\"\n      ]\n     },\n     \"execution_count\": 264,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Add New Column derived from existing value\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 265,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe 
tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>Jone</td>\\n\",\n       \"      <td>15</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   Jone   15    student\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\"\n      ]\n     },\n     \"execution_count\": 265,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"friend_dict_list = [{'name': 'Jone', 'age': 15, 'job': 'student'},\\n\",\n    \"         {'name': 'Jenny', 'age': 30, 'job': 'developer'},\\n\",\n    \"         {'name': 'Nate', 'age': 30, 'job': 'teacher'}]\\n\",\n    \"df = pd.DataFrame(friend_dict_list, columns = ['name', 'age', 'job'])\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   
\"source\": [\n    \"## one liner adding column by true or false condition\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 266,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import numpy as np\\n\",\n    \"df['salary'] = np.where(df['job'] != 'student' , 'yes', 'no')\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 267,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"      <th>salary</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>Jone</td>\\n\",\n       \"      <td>15</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>no</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"      <td>yes</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      
<td>yes</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job salary\\n\",\n       \"0   Jone   15    student     no\\n\",\n       \"1  Jenny   30  developer    yes\\n\",\n       \"2   Nate   30    teacher    yes\"\n      ]\n     },\n     \"execution_count\": 267,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 268,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>midterm</th>\\n\",\n       \"      <th>final</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>95</td>\\n\",\n       \"      <td>85</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>85</td>\\n\",\n       \"      <td>80</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      
<td>10</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  midterm  final\\n\",\n       \"0   John       95     85\\n\",\n       \"1  Jenny       85     80\\n\",\n       \"2   Nate       10     30\"\n      ]\n     },\n     \"execution_count\": 268,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"friend_dict_list = [{'name': 'John', 'midterm': 95, 'final': 85},\\n\",\n    \"         {'name': 'Jenny', 'midterm': 85, 'final': 80},\\n\",\n    \"         {'name': 'Nate', 'midterm': 10, 'final': 30}]\\n\",\n    \"df = pd.DataFrame(friend_dict_list, columns = ['name', 'midterm', 'final'])\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## column derived from adding two existing columns\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 269,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df['total'] = df['midterm'] + df['final']\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 270,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \" 
     <th>midterm</th>\\n\",\n       \"      <th>final</th>\\n\",\n       \"      <th>total</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>95</td>\\n\",\n       \"      <td>85</td>\\n\",\n       \"      <td>180</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>85</td>\\n\",\n       \"      <td>80</td>\\n\",\n       \"      <td>165</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>10</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>40</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  midterm  final  total\\n\",\n       \"0   John       95     85    180\\n\",\n       \"1  Jenny       85     80    165\\n\",\n       \"2   Nate       10     30     40\"\n      ]\n     },\n     \"execution_count\": 270,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## column from existing column\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 271,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df['average'] = df['total'] / 2\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 272,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    
.dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>midterm</th>\\n\",\n       \"      <th>final</th>\\n\",\n       \"      <th>total</th>\\n\",\n       \"      <th>average</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>95</td>\\n\",\n       \"      <td>85</td>\\n\",\n       \"      <td>180</td>\\n\",\n       \"      <td>90.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>85</td>\\n\",\n       \"      <td>80</td>\\n\",\n       \"      <td>165</td>\\n\",\n       \"      <td>82.5</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>10</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>40</td>\\n\",\n       \"      <td>20.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  midterm  final  total  average\\n\",\n       \"0   John       95     85    180     90.0\\n\",\n       \"1  Jenny       85     80    165     82.5\\n\",\n       \"2   Nate       10     30     40     20.0\"\n      ]\n     },\n     \"execution_count\": 272,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": 
\"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## column by condition\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 273,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"grades = []\\n\",\n    \"\\n\",\n    \"for row in df['average']:\\n\",\n    \"    if row >= 90:\\n\",\n    \"        grades.append('A')\\n\",\n    \"    elif row >= 80:\\n\",\n    \"        grades.append('B')\\n\",\n    \"    elif row >= 70:\\n\",\n    \"        grades.append('C')\\n\",\n    \"    else:\\n\",\n    \"        grades.append('F')\\n\",\n    \"        \\n\",\n    \"df['grade'] = grades\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 274,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>midterm</th>\\n\",\n       \"      <th>final</th>\\n\",\n       \"      <th>total</th>\\n\",\n       \"      <th>average</th>\\n\",\n       \"      <th>grade</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>95</td>\\n\",\n       \"      <td>85</td>\\n\",\n       \"      <td>180</td>\\n\",\n       \"      <td>90.0</td>\\n\",\n       \"  
    <td>A</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>85</td>\\n\",\n       \"      <td>80</td>\\n\",\n       \"      <td>165</td>\\n\",\n       \"      <td>82.5</td>\\n\",\n       \"      <td>B</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>10</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>40</td>\\n\",\n       \"      <td>20.0</td>\\n\",\n       \"      <td>F</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  midterm  final  total  average grade\\n\",\n       \"0   John       95     85    180     90.0     A\\n\",\n       \"1  Jenny       85     80    165     82.5     B\\n\",\n       \"2   Nate       10     30     40     20.0     F\"\n      ]\n     },\n     \"execution_count\": 274,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## how to use apply function\\n\",\n    \"The apply function helps you write concise code.\\n\",\n    \"The function you pass is called on the value in every row of the selected column(s).\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 275,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def pass_or_fail(row):\\n\",\n    \"    print(row)\\n\",\n    \"    if row != \\\"F\\\":\\n\",\n    \"        return 'Pass'\\n\",\n    \"    else:\\n\",\n    \"        return 'Fail'\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 276,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"A\\n\",\n      \"B\\n\",\n      \"F\\n\"\n     ]\n    
}\n   ],\n   \"source\": [\n    \"df.grade = df.grade.apply(pass_or_fail)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 277,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>midterm</th>\\n\",\n       \"      <th>final</th>\\n\",\n       \"      <th>total</th>\\n\",\n       \"      <th>average</th>\\n\",\n       \"      <th>grade</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>95</td>\\n\",\n       \"      <td>85</td>\\n\",\n       \"      <td>180</td>\\n\",\n       \"      <td>90.0</td>\\n\",\n       \"      <td>Pass</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>85</td>\\n\",\n       \"      <td>80</td>\\n\",\n       \"      <td>165</td>\\n\",\n       \"      <td>82.5</td>\\n\",\n       \"      <td>Pass</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>10</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>40</td>\\n\",\n       \"      
<td>20.0</td>\\n\",\n       \"      <td>Fail</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  midterm  final  total  average grade\\n\",\n       \"0   John       95     85    180     90.0  Pass\\n\",\n       \"1  Jenny       85     80    165     82.5  Pass\\n\",\n       \"2   Nate       10     30     40     20.0  Fail\"\n      ]\n     },\n     \"execution_count\": 277,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## info extraction using df.apply\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 278,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>yyyy-mm-dd</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>2000-06-27</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>2002-09-24</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      
<td>2005-12-20</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   yyyy-mm-dd\\n\",\n       \"0  2000-06-27\\n\",\n       \"1  2002-09-24\\n\",\n       \"2  2005-12-20\"\n      ]\n     },\n     \"execution_count\": 278,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"date_list = [{'yyyy-mm-dd': '2000-06-27'},\\n\",\n    \"         {'yyyy-mm-dd': '2002-09-24'},\\n\",\n    \"         {'yyyy-mm-dd': '2005-12-20'}]\\n\",\n    \"df = pd.DataFrame(date_list, columns = ['yyyy-mm-dd'])\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 279,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def extract_year(row):\\n\",\n    \"    return row.split('-')[0]\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 280,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df['year'] = df['yyyy-mm-dd'].apply(extract_year)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 281,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>yyyy-mm-dd</th>\\n\",\n       \"      <th>year</th>\\n\",\n       \"    </tr>\\n\",\n       \"  
</thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>2000-06-27</td>\\n\",\n       \"      <td>2000</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>2002-09-24</td>\\n\",\n       \"      <td>2002</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>2005-12-20</td>\\n\",\n       \"      <td>2005</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   yyyy-mm-dd  year\\n\",\n       \"0  2000-06-27  2000\\n\",\n       \"1  2002-09-24  2002\\n\",\n       \"2  2005-12-20  2005\"\n      ]\n     },\n     \"execution_count\": 281,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## passing keyword parameter to apply function\\n\",\n    \"You can also pass a keyword argument to apply; it is forwarded to your function.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 282,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def extract_year(year, current_year):\\n\",\n    \"    return current_year - int(year)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 283,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df['age'] = df['year'].apply(extract_year, current_year=2018)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 284,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"  
      vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>yyyy-mm-dd</th>\\n\",\n       \"      <th>year</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>2000-06-27</td>\\n\",\n       \"      <td>2000</td>\\n\",\n       \"      <td>18</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>2002-09-24</td>\\n\",\n       \"      <td>2002</td>\\n\",\n       \"      <td>16</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>2005-12-20</td>\\n\",\n       \"      <td>2005</td>\\n\",\n       \"      <td>13</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   yyyy-mm-dd  year  age\\n\",\n       \"0  2000-06-27  2000   18\\n\",\n       \"1  2002-09-24  2002   16\\n\",\n       \"2  2005-12-20  2005   13\"\n      ]\n     },\n     \"execution_count\": 284,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## passing multiple keyword parameters to apply function\\n\",\n    \"You can also pass several keyword arguments to apply; all of them are forwarded to your function.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 285,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def get_introduce(age, prefix, 
suffix):\\n\",\n    \"    return prefix + str(age) + suffix\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 286,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df['introduce'] = df['age'].apply(get_introduce, prefix=\\\"I am \\\", suffix=\\\" years old\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 287,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>yyyy-mm-dd</th>\\n\",\n       \"      <th>year</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>introduce</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>2000-06-27</td>\\n\",\n       \"      <td>2000</td>\\n\",\n       \"      <td>18</td>\\n\",\n       \"      <td>I am 18 years old</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>2002-09-24</td>\\n\",\n       \"      <td>2002</td>\\n\",\n       \"      <td>16</td>\\n\",\n       \"      <td>I am 16 years old</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>2005-12-20</td>\\n\",\n       \"      <td>2005</td>\\n\",\n       \"      
<td>13</td>\\n\",\n       \"      <td>I am 13 years old</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   yyyy-mm-dd  year  age          introduce\\n\",\n       \"0  2000-06-27  2000   18  I am 18 years old\\n\",\n       \"1  2002-09-24  2002   16  I am 16 years old\\n\",\n       \"2  2005-12-20  2005   13  I am 13 years old\"\n      ]\n     },\n     \"execution_count\": 287,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## passing multiple columns to apply function\\n\",\n    \"If you pass axis=1 to the DataFrame's apply function, your function receives each whole row, so it can read several column values at once.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 288,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>yyyy-mm-dd</th>\\n\",\n       \"      <th>year</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>introduce</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>2000-06-27</td>\\n\",\n       \"    
  <td>2000</td>\\n\",\n       \"      <td>18</td>\\n\",\n       \"      <td>I was born in 2000 my age is 18</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>2002-09-24</td>\\n\",\n       \"      <td>2002</td>\\n\",\n       \"      <td>16</td>\\n\",\n       \"      <td>I was born in 2002 my age is 16</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>2005-12-20</td>\\n\",\n       \"      <td>2005</td>\\n\",\n       \"      <td>13</td>\\n\",\n       \"      <td>I was born in 2005 my age is 13</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   yyyy-mm-dd  year  age                        introduce\\n\",\n       \"0  2000-06-27  2000   18  I was born in 2000 my age is 18\\n\",\n       \"1  2002-09-24  2002   16  I was born in 2002 my age is 16\\n\",\n       \"2  2005-12-20  2005   13  I was born in 2005 my age is 13\"\n      ]\n     },\n     \"execution_count\": 288,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"def get_introduce2(row):\\n\",\n    \"    return \\\"I was born in \\\"+str(row.year)+\\\" my age is \\\"+str(row.age)\\n\",\n    \"df.introduce = df.apply(get_introduce2, axis=1)\\n\",\n    \"\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## how to use map function\\n\",\n    \"If you pass a function as the parameter, map works the same way as the apply function on that column.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 289,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n    
   \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>yyyy-mm-dd</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>2000-06-27</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>2002-09-24</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>2005-12-20</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   yyyy-mm-dd\\n\",\n       \"0  2000-06-27\\n\",\n       \"1  2002-09-24\\n\",\n       \"2  2005-12-20\"\n      ]\n     },\n     \"execution_count\": 289,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"date_list = [{'yyyy-mm-dd': '2000-06-27'},\\n\",\n    \"         {'yyyy-mm-dd': '2002-09-24'},\\n\",\n    \"         {'yyyy-mm-dd': '2005-12-20'}]\\n\",\n    \"df = pd.DataFrame(date_list, columns = ['yyyy-mm-dd'])\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 290,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def extract_year(row):\\n\",\n    \"    return row.split('-')[0]\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 291,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n      
 \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>yyyy-mm-dd</th>\\n\",\n       \"      <th>year</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>2000-06-27</td>\\n\",\n       \"      <td>2000</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>2002-09-24</td>\\n\",\n       \"      <td>2002</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>2005-12-20</td>\\n\",\n       \"      <td>2005</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   yyyy-mm-dd  year\\n\",\n       \"0  2000-06-27  2000\\n\",\n       \"1  2002-09-24  2002\\n\",\n       \"2  2005-12-20  2005\"\n      ]\n     },\n     \"execution_count\": 291,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df['year'] = df['yyyy-mm-dd'].map(extract_year)\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"if you give dictionary as parameter,  \\n\",\n    \"column will be updated with new value like  \\n\",\n    \"new value = dict['old value']\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 
292,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   age        job\\n\",\n       \"0   20    student\\n\",\n       \"1   30  developer\\n\",\n       \"2   30    teacher\"\n      ]\n     },\n     \"execution_count\": 292,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"job_list = [{'age': 20, 'job': 'student'},\\n\",\n    \"         {'age': 30, 'job': 'developer'},\\n\",\n    \"         {'age': 30, 'job': 'teacher'}]\\n\",\n    \"df = pd.DataFrame(job_list)\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": 
\"code\",\n   \"execution_count\": 293,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>1</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>2</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>3</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   age  job\\n\",\n       \"0   20    1\\n\",\n       \"1   30    2\\n\",\n       \"2   30    3\"\n      ]\n     },\n     \"execution_count\": 293,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.job = df.job.map({\\\"student\\\":1,\\\"developer\\\":2,\\\"teacher\\\":3})\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Applymap\\n\",\n    \"update all elements in the dataframe at 
once\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 294,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>x</th>\\n\",\n       \"      <th>y</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>5.5</td>\\n\",\n       \"      <td>-5.6</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>-5.2</td>\\n\",\n       \"      <td>5.5</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>-1.6</td>\\n\",\n       \"      <td>-4.5</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"     x    y\\n\",\n       \"0  5.5 -5.6\\n\",\n       \"1 -5.2  5.5\\n\",\n       \"2 -1.6 -4.5\"\n      ]\n     },\n     \"execution_count\": 294,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"x_y = [{'x': 5.5, 'y': -5.6},\\n\",\n    \"         {'x': -5.2, 'y': 5.5},\\n\",\n    \"         {'x': -1.6, 'y': -4.5}]\\n\",\n    \"df = pd.DataFrame(x_y)\\n\",\n    \"df\"\n   ]\n  },\n  {\n   
\"cell_type\": \"code\",\n   \"execution_count\": 295,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>x</th>\\n\",\n       \"      <th>y</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>6.0</td>\\n\",\n       \"      <td>-6.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>-5.0</td>\\n\",\n       \"      <td>6.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>-2.0</td>\\n\",\n       \"      <td>-4.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"     x    y\\n\",\n       \"0  6.0 -6.0\\n\",\n       \"1 -5.0  6.0\\n\",\n       \"2 -2.0 -4.0\"\n      ]\n     },\n     \"execution_count\": 295,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df = df.applymap(np.around)\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Add Row\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 296,\n   
\"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>midterm</th>\\n\",\n       \"      <th>final</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>95</td>\\n\",\n       \"      <td>85</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>85</td>\\n\",\n       \"      <td>80</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>10</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  midterm  final\\n\",\n       \"0   John       95     85\\n\",\n       \"1  Jenny       85     80\\n\",\n       \"2   Nate       10     30\"\n      ]\n     },\n     \"execution_count\": 296,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"friend_dict_list = [{'name': 'John', 'midterm': 95, 'final': 85},\\n\",\n    \"         {'name': 
'Jenny', 'midterm': 85, 'final': 80},\\n\",\n    \"         {'name': 'Nate', 'midterm': 10, 'final': 30}]\\n\",\n    \"df = pd.DataFrame(friend_dict_list, columns = ['name', 'midterm', 'final'])\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 297,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df2 = pd.DataFrame([['Ben', 50,50]], columns = ['name', 'midterm', 'final'])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 298,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>midterm</th>\\n\",\n       \"      <th>final</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>Ben</td>\\n\",\n       \"      <td>50</td>\\n\",\n       \"      <td>50</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"  name  midterm  final\\n\",\n       \"0  Ben       50     50\"\n      ]\n     },\n     \"execution_count\": 298,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df2.head()\"\n   ]\n  },\n  {\n   
\"cell_type\": \"code\",\n   \"execution_count\": 299,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>midterm</th>\\n\",\n       \"      <th>final</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>95</td>\\n\",\n       \"      <td>85</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>85</td>\\n\",\n       \"      <td>80</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>10</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Ben</td>\\n\",\n       \"      <td>50</td>\\n\",\n       \"      <td>50</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  midterm  final\\n\",\n       \"0   John       95     85\\n\",\n       \"1  Jenny       85     80\\n\",\n       \"2   Nate       10     30\\n\",\n 
      \"3    Ben       50     50\"\n      ]\n     },\n     \"execution_count\": 299,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.append(df2, ignore_index=True)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Group by\\n\",\n    \"group by command helps to get more information from given data\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 300,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>major</th>\\n\",\n       \"      <th>sex</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Abraham</td>\\n\",\n       \"      <td>Physics</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    
</tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>Psychology</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Janny</td>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>Yuna</td>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>6</th>\\n\",\n       \"      <td>Jeniffer</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>7</th>\\n\",\n       \"      <td>Edward</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>8</th>\\n\",\n       \"      <td>Zara</td>\\n\",\n       \"      <td>Psychology</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9</th>\\n\",\n       \"      <td>Wendy</td>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>10</th>\\n\",\n       \"      <td>Sera</td>\\n\",\n       \"      <td>Psychology</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"        name             major     sex\\n\",\n       \"0       John  Computer Science    male\\n\",\n       \"1       Nate  Computer Science    male\\n\",\n       \"2    Abraham           Physics    male\\n\",\n       \"3      Brian        Psychology    
male\\n\",\n       \"4      Janny         Economics  female\\n\",\n       \"5       Yuna         Economics  female\\n\",\n       \"6   Jeniffer  Computer Science  female\\n\",\n       \"7     Edward  Computer Science    male\\n\",\n       \"8       Zara        Psychology  female\\n\",\n       \"9      Wendy         Economics  female\\n\",\n       \"10      Sera        Psychology  female\"\n      ]\n     },\n     \"execution_count\": 300,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"student_list = [{'name': 'John', 'major': \\\"Computer Science\\\", 'sex': \\\"male\\\"},\\n\",\n    \"                {'name': 'Nate', 'major': \\\"Computer Science\\\", 'sex': \\\"male\\\"},\\n\",\n    \"                {'name': 'Abraham', 'major': \\\"Physics\\\", 'sex': \\\"male\\\"},\\n\",\n    \"                {'name': 'Brian', 'major': \\\"Psychology\\\", 'sex': \\\"male\\\"},\\n\",\n    \"                {'name': 'Janny', 'major': \\\"Economics\\\", 'sex': \\\"female\\\"},\\n\",\n    \"                {'name': 'Yuna', 'major': \\\"Economics\\\", 'sex': \\\"female\\\"},\\n\",\n    \"                {'name': 'Jeniffer', 'major': \\\"Computer Science\\\", 'sex': \\\"female\\\"},\\n\",\n    \"                {'name': 'Edward', 'major': \\\"Computer Science\\\", 'sex': \\\"male\\\"},\\n\",\n    \"                {'name': 'Zara', 'major': \\\"Psychology\\\", 'sex': \\\"female\\\"},\\n\",\n    \"                {'name': 'Wendy', 'major': \\\"Economics\\\", 'sex': \\\"female\\\"},\\n\",\n    \"                {'name': 'Sera', 'major': \\\"Psychology\\\", 'sex': \\\"female\\\"}\\n\",\n    \"         ]\\n\",\n    \"df = pd.DataFrame(student_list, columns = ['name', 'major', 'sex'])\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 301,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"groupby_major = df.groupby('major')\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   
\"execution_count\": 302,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"{'Computer Science': Int64Index([0, 1, 6, 7], dtype='int64'),\\n\",\n       \" 'Economics': Int64Index([4, 5, 9], dtype='int64'),\\n\",\n       \" 'Physics': Int64Index([2], dtype='int64'),\\n\",\n       \" 'Psychology': Int64Index([3, 8, 10], dtype='int64')}\"\n      ]\n     },\n     \"execution_count\": 302,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"groupby_major.groups\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"here we can see, computer science has mostly man, while economic has mostly woman students\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 303,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"Computer Science: 4\\n\",\n      \"       name             major     sex\\n\",\n      \"0      John  Computer Science    male\\n\",\n      \"1      Nate  Computer Science    male\\n\",\n      \"6  Jeniffer  Computer Science  female\\n\",\n      \"7    Edward  Computer Science    male\\n\",\n      \"\\n\",\n      \"Economics: 3\\n\",\n      \"    name      major     sex\\n\",\n      \"4  Janny  Economics  female\\n\",\n      \"5   Yuna  Economics  female\\n\",\n      \"9  Wendy  Economics  female\\n\",\n      \"\\n\",\n      \"Physics: 1\\n\",\n      \"      name    major   sex\\n\",\n      \"2  Abraham  Physics  male\\n\",\n      \"\\n\",\n      \"Psychology: 3\\n\",\n      \"     name       major     sex\\n\",\n      \"3   Brian  Psychology    male\\n\",\n      \"8    Zara  Psychology  female\\n\",\n      \"10   Sera  Psychology  female\\n\",\n      \"\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"for name, group in groupby_major:\\n\",\n    \"    print(name + \\\": \\\" + str(len(group)))\\n\",\n    \"    
print(group)\\n\",\n    \"    print()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### group object to dataframe\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 304,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>major</th>\\n\",\n       \"      <th>count</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>4</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"      <td>3</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Physics</td>\\n\",\n       \"      <td>1</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Psychology</td>\\n\",\n       \"      <td>3</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"              major  count\\n\",\n       \"0  Computer Science      4\\n\",\n       \"1         Economics      
3\\n\",\n       \"2           Physics      1\\n\",\n       \"3        Psychology      3\"\n      ]\n     },\n     \"execution_count\": 304,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df_major_cnt = pd.DataFrame({'count' : groupby_major.size()}).reset_index()\\n\",\n    \"df_major_cnt\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 305,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"groupby_sex = df.groupby('sex')\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"here we can see, this school has balanced woman and man ratio\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 306,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"female: 6\\n\",\n      \"        name             major     sex\\n\",\n      \"4      Janny         Economics  female\\n\",\n      \"5       Yuna         Economics  female\\n\",\n      \"6   Jeniffer  Computer Science  female\\n\",\n      \"8       Zara        Psychology  female\\n\",\n      \"9      Wendy         Economics  female\\n\",\n      \"10      Sera        Psychology  female\\n\",\n      \"\\n\",\n      \"male: 5\\n\",\n      \"      name             major   sex\\n\",\n      \"0     John  Computer Science  male\\n\",\n      \"1     Nate  Computer Science  male\\n\",\n      \"2  Abraham           Physics  male\\n\",\n      \"3    Brian        Psychology  male\\n\",\n      \"7   Edward  Computer Science  male\\n\",\n      \"\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"for name, group in groupby_sex:\\n\",\n    \"    print(name + \\\": \\\" + str(len(group)))\\n\",\n    \"    print(group)\\n\",\n    \"    print()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 307,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      
\"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>sex</th>\\n\",\n       \"      <th>count</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>female</td>\\n\",\n       \"      <td>6</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>male</td>\\n\",\n       \"      <td>5</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"      sex  count\\n\",\n       \"0  female      6\\n\",\n       \"1    male      5\"\n      ]\n     },\n     \"execution_count\": 307,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df_sex_cnt = pd.DataFrame({'count' : groupby_sex.size()}).reset_index()\\n\",\n    \"df_sex_cnt\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Drop Duplicate\\n\",\n    \"sometimes you need to drop duplicate rows and here is elegant way to to it\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 308,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n      
 \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>major</th>\\n\",\n       \"      <th>sex</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Abraham</td>\\n\",\n       \"      <td>Physics</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>Psychology</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Janny</td>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>Yuna</td>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n   
    \"      <th>6</th>\\n\",\n       \"      <td>Jeniffer</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>7</th>\\n\",\n       \"      <td>Edward</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>8</th>\\n\",\n       \"      <td>Zara</td>\\n\",\n       \"      <td>Psychology</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9</th>\\n\",\n       \"      <td>Wendy</td>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>10</th>\\n\",\n       \"      <td>Sera</td>\\n\",\n       \"      <td>Psychology</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>11</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"        name             major     sex\\n\",\n       \"0       John  Computer Science    male\\n\",\n       \"1       Nate  Computer Science    male\\n\",\n       \"2    Abraham           Physics    male\\n\",\n       \"3      Brian        Psychology    male\\n\",\n       \"4      Janny         Economics  female\\n\",\n       \"5       Yuna         Economics  female\\n\",\n       \"6   Jeniffer  Computer Science  female\\n\",\n       \"7     Edward  Computer Science    male\\n\",\n       \"8       Zara        Psychology  female\\n\",\n       \"9      Wendy         Economics  female\\n\",\n       \"10      Sera        Psychology  female\\n\",\n       \"11      John  Computer Science 
   male\"\n      ]\n     },\n     \"execution_count\": 308,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"student_list = [{'name': 'John', 'major': \\\"Computer Science\\\", 'sex': \\\"male\\\"},\\n\",\n    \"                {'name': 'Nate', 'major': \\\"Computer Science\\\", 'sex': \\\"male\\\"},\\n\",\n    \"                {'name': 'Abraham', 'major': \\\"Physics\\\", 'sex': \\\"male\\\"},\\n\",\n    \"                {'name': 'Brian', 'major': \\\"Psychology\\\", 'sex': \\\"male\\\"},\\n\",\n    \"                {'name': 'Janny', 'major': \\\"Economics\\\", 'sex': \\\"female\\\"},\\n\",\n    \"                {'name': 'Yuna', 'major': \\\"Economics\\\", 'sex': \\\"female\\\"},\\n\",\n    \"                {'name': 'Jeniffer', 'major': \\\"Computer Science\\\", 'sex': \\\"female\\\"},\\n\",\n    \"                {'name': 'Edward', 'major': \\\"Computer Science\\\", 'sex': \\\"male\\\"},\\n\",\n    \"                {'name': 'Zara', 'major': \\\"Psychology\\\", 'sex': \\\"female\\\"},\\n\",\n    \"                {'name': 'Wendy', 'major': \\\"Economics\\\", 'sex': \\\"female\\\"},\\n\",\n    \"                {'name': 'Sera', 'major': \\\"Psychology\\\", 'sex': \\\"female\\\"},\\n\",\n    \"                {'name': 'John', 'major': \\\"Computer Science\\\", 'sex': \\\"male\\\"},\\n\",\n    \"         ]\\n\",\n    \"df = pd.DataFrame(student_list, columns = ['name', 'major', 'sex'])\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## check if there is duplicated row\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 309,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"0     False\\n\",\n       \"1     False\\n\",\n       \"2     False\\n\",\n       \"3     False\\n\",\n       \"4     False\\n\",\n       \"5     False\\n\",\n       \"6     False\\n\",\n       \"7    
 False\\n\",\n       \"8     False\\n\",\n       \"9     False\\n\",\n       \"10    False\\n\",\n       \"11     True\\n\",\n       \"dtype: bool\"\n      ]\n     },\n     \"execution_count\": 309,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.duplicated()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 310,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df = df.drop_duplicates()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 311,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>major</th>\\n\",\n       \"      <th>sex</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Abraham</td>\\n\",\n       
\"      <td>Physics</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>Psychology</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Janny</td>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>Yuna</td>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>6</th>\\n\",\n       \"      <td>Jeniffer</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>7</th>\\n\",\n       \"      <td>Edward</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>8</th>\\n\",\n       \"      <td>Zara</td>\\n\",\n       \"      <td>Psychology</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9</th>\\n\",\n       \"      <td>Wendy</td>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>10</th>\\n\",\n       \"      <td>Sera</td>\\n\",\n       \"      <td>Psychology</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"        name             major     sex\\n\",\n       \"0       John  Computer Science    male\\n\",\n       \"1       Nate  Computer Science    male\\n\",\n       \"2    
Abraham           Physics    male\\n\",\n       \"3      Brian        Psychology    male\\n\",\n       \"4      Janny         Economics  female\\n\",\n       \"5       Yuna         Economics  female\\n\",\n       \"6   Jeniffer  Computer Science  female\\n\",\n       \"7     Edward  Computer Science    male\\n\",\n       \"8       Zara        Psychology  female\\n\",\n       \"9      Wendy         Economics  female\\n\",\n       \"10      Sera        Psychology  female\"\n      ]\n     },\n     \"execution_count\": 311,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 312,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>major</th>\\n\",\n       \"      <th>sex</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      
<td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Abraham</td>\\n\",\n       \"      <td>Physics</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>Psychology</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Janny</td>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>Yuna</td>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>6</th>\\n\",\n       \"      <td>Jeniffer</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>7</th>\\n\",\n       \"      <td>Edward</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>8</th>\\n\",\n       \"      <td>Zara</td>\\n\",\n       \"      <td>Psychology</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9</th>\\n\",\n       \"      <td>Wendy</td>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>10</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>None</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>11</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n     
  \"      <td>None</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"        name             major     sex\\n\",\n       \"0       John  Computer Science    male\\n\",\n       \"1       Nate  Computer Science    male\\n\",\n       \"2    Abraham           Physics    male\\n\",\n       \"3      Brian        Psychology    male\\n\",\n       \"4      Janny         Economics  female\\n\",\n       \"5       Yuna         Economics  female\\n\",\n       \"6   Jeniffer  Computer Science  female\\n\",\n       \"7     Edward  Computer Science    male\\n\",\n       \"8       Zara        Psychology  female\\n\",\n       \"9      Wendy         Economics  female\\n\",\n       \"10      Nate              None    male\\n\",\n       \"11      John  Computer Science    None\"\n      ]\n     },\n     \"execution_count\": 312,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"student_list = [{'name': 'John', 'major': \\\"Computer Science\\\", 'sex': \\\"male\\\"},\\n\",\n    \"                {'name': 'Nate', 'major': \\\"Computer Science\\\", 'sex': \\\"male\\\"},\\n\",\n    \"                {'name': 'Abraham', 'major': \\\"Physics\\\", 'sex': \\\"male\\\"},\\n\",\n    \"                {'name': 'Brian', 'major': \\\"Psychology\\\", 'sex': \\\"male\\\"},\\n\",\n    \"                {'name': 'Janny', 'major': \\\"Economics\\\", 'sex': \\\"female\\\"},\\n\",\n    \"                {'name': 'Yuna', 'major': \\\"Economics\\\", 'sex': \\\"female\\\"},\\n\",\n    \"                {'name': 'Jeniffer', 'major': \\\"Computer Science\\\", 'sex': \\\"female\\\"},\\n\",\n    \"                {'name': 'Edward', 'major': \\\"Computer Science\\\", 'sex': \\\"male\\\"},\\n\",\n    \"                {'name': 'Zara', 'major': \\\"Psychology\\\", 'sex': \\\"female\\\"},\\n\",\n    \"                {'name': 'Wendy', 'major': 
\\\"Economics\\\", 'sex': \\\"female\\\"},\\n\",\n    \"                {'name': 'Nate', 'major': None, 'sex': \\\"male\\\"},\\n\",\n    \"                {'name': 'John', 'major': \\\"Computer Science\\\", 'sex': None},\\n\",\n    \"         ]\\n\",\n    \"df = pd.DataFrame(student_list, columns = ['name', 'major', 'sex'])\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 313,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"0     False\\n\",\n       \"1     False\\n\",\n       \"2     False\\n\",\n       \"3     False\\n\",\n       \"4     False\\n\",\n       \"5     False\\n\",\n       \"6     False\\n\",\n       \"7     False\\n\",\n       \"8     False\\n\",\n       \"9     False\\n\",\n       \"10     True\\n\",\n       \"11     True\\n\",\n       \"dtype: bool\"\n      ]\n     },\n     \"execution_count\": 313,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.duplicated(['name'])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 314,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>major</th>\\n\",\n       \"      <th>sex</th>\\n\",\n       \"    </tr>\\n\",\n       
\"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Abraham</td>\\n\",\n       \"      <td>Physics</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>Psychology</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Janny</td>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>Yuna</td>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>6</th>\\n\",\n       \"      <td>Jeniffer</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>7</th>\\n\",\n       \"      <td>Edward</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>8</th>\\n\",\n       \"      <td>Zara</td>\\n\",\n       \"      <td>Psychology</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9</th>\\n\",\n       \"      <td>Wendy</td>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>10</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>None</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>11</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n      
 \"      <td>None</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"        name             major     sex\\n\",\n       \"2    Abraham           Physics    male\\n\",\n       \"3      Brian        Psychology    male\\n\",\n       \"4      Janny         Economics  female\\n\",\n       \"5       Yuna         Economics  female\\n\",\n       \"6   Jeniffer  Computer Science  female\\n\",\n       \"7     Edward  Computer Science    male\\n\",\n       \"8       Zara        Psychology  female\\n\",\n       \"9      Wendy         Economics  female\\n\",\n       \"10      Nate              None    male\\n\",\n       \"11      John  Computer Science    None\"\n      ]\n     },\n     \"execution_count\": 314,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.drop_duplicates(['name'], keep='last')\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 315,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>major</th>\\n\",\n       \"      <th>sex</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      
<th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Abraham</td>\\n\",\n       \"      <td>Physics</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>Psychology</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Janny</td>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>Yuna</td>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>6</th>\\n\",\n       \"      <td>Jeniffer</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>7</th>\\n\",\n       \"      <td>Edward</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>8</th>\\n\",\n       \"      <td>Zara</td>\\n\",\n       \"      <td>Psychology</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9</th>\\n\",\n       \"      <td>Wendy</td>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    
<tr>\\n\",\n       \"      <th>10</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>None</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>11</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>None</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"        name             major     sex\\n\",\n       \"0       John  Computer Science    male\\n\",\n       \"1       Nate  Computer Science    male\\n\",\n       \"2    Abraham           Physics    male\\n\",\n       \"3      Brian        Psychology    male\\n\",\n       \"4      Janny         Economics  female\\n\",\n       \"5       Yuna         Economics  female\\n\",\n       \"6   Jeniffer  Computer Science  female\\n\",\n       \"7     Edward  Computer Science    male\\n\",\n       \"8       Zara        Psychology  female\\n\",\n       \"9      Wendy         Economics  female\\n\",\n       \"10      Nate              None    male\\n\",\n       \"11      John  Computer Science    None\"\n      ]\n     },\n     \"execution_count\": 315,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# how to manage None value?\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 316,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    
.dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>40.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>35.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Yuna</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>37.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Abraham</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>10.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>12.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>Janny</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>11.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>6</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>7</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    
  <td>NaN</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"      name      job   age\\n\",\n       \"0     John  teacher  40.0\\n\",\n       \"1     Nate  teacher  35.0\\n\",\n       \"2     Yuna  teacher  37.0\\n\",\n       \"3  Abraham  student  10.0\\n\",\n       \"4    Brian  student  12.0\\n\",\n       \"5    Janny  student  11.0\\n\",\n       \"6     Nate  teacher   NaN\\n\",\n       \"7     John  student   NaN\"\n      ]\n     },\n     \"execution_count\": 316,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"school_id_list = [{'name': 'John', 'job': \\\"teacher\\\", 'age': 40},\\n\",\n    \"                {'name': 'Nate', 'job': \\\"teacher\\\", 'age': 35},\\n\",\n    \"                {'name': 'Yuna', 'job': \\\"teacher\\\", 'age': 37},\\n\",\n    \"                {'name': 'Abraham', 'job': \\\"student\\\", 'age': 10},\\n\",\n    \"                {'name': 'Brian', 'job': \\\"student\\\", 'age': 12},\\n\",\n    \"                {'name': 'Janny', 'job': \\\"student\\\", 'age': 11},\\n\",\n    \"                {'name': 'Nate', 'job': \\\"teacher\\\", 'age': None},\\n\",\n    \"                {'name': 'John', 'job': \\\"student\\\", 'age': None}\\n\",\n    \"         ]\\n\",\n    \"df = pd.DataFrame(school_id_list, columns = ['name', 'job', 'age'])\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## how to check if there is Null or NaN\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 317,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"<class 'pandas.core.frame.DataFrame'>\\n\",\n      \"RangeIndex: 8 entries, 0 to 7\\n\",\n      \"Data columns (total 3 columns):\\n\",\n      \"name    8 non-null object\\n\",\n      
\"job     8 non-null object\\n\",\n      \"age     6 non-null float64\\n\",\n      \"dtypes: float64(1), object(2)\\n\",\n      \"memory usage: 272.0+ bytes\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"df.info()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 318,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"    </tr>\\n\",\n       
\"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>6</th>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>True</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>7</th>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>True</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name    job    age\\n\",\n       \"0  False  False  False\\n\",\n       \"1  False  False  False\\n\",\n       \"2  False  False  False\\n\",\n       \"3  False  False  False\\n\",\n       \"4  False  False  False\\n\",\n       \"5  False  False  False\\n\",\n       \"6  False  False   True\\n\",\n       \"7  False  False   True\"\n      ]\n     },\n     \"execution_count\": 318,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.isna()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 319,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       
\"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>6</th>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>True</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>7</th>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>True</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n      
 \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name    job    age\\n\",\n       \"0  False  False  False\\n\",\n       \"1  False  False  False\\n\",\n       \"2  False  False  False\\n\",\n       \"3  False  False  False\\n\",\n       \"4  False  False  False\\n\",\n       \"5  False  False  False\\n\",\n       \"6  False  False   True\\n\",\n       \"7  False  False   True\"\n      ]\n     },\n     \"execution_count\": 319,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.isnull()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## how to fill Null or NaN\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 320,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>40.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      
<td>teacher</td>\\n\",\n       \"      <td>35.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Yuna</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>37.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Abraham</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>10.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>12.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>Janny</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>11.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>6</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>7</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"      name      job   age\\n\",\n       \"0     John  teacher  40.0\\n\",\n       \"1     Nate  teacher  35.0\\n\",\n       \"2     Yuna  teacher  37.0\\n\",\n       \"3  Abraham  student  10.0\\n\",\n       \"4    Brian  student  12.0\\n\",\n       \"5    Janny  student  11.0\\n\",\n       \"6     Nate  teacher   0.0\\n\",\n       \"7     John  student   0.0\"\n      ]\n     },\n     \"execution_count\": 320,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"tmp = df\\n\",\n    \"tmp[\\\"age\\\"] = tmp[\\\"age\\\"].fillna(0)\\n\",\n    \"tmp\"\n   ]\n  },\n  {\n  
 \"cell_type\": \"code\",\n   \"execution_count\": 321,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# fill missing age with median age for each group (teacher, student)\\n\",\n    \"df[\\\"age\\\"].fillna(df.groupby(\\\"job\\\")[\\\"age\\\"].transform(\\\"median\\\"), inplace=True)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 322,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>40.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>35.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Yuna</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>37.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Abraham</td>\\n\",\n       \"      
<td>student</td>\\n\",\n       \"      <td>10.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>12.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>Janny</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>11.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>6</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>7</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"      name      job   age\\n\",\n       \"0     John  teacher  40.0\\n\",\n       \"1     Nate  teacher  35.0\\n\",\n       \"2     Yuna  teacher  37.0\\n\",\n       \"3  Abraham  student  10.0\\n\",\n       \"4    Brian  student  12.0\\n\",\n       \"5    Janny  student  11.0\\n\",\n       \"6     Nate  teacher   0.0\\n\",\n       \"7     John  student   0.0\"\n      ]\n     },\n     \"execution_count\": 322,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Unique\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 323,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"job_list = [{'name': 'John', 'job': \\\"teacher\\\"},\\n\",\n    \"                {'name': 'Nate', 'job': \\\"teacher\\\"},\\n\",\n    \"                {'name': 'Fred', 'job': \\\"teacher\\\"},\\n\",\n    \"                {'name': 
'Abraham', 'job': \\\"student\\\"},\\n\",\n    \"                {'name': 'Brian', 'job': \\\"student\\\"},\\n\",\n    \"                {'name': 'Janny', 'job': \\\"developer\\\"},\\n\",\n    \"                {'name': 'Nate', 'job': \\\"teacher\\\"},\\n\",\n    \"                {'name': 'Obrian', 'job': \\\"dentist\\\"},\\n\",\n    \"                {'name': 'Yuna', 'job': \\\"teacher\\\"},\\n\",\n    \"                {'name': 'Rob', 'job': \\\"lawyer\\\"},\\n\",\n    \"                {'name': 'Brian', 'job': \\\"student\\\"},\\n\",\n    \"                {'name': 'Matt', 'job': \\\"student\\\"},\\n\",\n    \"                {'name': 'Wendy', 'job': \\\"banker\\\"},\\n\",\n    \"                {'name': 'Edward', 'job': \\\"teacher\\\"},\\n\",\n    \"                {'name': 'Ian', 'job': \\\"teacher\\\"},\\n\",\n    \"                {'name': 'Chris', 'job': \\\"banker\\\"},\\n\",\n    \"                {'name': 'Philip', 'job': \\\"lawyer\\\"},\\n\",\n    \"                {'name': 'Janny', 'job': \\\"basketball player\\\"},\\n\",\n    \"                {'name': 'Gwen', 'job': \\\"teacher\\\"},\\n\",\n    \"                {'name': 'Jessy', 'job': \\\"student\\\"}\\n\",\n    \"         ]\\n\",\n    \"df = pd.DataFrame(job_list, columns = ['name', 'job'])\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"unique() returns the unique values of the column as a NumPy array\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 324,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"['teacher' 'student' 'developer' 'dentist' 'lawyer' 'banker'\\n\",\n      \" 'basketball player']\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"print( df.job.unique() )\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"value_counts() gives you the number of occurrences of each unique 
value in the column\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 325,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"teacher              8\\n\",\n       \"student              5\\n\",\n       \"lawyer               2\\n\",\n       \"banker               2\\n\",\n       \"developer            1\\n\",\n       \"dentist              1\\n\",\n       \"basketball player    1\\n\",\n       \"Name: job, dtype: int64\"\n      ]\n     },\n     \"execution_count\": 325,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.job.value_counts()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Concatenate two dataframes\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 326,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"l1 = [{'name': 'John', 'job': \\\"teacher\\\"},\\n\",\n    \"      {'name': 'Nate', 'job': \\\"student\\\"},\\n\",\n    \"      {'name': 'Fred', 'job': \\\"developer\\\"}]\\n\",\n    \"\\n\",\n    \"l2 = [{'name': 'Ed', 'job': \\\"dentist\\\"},\\n\",\n    \"      {'name': 'Jack', 'job': \\\"farmer\\\"},\\n\",\n    \"      {'name': 'Ted', 'job': \\\"designer\\\"}]\\n\",\n    \"         \\n\",\n    \"df1 = pd.DataFrame(l1, columns = ['name', 'job'])\\n\",\n    \"df2 = pd.DataFrame(l2, columns = ['name', 'job'])\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## pd.concat\\n\",\n    \"The code below adds the rows of the second dataframe after the rows of the first dataframe\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 327,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"frames = [df1, df2]\\n\",\n    \"result = pd.concat(frames, ignore_index=True)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 328,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     
\"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Fred</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Ed</td>\\n\",\n       \"      <td>dentist</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Jack</td>\\n\",\n       \"      <td>farmer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>Ted</td>\\n\",\n       \"      <td>designer</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   name        job\\n\",\n       \"0  John    teacher\\n\",\n       \"1  Nate    student\\n\",\n       
\"2  Fred  developer\\n\",\n       \"3    Ed    dentist\\n\",\n       \"4  Jack     farmer\\n\",\n       \"5   Ted   designer\"\n      ]\n     },\n     \"execution_count\": 328,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"result\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## df.append\\n\",\n    \"The code below adds the rows of the second dataframe after the rows of the first dataframe. Note: DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0; on newer versions use pd.concat (shown above) instead.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 329,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"l1 = [{'name': 'John', 'job': \\\"teacher\\\"},\\n\",\n    \"      {'name': 'Nate', 'job': \\\"student\\\"},\\n\",\n    \"      {'name': 'Fred', 'job': \\\"developer\\\"}]\\n\",\n    \"\\n\",\n    \"l2 = [{'name': 'Ed', 'job': \\\"dentist\\\"},\\n\",\n    \"      {'name': 'Jack', 'job': \\\"farmer\\\"},\\n\",\n    \"      {'name': 'Ted', 'job': \\\"designer\\\"}]\\n\",\n    \"         \\n\",\n    \"df1 = pd.DataFrame(l1, columns = ['name', 'job'])\\n\",\n    \"df2 = pd.DataFrame(l2, columns = ['name', 'job'])\\n\",\n    \"result = df1.append(df2, ignore_index=True)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 330,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      
<th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Fred</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Ed</td>\\n\",\n       \"      <td>dentist</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Jack</td>\\n\",\n       \"      <td>farmer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>Ted</td>\\n\",\n       \"      <td>designer</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   name        job\\n\",\n       \"0  John    teacher\\n\",\n       \"1  Nate    student\\n\",\n       \"2  Fred  developer\\n\",\n       \"3    Ed    dentist\\n\",\n       \"4  Jack     farmer\\n\",\n       \"5   Ted   designer\"\n      ]\n     },\n     \"execution_count\": 330,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"result\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## pd.concat\\n\",\n    \"below is to add second dataframe as new columns in first dataframe\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 331,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"l1 = [{'name': 'John', 'job': 
\\\"teacher\\\"},\\n\",\n    \"      {'name': 'Nate', 'job': \\\"student\\\"},\\n\",\n    \"      {'name': 'Jack', 'job': \\\"developer\\\"}]\\n\",\n    \"\\n\",\n    \"l2 = [{'age': 25, 'country': \\\"U.S\\\"},\\n\",\n    \"      {'age': 30, 'country': \\\"U.K\\\"},\\n\",\n    \"      {'age': 45, 'country': \\\"Korea\\\"}]\\n\",\n    \"         \\n\",\n    \"df1 = pd.DataFrame(l1, columns = ['name', 'job'])\\n\",\n    \"df2 = pd.DataFrame(l2, columns = ['age', 'country'])\\n\",\n    \"result = pd.concat([df1, df2], axis=1, ignore_index=True)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 332,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <th>3</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"      <td>U.S</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>U.K</td>\\n\",\n 
      \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Jack</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"      <td>45</td>\\n\",\n       \"      <td>Korea</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"      0          1   2      3\\n\",\n       \"0  John    teacher  25    U.S\\n\",\n       \"1  Nate    student  30    U.K\\n\",\n       \"2  Jack  developer  45  Korea\"\n      ]\n     },\n     \"execution_count\": 332,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"result\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Concatenate two lists as a dataframe\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 333,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>label</th>\\n\",\n       \"      <th>prediction</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <td>1</td>\\n\",\n       \"      <td>1</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      
<th>1</th>\\n\",\n       \"      <td>2</td>\\n\",\n       \"      <td>2</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>3</td>\\n\",\n       \"      <td>2</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>4</td>\\n\",\n       \"      <td>5</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>5</td>\\n\",\n       \"      <td>5</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   label  prediction\\n\",\n       \"0      1           1\\n\",\n       \"1      2           2\\n\",\n       \"2      3           2\\n\",\n       \"3      4           5\\n\",\n       \"4      5           5\"\n      ]\n     },\n     \"execution_count\": 333,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"label = [1,2,3,4,5]\\n\",\n    \"prediction = [1,2,2,5,5]\\n\",\n    \"\\n\",\n    \"comparison = pd.DataFrame(\\n\",\n    \"    {'label': label,\\n\",\n    \"     'prediction': prediction\\n\",\n    \"    })\\n\",\n    \"\\n\",\n    \"comparison\"\n   ]\n  }\n ],\n \"metadata\": {\n  \"kernelspec\": {\n   \"display_name\": \"Python 3\",\n   \"language\": \"python\",\n   \"name\": \"python3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.6.4\"\n  }\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 2\n}\n"
  },
  {
    "path": "data/friend_list.csv",
    "content": "name,age,job\rJohn,20,student\rJenny,30,developer\rNate,30,teacher\rJulia,40,dentist\rBrian,45,manager\rChris,25,intern"
  },
  {
    "path": "data/friend_list.txt",
    "content": "name,age,job\rJohn,20,student\rJenny,30,developer\rNate,30,teacher\rJulia,40,dentist\rBrian,45,manager\rChris,25,intern"
  },
  {
    "path": "data/friend_list_no_head.csv",
    "content": "John,20,student\rJenny,30,developer\rNate,30,teacher\rJulia,40,dentist\rBrian,45,manager\rChris,25,intern\n"
  },
  {
    "path": "data/friend_list_tab.txt",
    "content": "name\tage\tjob\rJohn\t20\tstudent\rJenny\t30\tdeveloper\rNate\t30\tteacher\rJulia\t40\tdentist\rBrian\t45\tmanager\rChris\t25\tintern\n"
  },
  {
    "path": "팬더스_명령어_꿀팁.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Pandas는 무엇인가요?\\n\",\n    \"데이터 분석 및 가공에 사용되는 파이썬 라이브러리입니다\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 156,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Julia</td>\\n\",\n       \"      <td>40</td>\\n\",\n       \"      <td>dentist</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      
<th>4</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>45</td>\\n\",\n       \"      <td>manager</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   John   20    student\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\\n\",\n       \"3  Julia   40    dentist\\n\",\n       \"4  Brian   45    manager\"\n      ]\n     },\n     \"execution_count\": 156,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"import pandas as pd\\n\",\n    \"data_frame = pd.read_csv('data/friend_list.csv')\\n\",\n    \"data_frame.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# 데이터프레임은 무엇인가요?\\n\",\n    \"가로축과 세로축이 있는 엑셀과 유사한 데이터구조입니다. 가로축은 로우(행), 세로축은 컬럼(열)이라고 합니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 157,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      
<th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Julia</td>\\n\",\n       \"      <td>40</td>\\n\",\n       \"      <td>dentist</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>45</td>\\n\",\n       \"      <td>manager</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   John   20    student\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\\n\",\n       \"3  Julia   40    dentist\\n\",\n       \"4  Brian   45    manager\"\n      ]\n     },\n     \"execution_count\": 157,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"# 데이터프레임이 가지고 있는 함수의 예제입니다.\\n\",\n    \"data_frame.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# 시리즈는 무엇인가요?\\n\",\n    \"데이터프레임의 컬럼(열)은 모두 시리즈입니다. 
위의 예제는 3개의 시리즈로 구성된 데이터프레임입니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 158,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"pandas.core.series.Series\"\n      ]\n     },\n     \"execution_count\": 158,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"type(data_frame.job)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 159,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>STUDENT</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>DEVELOPER</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>TEACHER</td>\\n\",\n       \"    </tr>\\n\",\n       \" 
   <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Julia</td>\\n\",\n       \"      <td>40</td>\\n\",\n       \"      <td>DENTIST</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>45</td>\\n\",\n       \"      <td>MANAGER</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   John   20    STUDENT\\n\",\n       \"1  Jenny   30  DEVELOPER\\n\",\n       \"2   Nate   30    TEACHER\\n\",\n       \"3  Julia   40    DENTIST\\n\",\n       \"4  Brian   45    MANAGER\"\n      ]\n     },\n     \"execution_count\": 159,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"# 시리즈의 함수 예제입니다.\\n\",\n    \"data_frame.job = data_frame.job.str.upper()\\n\",\n    \"data_frame.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"시리즈는 단순히 파이썬 리스트를 간직한 오브젝트입니다.  \\n\",\n    \"리스트를 파라미터로 주면 바로 시리즈가 생성됩니다.  
\\n\",\n    \"시리즈는 데이터 가공 및 분석이 파이썬 리스트보다 훨씬 쉽습니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 160,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"s1 = pd.core.series.Series(['one', 'two', 'three'])\\n\",\n    \"s2 = pd.core.series.Series([1, 2, 3])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 161,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>num</th>\\n\",\n       \"      <th>word</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>1</td>\\n\",\n       \"      <td>one</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>2</td>\\n\",\n       \"      <td>two</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>3</td>\\n\",\n       \"      <td>three</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   num   word\\n\",\n       \"0    1    one\\n\",\n       \"1    2    two\\n\",\n       \"2    3  three\"\n      ]\n     },\n     \"execution_count\": 161,\n     
\"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"pd.DataFrame(data=dict(word=s1, num=s2))\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# 왜 팬더스를 쓰나요?\\n\",\n    \"\\n\",\n    \"엑셀과 상당히 유사합니다, 데이터의 수정/가공 및 분석이 용이합니다.    \\n\",\n    \"데이터 가공을 위한 수많은 함수를 지원합니다.  \\n\",\n    \"Numpy 기반으로 데이터 처리가 상당히 빠릅니다.  \"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# 파일을 데이터프레임으로 불러오기\\n\",\n    \"데이터프레임 (dataframe)은 2차원 자료구조입니다. 로우와 컬럼으로 엑셀 형식과 유사합니다.  \\n\",\n    \"기본적으로 csv 포맷을 지원하지만, 구분자로 컬럼이 구분되어 있는 데이터는 모두 지원합니다.  \\n\",\n    \"read_csv 함수로 파일을 데이터프레임으로 호출할 수 있습니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 162,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df = pd.read_csv('data/friend_list.csv')\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 163,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       
\"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Julia</td>\\n\",\n       \"      <td>40</td>\\n\",\n       \"      <td>dentist</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>45</td>\\n\",\n       \"      <td>manager</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>Chris</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"      <td>intern</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   John   20    student\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\\n\",\n       \"3  Julia   40    dentist\\n\",\n       \"4  Brian   45    manager\\n\",\n       \"5  Chris   25     intern\"\n      ]\n     },\n     \"execution_count\": 163,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"아래는 csv 파일이 아닌 파일을 호출하는 예제입니다.  
\\n\",\n    \"파일명은 txt이지만, 쉼표로 컬럼이 구분되어 있는 파일입니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 164,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df = pd.read_csv('data/friend_list.txt')\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 165,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Julia</td>\\n\",\n       \"      <td>40</td>\\n\",\n       \"      <td>dentist</td>\\n\",\n       \"    
</tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>45</td>\\n\",\n       \"      <td>manager</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   John   20    student\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\\n\",\n       \"3  Julia   40    dentist\\n\",\n       \"4  Brian   45    manager\"\n      ]\n     },\n     \"execution_count\": 165,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"만약 파일의 컬럼들이 쉼표로 구분되어 있지 않을 경우라도, delimiter 파라미터에 구분자를 지정해줘서  \\n\",\n    \"컬럼을 제대로 나눠줄 수 있습니다. 아래는 탭으로 컬럼이 구분된 경우의 예제입니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 166,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df = pd.read_csv('data/friend_list_tab.txt', delimiter = \\\"\\\\t\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 167,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      
<th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Julia</td>\\n\",\n       \"      <td>40</td>\\n\",\n       \"      <td>dentist</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>45</td>\\n\",\n       \"      <td>manager</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   John   20    student\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\\n\",\n       \"3  Julia   40    dentist\\n\",\n       \"4  Brian   45    manager\"\n      ]\n     },\n     \"execution_count\": 167,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"만약 파일에 데이터 헤더가 없을 경우, header = None으로 지정해줘서,  \\n\",\n    \"첫번째 데이터가 데이터 헤더로 들어가는 것을 방지해줘야합니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 168,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df = 
pd.read_csv('data/friend_list_no_head.csv', header = None)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 169,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <th>2</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Julia</td>\\n\",\n       \"      <td>40</td>\\n\",\n       \"      <td>dentist</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>45</td>\\n\",\n       \"      <td>manager</td>\\n\",\n       \" 
   </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"       0   1          2\\n\",\n       \"0   John  20    student\\n\",\n       \"1  Jenny  30  developer\\n\",\n       \"2   Nate  30    teacher\\n\",\n       \"3  Julia  40    dentist\\n\",\n       \"4  Brian  45    manager\"\n      ]\n     },\n     \"execution_count\": 169,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"헤더가 없는 데이터를 데이터프레임으로 호출했을 경우,  \\n\",\n    \"아래와 같이 데이터프레임 생성 후에, 컬럼 헤더를 지정해주실 수 있습니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 170,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df.columns = ['name', 'age', 'job']\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 171,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      
<td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Julia</td>\\n\",\n       \"      <td>40</td>\\n\",\n       \"      <td>dentist</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>45</td>\\n\",\n       \"      <td>manager</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   John   20    student\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\\n\",\n       \"3  Julia   40    dentist\\n\",\n       \"4  Brian   45    manager\"\n      ]\n     },\n     \"execution_count\": 171,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"또한, 파일을 데이터프레임으로 호출하는 동시에, 헤더를 아래와 같이 지정해주실 수도 있습니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 172,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df = pd.read_csv('data/friend_list_no_head.csv', header = None, names=['name', 'age', 'job'])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 173,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    
.dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Julia</td>\\n\",\n       \"      <td>40</td>\\n\",\n       \"      <td>dentist</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>45</td>\\n\",\n       \"      <td>manager</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   John   20    student\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\\n\",\n 
      \"3  Julia   40    dentist\\n\",\n       \"4  Brian   45    manager\"\n      ]\n     },\n     \"execution_count\": 173,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# 데이터프레임을 파이썬 코드로 생성하기\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## 딕셔너리로 데이터프레임 생성하기\\n\",\n    \"파이썬의 기본 자료구조로 데이터프레임 생성이 가능합니다.  \\n\",\n    \"아래의 예제는 딕셔너리로 데이터프레임을 생성하는 예제입니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 174,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"friend_dict_list = [{'name': 'Jone', 'age': 20, 'job': 'student'},\\n\",\n    \"         {'name': 'Jenny', 'age': 30, 'job': 'developer'},\\n\",\n    \"         {'name': 'Nate', 'age': 30, 'job': 'teacher'}]\\n\",\n    \"df = pd.DataFrame(friend_dict_list)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 175,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n 
      \"      <th>0</th>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>Jone</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   age        job   name\\n\",\n       \"0   20    student   Jone\\n\",\n       \"1   30  developer  Jenny\\n\",\n       \"2   30    teacher   Nate\"\n      ]\n     },\n     \"execution_count\": 175,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"데이터프레임 생성 시, 컬럼의 순서가 뒤바뀔 수 있습니다.  
\\n\",\n    \"아래와 같이 컬럼을 원하시는 순서로 지정하실 수 있습니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 176,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df = df[['name', 'age', 'job']]\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 177,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>Jone</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   Jone   20    student\\n\",\n       \"1 
 Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\"\n      ]\n     },\n     \"execution_count\": 177,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## OrderedDict로 데이터프레임 생성하기\\n\",\n    \"OrderedDict 자료구조로 데이터프레임을 생성하면, 컬럼의 순서가 뒤바뀌지 않습니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 178,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"from collections import OrderedDict\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 179,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"friend_ordered_dict = OrderedDict([ ('name', ['John', 'Jenny', 'Nate']),\\n\",\n    \"          ('age', [20, 30, 30]),\\n\",\n    \"          ('job', ['student', 'developer', 'teacher']) ] )\\n\",\n    \"df = pd.DataFrame.from_dict(friend_ordered_dict)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 180,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n   
    \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   John   20    student\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\"\n      ]\n     },\n     \"execution_count\": 180,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"collapsed\": true\n   },\n   \"source\": [\n    \"## list로 데이터프레임 생성하기\\n\",\n    \"리스트로 데이터프레임을 생성하는 예제입니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 181,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"friend_list = [ ['John', 20, 'student'],['Jenny', 30, 'developer'],['Nate', 30, 'teacher'] ]\\n\",\n    \"column_name = ['name', 'age', 'job']\\n\",\n    \"df = pd.DataFrame.from_records(friend_list, columns=column_name)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 182,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        
vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   John   20    student\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\"\n      ]\n     },\n     \"execution_count\": 182,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 183,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"friend_list = [ \\n\",\n    \"                ['name',['John', 'Jenny', 'Nate']],\\n\",\n    \"                ['age',[20,30,30]],\\n\",\n    \"                ['job',['student', 'developer', 'teacher']] \\n\",\n    \"              ]\\n\",\n    \"df = 
pd.DataFrame.from_items(friend_list)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 184,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   John   20    student\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\"\n      ]\n     },\n     \"execution_count\": 184,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    
}\n   ],\n   \"source\": [\n    \"df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# 파일로 데이터프레임을 저장하기\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"아래는 데이터프레임을 헤더와 함께 저장하는 예제입니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 185,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"friend_list = [ \\n\",\n    \"                ['name',['John', 'Jenny', 'nate']],\\n\",\n    \"                ['age',[20,30,30]],\\n\",\n    \"                ['job',['student', 'developer', 'teacher']] \\n\",\n    \"              ]\\n\",\n    \"df = pd.DataFrame.from_items(friend_list)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 186,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      
<td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   John   20    student\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   nate   30    teacher\"\n      ]\n     },\n     \"execution_count\": 186,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"to_csv 함수를 사용하여 파일로 저장하실 수 있습니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 187,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df.to_csv('friend_list_from_df.csv')\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"아래는 헤더가 없는 데이터프레임의 예제입니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 188,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"friend_list = [ ['John', 20, 'student'],['Jenny', 30, 'developer'],['Nate', 30, 'teacher'] ]\\n\",\n    \"df = pd.DataFrame.from_records(friend_list)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 189,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    
.dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <th>2</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"       0   1          2\\n\",\n       \"0   John  20    student\\n\",\n       \"1  Jenny  30  developer\\n\",\n       \"2   Nate  30    teacher\"\n      ]\n     },\n     \"execution_count\": 189,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"to_csv 함수로 파일로 저장하실 수 있습니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 190,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df.to_csv('friend_list_from_df.csv')\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"사실 파일의 확장자명은 원하시는대로 주셔도 무방합니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 
191,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df.to_csv('friend_list_from_df.txt')\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"header와 index는 따로 지정하지 않아도 기본값이 True로 설정되어 있습니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 192,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df.to_csv('friend_list_from_df.csv', header = True, index = True)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"**header = False** 는 컬럼 이름을 파일에 저장하지 않겠다라는 의미입니다. 예제에서 0,1,2가 헤더에 저장되지 않습니다.   \\n\",\n    \"**index = False** 는 로우 인덱스를 파일에 저장하지 않겠다라는 의미입니다.  예제에서 0,1,2가 로우 인덱스에 저장되지 않습니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 193,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df.to_csv('friend_list_from_df.csv', header = False, index = False)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"헤더 정보를 원하실 경우, header 키워드로 컬럼 이름을 파일에 저장하실 수 있습니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 194,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df.to_csv('friend_list_from_df.csv', header = ['name', 'age', 'job'])\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"아래는 **None** 값이 있는 데이터프레임의 예제입니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 195,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"friend_list = [ \\n\",\n    \"                ['name',['John', None, 'nate']],\\n\",\n    \"                ['age',[20,None,30]],\\n\",\n    \"                ['job',['student', 'developer', 'teacher']] \\n\",\n    \"              ]\\n\",\n    \"df = pd.DataFrame.from_items(friend_list)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 196,\n   \"metadata\": {},\n   
\"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20.0</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>None</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>nate</td>\\n\",\n       \"      <td>30.0</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   name   age        job\\n\",\n       \"0  John  20.0    student\\n\",\n       \"1  None   NaN  developer\\n\",\n       \"2  nate  30.0    teacher\"\n      ]\n     },\n     \"execution_count\": 196,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 197,\n   
\"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df.to_csv('friend_list_from_df.csv')\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"**na_rep** 을 사용하시면 **None** 을 원하시는 값으로 쉽게 변경하실 수 있습니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 198,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df.to_csv('friend_list_from_df.csv', na_rep = '-')\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# 로우 선택하기\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## 인덱스로 로우 선택하기\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 199,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"friend_list = [ \\n\",\n    \"                ['name',['John', 'Jenny', 'Nate']],\\n\",\n    \"                ['age',[20,30,30]],\\n\",\n    \"                ['job',['student', 'developer', 'teacher']] \\n\",\n    \"              ]\\n\",\n    \"df = pd.DataFrame.from_items(friend_list)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 200,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       
\"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   John   20    student\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\"\n      ]\n     },\n     \"execution_count\": 200,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"아래는 로우 인덱스 슬라이싱으로 로우 1부터 2까지 순차적으로 선택하는 예제입니다. 끝 인덱스 3은 결과에 포함되지 않습니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 201,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr 
style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\"\n      ]\n     },\n     \"execution_count\": 201,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df[1:3]\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"아래는 순차적이지 않은 로우를 선택하는 예제입니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 202,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      
<th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   name  age      job\\n\",\n       \"0  John   20  student\\n\",\n       \"2  Nate   30  teacher\"\n      ]\n     },\n     \"execution_count\": 202,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.loc[[0,2]]\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 203,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       
\"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   John   20    student\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\"\n      ]\n     },\n     \"execution_count\": 203,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## 컬럼값에 따른 로우 선택하기\\n\",\n    \"마치 데이터베이스에 쿼리를 전달하듯, 특정한 컬럼값을 충족하는 로우만 선택하실 수 있습니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 204,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df_filtered = df[df.age > 25]\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 205,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n 
      \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\"\n      ]\n     },\n     \"execution_count\": 205,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df_filtered\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 206,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df_filtered = df.query('age>25')\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 207,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr 
style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\"\n      ]\n     },\n     \"execution_count\": 207,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df_filtered\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 208,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df_filtered = df[(df.age >25) & (df.name == 'Nate')]\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 209,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: 
right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   name  age      job\\n\",\n       \"2  Nate   30  teacher\"\n      ]\n     },\n     \"execution_count\": 209,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df_filtered\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 210,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      
<th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   John   20    student\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\"\n      ]\n     },\n     \"execution_count\": 210,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# 컬럼 필터하기\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## 인덱스로 필터하기\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 211,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <th>2</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    
<tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"       0   1          2\\n\",\n       \"0   John  20    student\\n\",\n       \"1  Jenny  30  developer\\n\",\n       \"2   Nate  30    teacher\"\n      ]\n     },\n     \"execution_count\": 211,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"friend_list = [ ['John', 20, 'student'],['Jenny', 30, 'developer'],['Nate', 30, 'teacher'] ]\\n\",\n    \"df = pd.DataFrame.from_records(friend_list)\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"모든 로우를 보여주되, 컬럼은 0부터 1까지만 출력하는 예제입니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 212,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  
<thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <th>1</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"       0   1\\n\",\n       \"0   John  20\\n\",\n       \"1  Jenny  30\\n\",\n       \"2   Nate  30\"\n      ]\n     },\n     \"execution_count\": 212,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.iloc[:, 0:2]\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"모든 로우를 보여주되, 컬럼 0과 2만 출력하는 예제입니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 213,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: 
right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <th>2</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"       0          2\\n\",\n       \"0   John    student\\n\",\n       \"1  Jenny  developer\\n\",\n       \"2   Nate    teacher\"\n      ]\n     },\n     \"execution_count\": 213,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.iloc[:,[0,2]]\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 214,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      
<th>2</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"       0   1          2\\n\",\n       \"0   John  20    student\\n\",\n       \"1  Jenny  30  developer\\n\",\n       \"2   Nate  30    teacher\"\n      ]\n     },\n     \"execution_count\": 214,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## 컬럼 이름으로 필터하기\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 215,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \" 
     <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Julia</td>\\n\",\n       \"      <td>40</td>\\n\",\n       \"      <td>dentist</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>45</td>\\n\",\n       \"      <td>manager</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>Chris</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"      <td>intern</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   John   20    student\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\\n\",\n       \"3  Julia   40    dentist\\n\",\n       \"4  Brian   45    manager\\n\",\n       \"5  Chris   25     intern\"\n      ]\n     },\n     \"execution_count\": 215,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"# you can create column header for no header data at once\\n\",\n    \"df = 
pd.read_csv('data/friend_list_no_head.csv', header = None, names=['name', 'age', 'job'])\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 216,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Julia</td>\\n\",\n       \"      <td>40</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>45</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>Chris</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"    </tr>\\n\",\n       
\"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age\\n\",\n       \"0   John   20\\n\",\n       \"1  Jenny   30\\n\",\n       \"2   Nate   30\\n\",\n       \"3  Julia   40\\n\",\n       \"4  Brian   45\\n\",\n       \"5  Chris   25\"\n      ]\n     },\n     \"execution_count\": 216,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df_filtered = df[['name', 'age']]\\n\",\n    \"df_filtered\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 217,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      
<th>3</th>\\n\",\n       \"      <td>40</td>\\n\",\n       \"      <td>dentist</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>45</td>\\n\",\n       \"      <td>manager</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>25</td>\\n\",\n       \"      <td>intern</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   age        job\\n\",\n       \"0   20    student\\n\",\n       \"1   30  developer\\n\",\n       \"2   30    teacher\\n\",\n       \"3   40    dentist\\n\",\n       \"4   45    manager\\n\",\n       \"5   25     intern\"\n      ]\n     },\n     \"execution_count\": 217,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.filter(items=['age', 'job'])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 218,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      
<th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Julia</td>\\n\",\n       \"      <td>40</td>\\n\",\n       \"      <td>dentist</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>45</td>\\n\",\n       \"      <td>manager</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>Chris</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"      <td>intern</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   John   20    student\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\\n\",\n       \"3  Julia   40    dentist\\n\",\n       \"4  Brian   45    manager\\n\",\n       \"5  Chris   25     intern\"\n      ]\n     },\n     \"execution_count\": 218,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"원하는 글자를 이름에 포함한 컬럼을 보여줍니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 219,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style 
scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>20</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Julia</td>\\n\",\n       \"      <td>40</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>45</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>Chris</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age\\n\",\n       \"0   John   20\\n\",\n       \"1  Jenny   30\\n\",\n       \"2   Nate   30\\n\",\n       \"3  Julia   40\\n\",\n       \"4  Brian   45\\n\",\n       \"5  Chris   25\"\n      
]\n     },\n     \"execution_count\": 219,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"# select columns containing 'a'\\n\",\n    \"df.filter(like='a',axis=1)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"정규식으로 필터도 가능합니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 220,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>dentist</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>manager</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>intern</td>\\n\",\n       \"    </tr>\\n\",\n  
     \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"         job\\n\",\n       \"0    student\\n\",\n       \"1  developer\\n\",\n       \"2    teacher\\n\",\n       \"3    dentist\\n\",\n       \"4    manager\\n\",\n       \"5     intern\"\n      ]\n     },\n     \"execution_count\": 220,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"# select columns using regex\\n\",\n    \"df.filter(regex='b$',axis=1)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# 로우 드롭하기\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"로우 인덱스로 로우를 드롭할 수 있습니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 221,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"friend_dict_list = [{'age': 20, 'job': 'student'},\\n\",\n    \"         {'age': 30, 'job': 'developer'},\\n\",\n    \"         {'age': 30, 'job': 'teacher'}]\\n\",\n    \"df = pd.DataFrame(friend_dict_list, index = ['John', 'Jenny', 'Nate'])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 222,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>age</th>\\n\",\n       
\"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>John</th>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>Jenny</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>Nate</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"       age        job\\n\",\n       \"John    20    student\\n\",\n       \"Jenny   30  developer\\n\",\n       \"Nate    30    teacher\"\n      ]\n     },\n     \"execution_count\": 222,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"드롭된 결과는 데이터프레임에 저장되지 않습니다. 
저장하고 싶으실 경우, 결과를 데이터프레임에 따로 저장하셔야 합니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 223,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>Jenny</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"       age        job\\n\",\n       \"Jenny   30  developer\"\n      ]\n     },\n     \"execution_count\": 223,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.drop(['John', 'Nate'])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 224,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n   
    \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>John</th>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>Jenny</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>Nate</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"       age        job\\n\",\n       \"John    20    student\\n\",\n       \"Jenny   30  developer\\n\",\n       \"Nate    30    teacher\"\n      ]\n     },\n     \"execution_count\": 224,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"드롭된 결과를 데이터프레임에 저장하는 예제입니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 225,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df = df.drop(['John', 'Nate'])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 226,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: 
middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>Jenny</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"       age        job\\n\",\n       \"Jenny   30  developer\"\n      ]\n     },\n     \"execution_count\": 226,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### 드롭된 결과를 바로 데이터프레임에 저장하는 방법\\n\",\n    \"inplace 키워드를 사용하시면, 따로 저장할 필요 없이, 드롭된 결과가 데이터프레임에 반영됩니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 227,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"friend_dict_list = [{'age': 20, 'job': 'student'},\\n\",\n    \"         {'age': 30, 'job': 'developer'},\\n\",\n    \"         {'age': 30, 'job': 'teacher'}]\\n\",\n    \"df = pd.DataFrame(friend_dict_list, index = ['John', 'Jenny', 'Nate'])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 228,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df.drop(['John', 'Nate'], inplace = True)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 229,\n  
 \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>Jenny</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"       age        job\\n\",\n       \"Jenny   30  developer\"\n      ]\n     },\n     \"execution_count\": 229,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## 로우 인덱스로 드롭하기\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 230,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"friend_dict_list = [{'name': 'Jone', 'age': 20, 'job': 'student'},\\n\",\n    \"         {'name': 'Jenny', 'age': 30, 'job': 'developer'},\\n\",\n    \"         {'name': 'Nate', 'age': 30, 'job': 'teacher'}]\\n\",\n    \"df = pd.DataFrame(friend_dict_list)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 231,\n   \"metadata\": {},\n   
\"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>Jone</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   age        job   name\\n\",\n       \"0   20    student   Jone\\n\",\n       \"1   30  developer  Jenny\\n\",\n       \"2   30    teacher   Nate\"\n      ]\n     },\n     \"execution_count\": 231,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"로우 
인덱스로 드롭하는 예제입니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 232,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df = df.drop(df.index[[0,2]])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 233,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   age        job   name\\n\",\n       \"1   30  developer  Jenny\"\n      ]\n     },\n     \"execution_count\": 233,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## 컬럼값으로 로우 드롭하기\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 234,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      
\"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>Jone</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   age        job   name\\n\",\n       \"0   20    student   Jone\\n\",\n       \"1   30  developer  Jenny\\n\",\n       \"2   30    teacher   Nate\"\n      ]\n     },\n     \"execution_count\": 234,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"friend_dict_list = [{'name': 'Jone', 'age': 20, 'job': 'student'},\\n\",\n    \"         {'name': 'Jenny', 'age': 30, 'job': 'developer'},\\n\",\n    \"   
      {'name': 'Nate', 'age': 30, 'job': 'teacher'}]\\n\",\n    \"df = pd.DataFrame(friend_dict_list)\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 235,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df = df[df.age != 30]\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 236,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>Jone</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   age      job  name\\n\",\n       \"0   20  student  Jone\"\n      ]\n     },\n     \"execution_count\": 236,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# 컬럼 드롭하기\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 237,\n   
\"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>Jone</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   age        job   name\\n\",\n       \"0   20    student   Jone\\n\",\n       \"1   30  developer  Jenny\\n\",\n       \"2   30    teacher   Nate\"\n      ]\n     },\n     \"execution_count\": 237,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"friend_dict_list = [{'name': 'Jone', 'age': 20, 'job': 'student'},\\n\",\n    \"      
   {'name': 'Jenny', 'age': 30, 'job': 'developer'},\\n\",\n    \"         {'name': 'Nate', 'age': 30, 'job': 'teacher'}]\\n\",\n    \"df = pd.DataFrame(friend_dict_list)\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 238,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df = df.drop('age', axis=1)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 239,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>Jone</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"         job   name\\n\",\n       \"0    student   Jone\\n\",\n       \"1  developer 
 Jenny\\n\",\n       \"2    teacher   Nate\"\n      ]\n     },\n     \"execution_count\": 239,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"collapsed\": true\n   },\n   \"source\": [\n    \"# 컬럼 추가 또는 변경하기\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 240,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>Jone</td>\\n\",\n       \"      <td>15</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n     
 ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   Jone   15    student\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\"\n      ]\n     },\n     \"execution_count\": 240,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"friend_dict_list = [{'name': 'Jone', 'age': 15, 'job': 'student'},\\n\",\n    \"         {'name': 'Jenny', 'age': 30, 'job': 'developer'},\\n\",\n    \"         {'name': 'Nate', 'age': 30, 'job': 'teacher'}]\\n\",\n    \"df = pd.DataFrame(friend_dict_list, columns = ['name', 'age', 'job'])\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"아래와 같은 방법으로 새로운 컬럼을 기본값과 함께 추가하실 수 있습니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 241,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df['salary'] = 0\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 242,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"      <th>salary</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    
<tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>Jone</td>\\n\",\n       \"      <td>15</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"      <td>0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>0</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job  salary\\n\",\n       \"0   Jone   15    student       0\\n\",\n       \"1  Jenny   30  developer       0\\n\",\n       \"2   Nate   30    teacher       0\"\n      ]\n     },\n     \"execution_count\": 242,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"기존 컬럼값을 가지고 새로운 컬럼을 생성하는 예제입니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 243,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr 
style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>Jone</td>\\n\",\n       \"      <td>15</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job\\n\",\n       \"0   Jone   15    student\\n\",\n       \"1  Jenny   30  developer\\n\",\n       \"2   Nate   30    teacher\"\n      ]\n     },\n     \"execution_count\": 243,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"friend_dict_list = [{'name': 'Jone', 'age': 15, 'job': 'student'},\\n\",\n    \"         {'name': 'Jenny', 'age': 30, 'job': 'developer'},\\n\",\n    \"         {'name': 'Nate', 'age': 30, 'job': 'teacher'}]\\n\",\n    \"df = pd.DataFrame(friend_dict_list, columns = ['name', 'age', 'job'])\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"넘파이를 사용하셔서, 한 줄에 새로운 컬럼값을 생성하실 수도 있습니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 244,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import numpy as np\\n\",\n    \"df['salary'] = np.where(df['job'] != 'student' , 'yes', 'no')\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   
\"execution_count\": 245,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"      <th>salary</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>Jone</td>\\n\",\n       \"      <td>15</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>no</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"      <td>yes</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>yes</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  age        job salary\\n\",\n       \"0   Jone   15    student     no\\n\",\n       \"1  Jenny   30  developer    yes\\n\",\n       \"2   Nate   30    teacher    yes\"\n      ]\n     },\n     
\"execution_count\": 245,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 246,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>midterm</th>\\n\",\n       \"      <th>final</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>95</td>\\n\",\n       \"      <td>85</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>85</td>\\n\",\n       \"      <td>80</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>10</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  midterm  final\\n\",\n       \"0   John       95     85\\n\",\n       \"1  Jenny       85     80\\n\",\n       \"2   Nate       10     30\"\n      ]\n     },\n     
\"execution_count\": 246,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"friend_dict_list = [{'name': 'John', 'midterm': 95, 'final': 85},\\n\",\n    \"         {'name': 'Jenny', 'midterm': 85, 'final': 80},\\n\",\n    \"         {'name': 'Nate', 'midterm': 10, 'final': 30}]\\n\",\n    \"df = pd.DataFrame(friend_dict_list, columns = ['name', 'midterm', 'final'])\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"아래는 기존에 있는 두 컬럼값을 더해서 새로운 컬럼을 만드는 예제입니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 247,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df['total'] = df['midterm'] + df['final']\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 248,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>midterm</th>\\n\",\n       \"      <th>final</th>\\n\",\n       \"      <th>total</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>95</td>\\n\",\n       \"      <td>85</td>\\n\",\n       \"      
<td>180</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>85</td>\\n\",\n       \"      <td>80</td>\\n\",\n       \"      <td>165</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>10</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>40</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  midterm  final  total\\n\",\n       \"0   John       95     85    180\\n\",\n       \"1  Jenny       85     80    165\\n\",\n       \"2   Nate       10     30     40\"\n      ]\n     },\n     \"execution_count\": 248,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"기존의 컬럼을 사용하여 새로운 컬럼을 만드는 예제입니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 249,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df['average'] = df['total'] / 2\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 250,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr 
style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>midterm</th>\\n\",\n       \"      <th>final</th>\\n\",\n       \"      <th>total</th>\\n\",\n       \"      <th>average</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>95</td>\\n\",\n       \"      <td>85</td>\\n\",\n       \"      <td>180</td>\\n\",\n       \"      <td>90.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>85</td>\\n\",\n       \"      <td>80</td>\\n\",\n       \"      <td>165</td>\\n\",\n       \"      <td>82.5</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>10</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>40</td>\\n\",\n       \"      <td>20.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  midterm  final  total  average\\n\",\n       \"0   John       95     85    180     90.0\\n\",\n       \"1  Jenny       85     80    165     82.5\\n\",\n       \"2   Nate       10     30     40     20.0\"\n      ]\n     },\n     \"execution_count\": 250,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"아래와 같이, 리스트에 조건별 값을 담아서, 새로운 컬럼으로 추가시킬 수 있습니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 251,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"grades = []\\n\",\n    \"\\n\",\n    \"for row in df['average']:\\n\",\n    \"    if row >= 90:\\n\",\n 
   \"        grades.append('A')\\n\",\n    \"    elif row >= 80:\\n\",\n    \"        grades.append('B')\\n\",\n    \"    elif row >= 70:\\n\",\n    \"        grades.append('C')\\n\",\n    \"    else:\\n\",\n    \"        grades.append('F')\\n\",\n    \"        \\n\",\n    \"df['grade'] = grades\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 252,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>midterm</th>\\n\",\n       \"      <th>final</th>\\n\",\n       \"      <th>total</th>\\n\",\n       \"      <th>average</th>\\n\",\n       \"      <th>grade</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>95</td>\\n\",\n       \"      <td>85</td>\\n\",\n       \"      <td>180</td>\\n\",\n       \"      <td>90.0</td>\\n\",\n       \"      <td>A</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>85</td>\\n\",\n       \"      <td>80</td>\\n\",\n       \"      <td>165</td>\\n\",\n       \"      <td>82.5</td>\\n\",\n       \"      <td>B</td>\\n\",\n       \"    </tr>\\n\",\n     
  \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>10</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>40</td>\\n\",\n       \"      <td>20.0</td>\\n\",\n       \"      <td>F</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  midterm  final  total  average grade\\n\",\n       \"0   John       95     85    180     90.0     A\\n\",\n       \"1  Jenny       85     80    165     82.5     B\\n\",\n       \"2   Nate       10     30     40     20.0     F\"\n      ]\n     },\n     \"execution_count\": 252,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"apply 함수 사용 예제입니다.  \\n\",\n    \"apply를 사용하시면, 깔끔하게 컬럼의 값을 변경하는 코드를 구현하실 수 있습니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 253,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def pass_or_fail(row):\\n\",\n    \"    print(row)\\n\",\n    \"    if row != \\\"F\\\":\\n\",\n    \"        return 'Pass'\\n\",\n    \"    else:\\n\",\n    \"        return 'Fail'\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 254,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"A\\n\",\n      \"B\\n\",\n      \"F\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"df.grade = df.grade.apply(pass_or_fail)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 255,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    
}\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>midterm</th>\\n\",\n       \"      <th>final</th>\\n\",\n       \"      <th>total</th>\\n\",\n       \"      <th>average</th>\\n\",\n       \"      <th>grade</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>95</td>\\n\",\n       \"      <td>85</td>\\n\",\n       \"      <td>180</td>\\n\",\n       \"      <td>90.0</td>\\n\",\n       \"      <td>Pass</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>85</td>\\n\",\n       \"      <td>80</td>\\n\",\n       \"      <td>165</td>\\n\",\n       \"      <td>82.5</td>\\n\",\n       \"      <td>Pass</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>10</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>40</td>\\n\",\n       \"      <td>20.0</td>\\n\",\n       \"      <td>Fail</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  midterm  final  total  average grade\\n\",\n       \"0   John       95     85    180     90.0  Pass\\n\",\n       \"1  Jenny       85     80    165     82.5  Pass\\n\",\n       \"2   Nate       10     30     40     
20.0  Fail\"\n      ]\n     },\n     \"execution_count\": 255,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"apply를 사용해서 연월일의 정보에서 연도만 빼보는 예제입니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 256,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>yyyy-mm-dd</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>2000-06-27</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>2002-09-24</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>2005-12-20</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   yyyy-mm-dd\\n\",\n       \"0  2000-06-27\\n\",\n       \"1  2002-09-24\\n\",\n       \"2  2005-12-20\"\n      ]\n     },\n     \"execution_count\": 256,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    
\"date_list = [{'yyyy-mm-dd': '2000-06-27'},\\n\",\n    \"         {'yyyy-mm-dd': '2002-09-24'},\\n\",\n    \"         {'yyyy-mm-dd': '2005-12-20'}]\\n\",\n    \"df = pd.DataFrame(date_list, columns = ['yyyy-mm-dd'])\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 257,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def extract_year(row):\\n\",\n    \"    return row.split('-')[0]\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 258,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df['year'] = df['yyyy-mm-dd'].apply(extract_year)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 259,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>yyyy-mm-dd</th>\\n\",\n       \"      <th>year</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>2000-06-27</td>\\n\",\n       \"      <td>2000</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>2002-09-24</td>\\n\",\n       \"      <td>2002</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"   
   <td>2005-12-20</td>\\n\",\n       \"      <td>2005</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   yyyy-mm-dd  year\\n\",\n       \"0  2000-06-27  2000\\n\",\n       \"1  2002-09-24  2002\\n\",\n       \"2  2005-12-20  2005\"\n      ]\n     },\n     \"execution_count\": 259,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### apply 함수에 파라미터 전달하기\\n\",\n    \"키워드 파라미터를 사용하시면, apply가 적용된 함수에 파라미터를 전달하실 수 있습니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 260,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def extract_year(year, current_year):\\n\",\n    \"    return current_year - int(year)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 261,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>yyyy-mm-dd</th>\\n\",\n       \"      <th>year</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      
<td>2000-06-27</td>\\n\",\n       \"      <td>2000</td>\\n\",\n       \"      <td>18</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>2002-09-24</td>\\n\",\n       \"      <td>2002</td>\\n\",\n       \"      <td>16</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>2005-12-20</td>\\n\",\n       \"      <td>2005</td>\\n\",\n       \"      <td>13</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   yyyy-mm-dd  year  age\\n\",\n       \"0  2000-06-27  2000   18\\n\",\n       \"1  2002-09-24  2002   16\\n\",\n       \"2  2005-12-20  2005   13\"\n      ]\n     },\n     \"execution_count\": 261,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df['age'] = df['year'].apply(extract_year, current_year=2018)\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### apply 함수에 한 개 이상의 파라미터 전달하기\\n\",\n    \"키워드 파라미터를 추가해주시면, 원하시는만큼의 파라미터를 함수에 전달 가능합니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 262,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def get_introduce(age, prefix, suffix):\\n\",\n    \"    return prefix + str(age) + suffix\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 263,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        
text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>yyyy-mm-dd</th>\\n\",\n       \"      <th>year</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>introduce</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>2000-06-27</td>\\n\",\n       \"      <td>2000</td>\\n\",\n       \"      <td>18</td>\\n\",\n       \"      <td>I am 18 years old</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>2002-09-24</td>\\n\",\n       \"      <td>2002</td>\\n\",\n       \"      <td>16</td>\\n\",\n       \"      <td>I am 16 years old</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>2005-12-20</td>\\n\",\n       \"      <td>2005</td>\\n\",\n       \"      <td>13</td>\\n\",\n       \"      <td>I am 13 years old</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   yyyy-mm-dd  year  age          introduce\\n\",\n       \"0  2000-06-27  2000   18  I am 18 years old\\n\",\n       \"1  2002-09-24  2002   16  I am 16 years old\\n\",\n       \"2  2005-12-20  2005   13  I am 13 years old\"\n      ]\n     },\n     \"execution_count\": 263,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df['introduce'] = df['age'].apply(get_introduce, prefix=\\\"I am \\\", suffix=\\\" years old\\\")\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### apply 함수에 여러개의 컬럼을 동시에 전달하기\\n\",\n    \"axis=1이라는 키워드 파라미터를 
apply 함수에 전달해주면, 모든 컬럼을 지정된 함수에서 사용 가능합니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 264,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def get_introduce2(row):\\n\",\n    \"    return \\\"I was born in \\\"+str(row.year)+\\\" my age is \\\"+str(row.age)\\n\",\n    \"\\n\",\n    \"df.introduce = df.apply(get_introduce2, axis=1)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 265,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>yyyy-mm-dd</th>\\n\",\n       \"      <th>year</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>introduce</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>2000-06-27</td>\\n\",\n       \"      <td>2000</td>\\n\",\n       \"      <td>18</td>\\n\",\n       \"      <td>I was born in 2000 my age is 18</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>2002-09-24</td>\\n\",\n       \"      <td>2002</td>\\n\",\n       \"      <td>16</td>\\n\",\n       \"      <td>I was born in 2002 my age is 16</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      
<th>2</th>\\n\",\n       \"      <td>2005-12-20</td>\\n\",\n       \"      <td>2005</td>\\n\",\n       \"      <td>13</td>\\n\",\n       \"      <td>I was born in 2005 my age is 13</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   yyyy-mm-dd  year  age                        introduce\\n\",\n       \"0  2000-06-27  2000   18  I was born in 2000 my age is 18\\n\",\n       \"1  2002-09-24  2002   16  I was born in 2002 my age is 16\\n\",\n       \"2  2005-12-20  2005   13  I was born in 2005 my age is 13\"\n      ]\n     },\n     \"execution_count\": 265,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Map 함수로 컬럼 추가 및 변경하기\\n\",\n    \"파라미터로 함수를 전달하면 apply 함수와 동일하게 컬럼값을 추가 및 변경할 수 있습니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 266,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>yyyy-mm-dd</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>2000-06-27</td>\\n\",\n       
\"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>2002-09-24</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>2005-12-20</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   yyyy-mm-dd\\n\",\n       \"0  2000-06-27\\n\",\n       \"1  2002-09-24\\n\",\n       \"2  2005-12-20\"\n      ]\n     },\n     \"execution_count\": 266,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"def extract_year(row):\\n\",\n    \"    return row.split('-')[0]\\n\",\n    \"\\n\",\n    \"date_list = [{'yyyy-mm-dd': '2000-06-27'},\\n\",\n    \"         {'yyyy-mm-dd': '2002-09-24'},\\n\",\n    \"         {'yyyy-mm-dd': '2005-12-20'}]\\n\",\n    \"df = pd.DataFrame(date_list, columns = ['yyyy-mm-dd'])\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 267,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>yyyy-mm-dd</th>\\n\",\n       \"      <th>year</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n      
 \"      <td>2000-06-27</td>\\n\",\n       \"      <td>2000</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>2002-09-24</td>\\n\",\n       \"      <td>2002</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>2005-12-20</td>\\n\",\n       \"      <td>2005</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   yyyy-mm-dd  year\\n\",\n       \"0  2000-06-27  2000\\n\",\n       \"1  2002-09-24  2002\\n\",\n       \"2  2005-12-20  2005\"\n      ]\n     },\n     \"execution_count\": 267,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df['year'] = df['yyyy-mm-dd'].map(extract_year)\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"파라미터로 딕셔너리를 전달하면 컬럼값을 쉽게 원하는 값으로 변경 가능합니다.  
\\n\",\n    \"기존의 컬럼값은 딕셔너리의 key로 사용되고, 해당되는 value의 값으로 컬럼값이 변경됩니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 268,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   age        job\\n\",\n       \"0   20    student\\n\",\n       \"1   30  developer\\n\",\n       \"2   30    teacher\"\n      ]\n     },\n     \"execution_count\": 268,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"job_list = [{'age': 20, 'job': 'student'},\\n\",\n    \"         {'age': 30, 'job': 'developer'},\\n\",\n    
\"         {'age': 30, 'job': 'teacher'}]\\n\",\n    \"df = pd.DataFrame(job_list)\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 269,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>1</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>2</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>3</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   age  job\\n\",\n       \"0   20    1\\n\",\n       \"1   30    2\\n\",\n       \"2   30    3\"\n      ]\n     },\n     \"execution_count\": 269,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.job = df.job.map({\\\"student\\\":1,\\\"developer\\\":2,\\\"teacher\\\":3})\\n\",\n    \"df\"\n   ]\n  },\n  {\n   
\"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Applymap\\n\",\n    \"데이터프레임 전체의 각각의 값을 한번에 변경시키실 때 사용하시면 좋습니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 270,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>x</th>\\n\",\n       \"      <th>y</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>5.5</td>\\n\",\n       \"      <td>-5.6</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>-5.2</td>\\n\",\n       \"      <td>5.5</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>-1.6</td>\\n\",\n       \"      <td>-4.5</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"     x    y\\n\",\n       \"0  5.5 -5.6\\n\",\n       \"1 -5.2  5.5\\n\",\n       \"2 -1.6 -4.5\"\n      ]\n     },\n     \"execution_count\": 270,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"x_y = [{'x': 5.5, 'y': -5.6},\\n\",\n    \"         {'x': -5.2, 
'y': 5.5},\\n\",\n    \"         {'x': -1.6, 'y': -4.5}]\\n\",\n    \"df = pd.DataFrame(x_y)\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 271,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>x</th>\\n\",\n       \"      <th>y</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>6.0</td>\\n\",\n       \"      <td>-6.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>-5.0</td>\\n\",\n       \"      <td>6.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>-2.0</td>\\n\",\n       \"      <td>-4.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"     x    y\\n\",\n       \"0  6.0 -6.0\\n\",\n       \"1 -5.0  6.0\\n\",\n       \"2 -2.0 -4.0\"\n      ]\n     },\n     \"execution_count\": 271,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df = df.applymap(np.around)\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   
\"metadata\": {},\n   \"source\": [\n    \"# 데이터프레임에 로우 추가하기\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 272,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>midterm</th>\\n\",\n       \"      <th>final</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>95</td>\\n\",\n       \"      <td>85</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>85</td>\\n\",\n       \"      <td>80</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>10</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  midterm  final\\n\",\n       \"0   John       95     85\\n\",\n       \"1  Jenny       85     80\\n\",\n       \"2   Nate       10     30\"\n      ]\n     },\n     \"execution_count\": 272,\n     \"metadata\": {},\n     \"output_type\": 
\"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"friend_dict_list = [{'name': 'John', 'midterm': 95, 'final': 85},\\n\",\n    \"         {'name': 'Jenny', 'midterm': 85, 'final': 80},\\n\",\n    \"         {'name': 'Nate', 'midterm': 10, 'final': 30}]\\n\",\n    \"df = pd.DataFrame(friend_dict_list, columns = ['name', 'midterm', 'final'])\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 273,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df2 = pd.DataFrame([['Ben', 50,50]], columns = ['name', 'midterm', 'final'])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 274,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>midterm</th>\\n\",\n       \"      <th>final</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>Ben</td>\\n\",\n       \"      <td>50</td>\\n\",\n       \"      <td>50</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"  name  midterm  final\\n\",\n       \"0  Ben       50     50\"\n      ]\n     },\n     \"execution_count\": 
274,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df2.head()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 275,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>midterm</th>\\n\",\n       \"      <th>final</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>95</td>\\n\",\n       \"      <td>85</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Jenny</td>\\n\",\n       \"      <td>85</td>\\n\",\n       \"      <td>80</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>10</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Ben</td>\\n\",\n       \"      <td>50</td>\\n\",\n       \"      <td>50</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name  
midterm  final\\n\",\n       \"0   John       95     85\\n\",\n       \"1  Jenny       85     80\\n\",\n       \"2   Nate       10     30\\n\",\n       \"3    Ben       50     50\"\n      ]\n     },\n     \"execution_count\": 275,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.append(df2, ignore_index=True)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Group by\\n\",\n    \"데이터에서 정보를 취하기 위해서 그룹별로 묶는 방법에 대해 알아보겠습니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 276,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>major</th>\\n\",\n       \"      <th>sex</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n   
    \"      <td>Abraham</td>\\n\",\n       \"      <td>Physics</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>Psychology</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Janny</td>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>Yuna</td>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>6</th>\\n\",\n       \"      <td>Jeniffer</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>7</th>\\n\",\n       \"      <td>Edward</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>8</th>\\n\",\n       \"      <td>Zara</td>\\n\",\n       \"      <td>Psychology</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9</th>\\n\",\n       \"      <td>Wendy</td>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>10</th>\\n\",\n       \"      <td>Sera</td>\\n\",\n       \"      <td>Psychology</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"        name             major     sex\\n\",\n       \"0       John  Computer Science    male\\n\",\n       \"1       Nate  Computer 
Science    male\\n\",\n       \"2    Abraham           Physics    male\\n\",\n       \"3      Brian        Psychology    male\\n\",\n       \"4      Janny         Economics  female\\n\",\n       \"5       Yuna         Economics  female\\n\",\n       \"6   Jeniffer  Computer Science  female\\n\",\n       \"7     Edward  Computer Science    male\\n\",\n       \"8       Zara        Psychology  female\\n\",\n       \"9      Wendy         Economics  female\\n\",\n       \"10      Sera        Psychology  female\"\n      ]\n     },\n     \"execution_count\": 276,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"student_list = [{'name': 'John', 'major': \\\"Computer Science\\\", 'sex': \\\"male\\\"},\\n\",\n    \"                {'name': 'Nate', 'major': \\\"Computer Science\\\", 'sex': \\\"male\\\"},\\n\",\n    \"                {'name': 'Abraham', 'major': \\\"Physics\\\", 'sex': \\\"male\\\"},\\n\",\n    \"                {'name': 'Brian', 'major': \\\"Psychology\\\", 'sex': \\\"male\\\"},\\n\",\n    \"                {'name': 'Janny', 'major': \\\"Economics\\\", 'sex': \\\"female\\\"},\\n\",\n    \"                {'name': 'Yuna', 'major': \\\"Economics\\\", 'sex': \\\"female\\\"},\\n\",\n    \"                {'name': 'Jeniffer', 'major': \\\"Computer Science\\\", 'sex': \\\"female\\\"},\\n\",\n    \"                {'name': 'Edward', 'major': \\\"Computer Science\\\", 'sex': \\\"male\\\"},\\n\",\n    \"                {'name': 'Zara', 'major': \\\"Psychology\\\", 'sex': \\\"female\\\"},\\n\",\n    \"                {'name': 'Wendy', 'major': \\\"Economics\\\", 'sex': \\\"female\\\"},\\n\",\n    \"                {'name': 'Sera', 'major': \\\"Psychology\\\", 'sex': \\\"female\\\"}\\n\",\n    \"         ]\\n\",\n    \"df = pd.DataFrame(student_list, columns = ['name', 'major', 'sex'])\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 277,\n   \"metadata\": {},\n   
\"outputs\": [],\n   \"source\": [\n    \"groupby_major = df.groupby('major')\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 278,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"{'Computer Science': Int64Index([0, 1, 6, 7], dtype='int64'),\\n\",\n       \" 'Economics': Int64Index([4, 5, 9], dtype='int64'),\\n\",\n       \" 'Physics': Int64Index([2], dtype='int64'),\\n\",\n       \" 'Psychology': Int64Index([3, 8, 10], dtype='int64')}\"\n      ]\n     },\n     \"execution_count\": 278,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"groupby_major.groups\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"Here we can see that Computer Science has mostly male students, while Economics has mostly female students.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 279,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"Computer Science: 4\\n\",\n      \"       name             major     sex\\n\",\n      \"0      John  Computer Science    male\\n\",\n      \"1      Nate  Computer Science    male\\n\",\n      \"6  Jeniffer  Computer Science  female\\n\",\n      \"7    Edward  Computer Science    male\\n\",\n      \"\\n\",\n      \"Economics: 3\\n\",\n      \"    name      major     sex\\n\",\n      \"4  Janny  Economics  female\\n\",\n      \"5   Yuna  Economics  female\\n\",\n      \"9  Wendy  Economics  female\\n\",\n      \"\\n\",\n      \"Physics: 1\\n\",\n      \"      name    major   sex\\n\",\n      \"2  Abraham  Physics  male\\n\",\n      \"\\n\",\n      \"Psychology: 3\\n\",\n      \"     name       major     sex\\n\",\n      \"3   Brian  Psychology    male\\n\",\n      \"8    Zara  Psychology  female\\n\",\n      \"10   Sera  Psychology  female\\n\",\n      \"\\n\"\n     ]\n    }\n   ],\n   
\"source\": [\n    \"for name, group in groupby_major:\\n\",\n    \"    print(name + \\\": \\\" + str(len(group)))\\n\",\n    \"    print(group)\\n\",\n    \"    print()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"그룹 객체를 다시 데이터프레임으로 생성하는 예제입니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 280,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>major</th>\\n\",\n       \"      <th>count</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>4</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"      <td>3</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Physics</td>\\n\",\n       \"      <td>1</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Psychology</td>\\n\",\n       \"      <td>3</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      
\"text/plain\": [\n       \"              major  count\\n\",\n       \"0  Computer Science      4\\n\",\n       \"1         Economics      3\\n\",\n       \"2           Physics      1\\n\",\n       \"3        Psychology      3\"\n      ]\n     },\n     \"execution_count\": 280,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df_major_cnt = pd.DataFrame({'count' : groupby_major.size()}).reset_index()\\n\",\n    \"df_major_cnt\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 281,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"groupby_sex = df.groupby('sex')\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"아래의 출력을 통해, 이 학교의 남녀 성비가 균등하다는 정보를 알 수 있습니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 282,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"female: 6\\n\",\n      \"        name             major     sex\\n\",\n      \"4      Janny         Economics  female\\n\",\n      \"5       Yuna         Economics  female\\n\",\n      \"6   Jeniffer  Computer Science  female\\n\",\n      \"8       Zara        Psychology  female\\n\",\n      \"9      Wendy         Economics  female\\n\",\n      \"10      Sera        Psychology  female\\n\",\n      \"\\n\",\n      \"male: 5\\n\",\n      \"      name             major   sex\\n\",\n      \"0     John  Computer Science  male\\n\",\n      \"1     Nate  Computer Science  male\\n\",\n      \"2  Abraham           Physics  male\\n\",\n      \"3    Brian        Psychology  male\\n\",\n      \"7   Edward  Computer Science  male\\n\",\n      \"\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"for name, group in groupby_sex:\\n\",\n    \"    print(name + \\\": \\\" + str(len(group)))\\n\",\n    \"    print(group)\\n\",\n    \"    print()\"\n   ]\n  },\n  {\n   
\"cell_type\": \"code\",\n   \"execution_count\": 283,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>sex</th>\\n\",\n       \"      <th>count</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>female</td>\\n\",\n       \"      <td>6</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>male</td>\\n\",\n       \"      <td>5</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"      sex  count\\n\",\n       \"0  female      6\\n\",\n       \"1    male      5\"\n      ]\n     },\n     \"execution_count\": 283,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df_sex_cnt = pd.DataFrame({'count' : groupby_sex.size()}).reset_index()\\n\",\n    \"df_sex_cnt\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# 중복 데이터 드롭하기\\n\",\n    \"중복된 데이터 드롭하는 방법에 대해 알아보겠습니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 284,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n    
  \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>major</th>\\n\",\n       \"      <th>sex</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Abraham</td>\\n\",\n       \"      <td>Physics</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>Psychology</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Janny</td>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>Yuna</td>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"    
  <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>6</th>\\n\",\n       \"      <td>Jeniffer</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>7</th>\\n\",\n       \"      <td>Edward</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>8</th>\\n\",\n       \"      <td>Zara</td>\\n\",\n       \"      <td>Psychology</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9</th>\\n\",\n       \"      <td>Wendy</td>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>10</th>\\n\",\n       \"      <td>Sera</td>\\n\",\n       \"      <td>Psychology</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>11</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"        name             major     sex\\n\",\n       \"0       John  Computer Science    male\\n\",\n       \"1       Nate  Computer Science    male\\n\",\n       \"2    Abraham           Physics    male\\n\",\n       \"3      Brian        Psychology    male\\n\",\n       \"4      Janny         Economics  female\\n\",\n       \"5       Yuna         Economics  female\\n\",\n       \"6   Jeniffer  Computer Science  female\\n\",\n       \"7     Edward  Computer Science    male\\n\",\n       \"8       Zara        Psychology  female\\n\",\n       \"9      Wendy         Economics  female\\n\",\n       \"10     
 Sera        Psychology  female\\n\",\n       \"11      John  Computer Science    male\"\n      ]\n     },\n     \"execution_count\": 284,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"student_list = [{'name': 'John', 'major': \\\"Computer Science\\\", 'sex': \\\"male\\\"},\\n\",\n    \"                {'name': 'Nate', 'major': \\\"Computer Science\\\", 'sex': \\\"male\\\"},\\n\",\n    \"                {'name': 'Abraham', 'major': \\\"Physics\\\", 'sex': \\\"male\\\"},\\n\",\n    \"                {'name': 'Brian', 'major': \\\"Psychology\\\", 'sex': \\\"male\\\"},\\n\",\n    \"                {'name': 'Janny', 'major': \\\"Economics\\\", 'sex': \\\"female\\\"},\\n\",\n    \"                {'name': 'Yuna', 'major': \\\"Economics\\\", 'sex': \\\"female\\\"},\\n\",\n    \"                {'name': 'Jeniffer', 'major': \\\"Computer Science\\\", 'sex': \\\"female\\\"},\\n\",\n    \"                {'name': 'Edward', 'major': \\\"Computer Science\\\", 'sex': \\\"male\\\"},\\n\",\n    \"                {'name': 'Zara', 'major': \\\"Psychology\\\", 'sex': \\\"female\\\"},\\n\",\n    \"                {'name': 'Wendy', 'major': \\\"Economics\\\", 'sex': \\\"female\\\"},\\n\",\n    \"                {'name': 'Sera', 'major': \\\"Psychology\\\", 'sex': \\\"female\\\"},\\n\",\n    \"                {'name': 'John', 'major': \\\"Computer Science\\\", 'sex': \\\"male\\\"},\\n\",\n    \"         ]\\n\",\n    \"df = pd.DataFrame(student_list, columns = ['name', 'major', 'sex'])\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## 중복된 데이터 확인 하기\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 285,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"0     False\\n\",\n       \"1     False\\n\",\n       \"2     False\\n\",\n       \"3     False\\n\",\n       \"4     False\\n\",\n       
\"5     False\\n\",\n       \"6     False\\n\",\n       \"7     False\\n\",\n       \"8     False\\n\",\n       \"9     False\\n\",\n       \"10    False\\n\",\n       \"11     True\\n\",\n       \"dtype: bool\"\n      ]\n     },\n     \"execution_count\": 285,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.duplicated()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"drop_duplicates 함수로 중복 데이터를 삭제하는 예제입니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 286,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df = df.drop_duplicates()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 287,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>major</th>\\n\",\n       \"      <th>sex</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"   
   <td>Computer Science</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Abraham</td>\\n\",\n       \"      <td>Physics</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>Psychology</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Janny</td>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>Yuna</td>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>6</th>\\n\",\n       \"      <td>Jeniffer</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>7</th>\\n\",\n       \"      <td>Edward</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>8</th>\\n\",\n       \"      <td>Zara</td>\\n\",\n       \"      <td>Psychology</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9</th>\\n\",\n       \"      <td>Wendy</td>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>10</th>\\n\",\n       \"      <td>Sera</td>\\n\",\n       \"      <td>Psychology</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      
\"text/plain\": [\n       \"        name             major     sex\\n\",\n       \"0       John  Computer Science    male\\n\",\n       \"1       Nate  Computer Science    male\\n\",\n       \"2    Abraham           Physics    male\\n\",\n       \"3      Brian        Psychology    male\\n\",\n       \"4      Janny         Economics  female\\n\",\n       \"5       Yuna         Economics  female\\n\",\n       \"6   Jeniffer  Computer Science  female\\n\",\n       \"7     Edward  Computer Science    male\\n\",\n       \"8       Zara        Psychology  female\\n\",\n       \"9      Wendy         Economics  female\\n\",\n       \"10      Sera        Psychology  female\"\n      ]\n     },\n     \"execution_count\": 287,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 288,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>major</th>\\n\",\n       \"      <th>sex</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      
<td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Abraham</td>\\n\",\n       \"      <td>Physics</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>Psychology</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Janny</td>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>Yuna</td>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>6</th>\\n\",\n       \"      <td>Jeniffer</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>7</th>\\n\",\n       \"      <td>Edward</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>8</th>\\n\",\n       \"      <td>Zara</td>\\n\",\n       \"      <td>Psychology</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9</th>\\n\",\n       \"      <td>Wendy</td>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>10</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>None</td>\\n\",\n      
 \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>11</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>None</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"        name             major     sex\\n\",\n       \"0       John  Computer Science    male\\n\",\n       \"1       Nate  Computer Science    male\\n\",\n       \"2    Abraham           Physics    male\\n\",\n       \"3      Brian        Psychology    male\\n\",\n       \"4      Janny         Economics  female\\n\",\n       \"5       Yuna         Economics  female\\n\",\n       \"6   Jeniffer  Computer Science  female\\n\",\n       \"7     Edward  Computer Science    male\\n\",\n       \"8       Zara        Psychology  female\\n\",\n       \"9      Wendy         Economics  female\\n\",\n       \"10      Nate              None    male\\n\",\n       \"11      John  Computer Science    None\"\n      ]\n     },\n     \"execution_count\": 288,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"student_list = [{'name': 'John', 'major': \\\"Computer Science\\\", 'sex': \\\"male\\\"},\\n\",\n    \"                {'name': 'Nate', 'major': \\\"Computer Science\\\", 'sex': \\\"male\\\"},\\n\",\n    \"                {'name': 'Abraham', 'major': \\\"Physics\\\", 'sex': \\\"male\\\"},\\n\",\n    \"                {'name': 'Brian', 'major': \\\"Psychology\\\", 'sex': \\\"male\\\"},\\n\",\n    \"                {'name': 'Janny', 'major': \\\"Economics\\\", 'sex': \\\"female\\\"},\\n\",\n    \"                {'name': 'Yuna', 'major': \\\"Economics\\\", 'sex': \\\"female\\\"},\\n\",\n    \"                {'name': 'Jeniffer', 'major': \\\"Computer Science\\\", 'sex': \\\"female\\\"},\\n\",\n    \"                {'name': 'Edward', 'major': \\\"Computer 
Science\\\", 'sex': \\\"male\\\"},\\n\",\n    \"                {'name': 'Zara', 'major': \\\"Psychology\\\", 'sex': \\\"female\\\"},\\n\",\n    \"                {'name': 'Wendy', 'major': \\\"Economics\\\", 'sex': \\\"female\\\"},\\n\",\n    \"                {'name': 'Nate', 'major': None, 'sex': \\\"male\\\"},\\n\",\n    \"                {'name': 'John', 'major': \\\"Computer Science\\\", 'sex': None},\\n\",\n    \"         ]\\n\",\n    \"df = pd.DataFrame(student_list, columns = ['name', 'major', 'sex'])\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"name 컬럼이 똑같을 경우, 중복된 데이터라고 표시하라는 예제입니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 289,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"0     False\\n\",\n       \"1     False\\n\",\n       \"2     False\\n\",\n       \"3     False\\n\",\n       \"4     False\\n\",\n       \"5     False\\n\",\n       \"6     False\\n\",\n       \"7     False\\n\",\n       \"8     False\\n\",\n       \"9     False\\n\",\n       \"10     True\\n\",\n       \"11     True\\n\",\n       \"dtype: bool\"\n      ]\n     },\n     \"execution_count\": 289,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.duplicated(['name'])\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"keep 값을 first 또는 last라고 값을 줘서 중복된 값 중, 어느값을 살릴 지 결정하실 수 있습니다.  
\\n\",\n    \"기본적으로 first로 설정되어 있습니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 290,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>major</th>\\n\",\n       \"      <th>sex</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Abraham</td>\\n\",\n       \"      <td>Physics</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>Psychology</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Janny</td>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>Yuna</td>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>6</th>\\n\",\n       \"      <td>Jeniffer</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      
<td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>7</th>\\n\",\n       \"      <td>Edward</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>8</th>\\n\",\n       \"      <td>Zara</td>\\n\",\n       \"      <td>Psychology</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9</th>\\n\",\n       \"      <td>Wendy</td>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>10</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>None</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>11</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>None</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"        name             major     sex\\n\",\n       \"2    Abraham           Physics    male\\n\",\n       \"3      Brian        Psychology    male\\n\",\n       \"4      Janny         Economics  female\\n\",\n       \"5       Yuna         Economics  female\\n\",\n       \"6   Jeniffer  Computer Science  female\\n\",\n       \"7     Edward  Computer Science    male\\n\",\n       \"8       Zara        Psychology  female\\n\",\n       \"9      Wendy         Economics  female\\n\",\n       \"10      Nate              None    male\\n\",\n       \"11      John  Computer Science    None\"\n      ]\n     },\n     \"execution_count\": 290,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.drop_duplicates(['name'], keep='last')\"\n   ]\n  },\n  {\n   \"cell_type\": 
\"code\",\n   \"execution_count\": 291,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>major</th>\\n\",\n       \"      <th>sex</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Abraham</td>\\n\",\n       \"      <td>Physics</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>Psychology</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Janny</td>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"   
   <th>5</th>\\n\",\n       \"      <td>Yuna</td>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>6</th>\\n\",\n       \"      <td>Jeniffer</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>7</th>\\n\",\n       \"      <td>Edward</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>8</th>\\n\",\n       \"      <td>Zara</td>\\n\",\n       \"      <td>Psychology</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>9</th>\\n\",\n       \"      <td>Wendy</td>\\n\",\n       \"      <td>Economics</td>\\n\",\n       \"      <td>female</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>10</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>None</td>\\n\",\n       \"      <td>male</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>11</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>Computer Science</td>\\n\",\n       \"      <td>None</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"        name             major     sex\\n\",\n       \"0       John  Computer Science    male\\n\",\n       \"1       Nate  Computer Science    male\\n\",\n       \"2    Abraham           Physics    male\\n\",\n       \"3      Brian        Psychology    male\\n\",\n       \"4      Janny         Economics  female\\n\",\n       \"5       Yuna         Economics  female\\n\",\n       \"6   Jeniffer  Computer Science  female\\n\",\n       \"7     Edward  Computer Science    male\\n\",\n       \"8       Zara  
      Psychology  female\\n\",\n       \"9      Wendy         Economics  female\\n\",\n       \"10      Nate              None    male\\n\",\n       \"11      John  Computer Science    None\"\n      ]\n     },\n     \"execution_count\": 291,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# None 처리 하기\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 292,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>40.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>35.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Yuna</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      
<td>37.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Abraham</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>10.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>12.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>Janny</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>11.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>6</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>7</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>NaN</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"      name      job   age\\n\",\n       \"0     John  teacher  40.0\\n\",\n       \"1     Nate  teacher  35.0\\n\",\n       \"2     Yuna  teacher  37.0\\n\",\n       \"3  Abraham  student  10.0\\n\",\n       \"4    Brian  student  12.0\\n\",\n       \"5    Janny  student  11.0\\n\",\n       \"6     Nate  teacher   NaN\\n\",\n       \"7     John  student   NaN\"\n      ]\n     },\n     \"execution_count\": 292,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"school_id_list = [{'name': 'John', 'job': \\\"teacher\\\", 'age': 40},\\n\",\n    \"                {'name': 'Nate', 'job': \\\"teacher\\\", 'age': 35},\\n\",\n    \"                {'name': 'Yuna', 'job': \\\"teacher\\\", 'age': 37},\\n\",\n    \"                {'name': 'Abraham', 'job': \\\"student\\\", 'age': 10},\\n\",\n    \"        
        {'name': 'Brian', 'job': \\\"student\\\", 'age': 12},\\n\",\n    \"                {'name': 'Janny', 'job': \\\"student\\\", 'age': 11},\\n\",\n    \"                {'name': 'Nate', 'job': \\\"teacher\\\", 'age': None},\\n\",\n    \"                {'name': 'John', 'job': \\\"student\\\", 'age': None}\\n\",\n    \"         ]\\n\",\n    \"df = pd.DataFrame(school_id_list, columns = ['name', 'job', 'age'])\\n\",\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Null 또는 NaN 확인하기\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 293,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"<class 'pandas.core.frame.DataFrame'>\\n\",\n      \"RangeIndex: 8 entries, 0 to 7\\n\",\n      \"Data columns (total 3 columns):\\n\",\n      \"name    8 non-null object\\n\",\n      \"job     8 non-null object\\n\",\n      \"age     6 non-null float64\\n\",\n      \"dtypes: float64(1), object(2)\\n\",\n      \"memory usage: 272.0+ bytes\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"df.info()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 294,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      
<th>name</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>6</th>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>True</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>7</th>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>True</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name    job    age\\n\",\n       \"0  False  False  False\\n\",\n       \"1  False  False  False\\n\",\n       \"2  False  False  False\\n\",\n       
\"3  False  False  False\\n\",\n       \"4  False  False  False\\n\",\n       \"5  False  False  False\\n\",\n       \"6  False  False   True\\n\",\n       \"7  False  False   True\"\n      ]\n     },\n     \"execution_count\": 294,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.isna()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 295,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      
<td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>6</th>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>True</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>7</th>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>False</td>\\n\",\n       \"      <td>True</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"    name    job    age\\n\",\n       \"0  False  False  False\\n\",\n       \"1  False  False  False\\n\",\n       \"2  False  False  False\\n\",\n       \"3  False  False  False\\n\",\n       \"4  False  False  False\\n\",\n       \"5  False  False  False\\n\",\n       \"6  False  False   True\\n\",\n       \"7  False  False   True\"\n      ]\n     },\n     \"execution_count\": 295,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.isnull()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Null 또는 NaN 값 변경하기\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"아래는 Null을 0으로 설정하는 예제입니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 296,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    
.dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>40.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>35.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Yuna</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>37.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Abraham</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>10.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>12.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>Janny</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>11.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>6</th>\\n\",\n       
\"      <td>Nate</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>7</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"      name      job   age\\n\",\n       \"0     John  teacher  40.0\\n\",\n       \"1     Nate  teacher  35.0\\n\",\n       \"2     Yuna  teacher  37.0\\n\",\n       \"3  Abraham  student  10.0\\n\",\n       \"4    Brian  student  12.0\\n\",\n       \"5    Janny  student  11.0\\n\",\n       \"6     Nate  teacher   0.0\\n\",\n       \"7     John  student   0.0\"\n      ]\n     },\n     \"execution_count\": 296,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"tmp = df\\n\",\n    \"tmp[\\\"age\\\"] = tmp[\\\"age\\\"].fillna(0)\\n\",\n    \"tmp\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"0으로 설정하기 보다는 선생님의 중간 나이, 학생의 중간 나이로, 각각의 직업군에 맞게 Null값을 변경해줍니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 297,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# fill missing age with median age for each group (teacher, student)\\n\",\n    \"df[\\\"age\\\"].fillna(df.groupby(\\\"job\\\")[\\\"age\\\"].transform(\\\"median\\\"), inplace=True)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 298,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       
\"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"      <th>age</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>40.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>35.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Yuna</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>37.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Abraham</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>10.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Brian</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>12.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>Janny</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>11.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>6</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>7</th>\\n\",\n       \"      <td>John</td>\\n\",\n       
\"      <td>student</td>\\n\",\n       \"      <td>0.0</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"      name      job   age\\n\",\n       \"0     John  teacher  40.0\\n\",\n       \"1     Nate  teacher  35.0\\n\",\n       \"2     Yuna  teacher  37.0\\n\",\n       \"3  Abraham  student  10.0\\n\",\n       \"4    Brian  student  12.0\\n\",\n       \"5    Janny  student  11.0\\n\",\n       \"6     Nate  teacher   0.0\\n\",\n       \"7     John  student   0.0\"\n      ]\n     },\n     \"execution_count\": 298,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Unique\\n\",\n    \"컬럼에 여러 값이 있을 때, 중복 없이 어떤 값들이 있는 지 확인하는 방법입니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 299,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"job_list = [{'name': 'John', 'job': \\\"teacher\\\"},\\n\",\n    \"                {'name': 'Nate', 'job': \\\"teacher\\\"},\\n\",\n    \"                {'name': 'Fred', 'job': \\\"teacher\\\"},\\n\",\n    \"                {'name': 'Abraham', 'job': \\\"student\\\"},\\n\",\n    \"                {'name': 'Brian', 'job': \\\"student\\\"},\\n\",\n    \"                {'name': 'Janny', 'job': \\\"developer\\\"},\\n\",\n    \"                {'name': 'Nate', 'job': \\\"teacher\\\"},\\n\",\n    \"                {'name': 'Obrian', 'job': \\\"dentist\\\"},\\n\",\n    \"                {'name': 'Yuna', 'job': \\\"teacher\\\"},\\n\",\n    \"                {'name': 'Rob', 'job': \\\"lawyer\\\"},\\n\",\n    \"                {'name': 'Brian', 'job': \\\"student\\\"},\\n\",\n    \"                {'name': 'Matt', 'job': \\\"student\\\"},\\n\",\n    \"                {'name': 'Wendy', 'job': \\\"banker\\\"},\\n\",\n    \"                {'name': 
'Edward', 'job': \\\"teacher\\\"},\\n\",\n    \"                {'name': 'Ian', 'job': \\\"teacher\\\"},\\n\",\n    \"                {'name': 'Chris', 'job': \\\"banker\\\"},\\n\",\n    \"                {'name': 'Philip', 'job': \\\"lawyer\\\"},\\n\",\n    \"                {'name': 'Janny', 'job': \\\"basketball player\\\"},\\n\",\n    \"                {'name': 'Gwen', 'job': \\\"teacher\\\"},\\n\",\n    \"                {'name': 'Jessy', 'job': \\\"student\\\"}\\n\",\n    \"         ]\\n\",\n    \"df = pd.DataFrame(job_list, columns = ['name', 'job'])\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"컬럼(시리즈)의 unique() 함수를 사용하여, 중복 없이, 컬럼에 있는 모든 값들을 출력할 수 있습니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 300,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"['teacher' 'student' 'developer' 'dentist' 'lawyer' 'banker'\\n\",\n      \" 'basketball player']\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"print( df.job.unique() )\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"각 유니크한 값별로 몇개의 데이터가 속하는 지 value_counts() 함수로 확인할 수 있습니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 301,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"teacher              8\\n\",\n       \"student              5\\n\",\n       \"banker               2\\n\",\n       \"lawyer               2\\n\",\n       \"basketball player    1\\n\",\n       \"dentist              1\\n\",\n       \"developer            1\\n\",\n       \"Name: job, dtype: int64\"\n      ]\n     },\n     \"execution_count\": 301,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"df.job.value_counts()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": 
{},\n   \"source\": [\n    \"# 두개의 데이터프레임 합치기\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 302,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"l1 = [{'name': 'John', 'job': \\\"teacher\\\"},\\n\",\n    \"      {'name': 'Nate', 'job': \\\"student\\\"},\\n\",\n    \"      {'name': 'Fred', 'job': \\\"developer\\\"}]\\n\",\n    \"\\n\",\n    \"l2 = [{'name': 'Ed', 'job': \\\"dentist\\\"},\\n\",\n    \"      {'name': 'Jack', 'job': \\\"farmer\\\"},\\n\",\n    \"      {'name': 'Ted', 'job': \\\"designer\\\"}]\\n\",\n    \"         \\n\",\n    \"df1 = pd.DataFrame(l1, columns = ['name', 'job'])\\n\",\n    \"df2 = pd.DataFrame(l2, columns = ['name', 'job'])\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## pd.concat\\n\",\n    \"두번째 데이터프레임을 첫번째 데이터프레임의 새로운 로우(행)로 합칩니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 303,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"frames = [df1, df2]\\n\",\n    \"result = pd.concat(frames, ignore_index=True)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 304,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    
</tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Fred</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Ed</td>\\n\",\n       \"      <td>dentist</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Jack</td>\\n\",\n       \"      <td>farmer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>Ted</td>\\n\",\n       \"      <td>designer</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   name        job\\n\",\n       \"0  John    teacher\\n\",\n       \"1  Nate    student\\n\",\n       \"2  Fred  developer\\n\",\n       \"3    Ed    dentist\\n\",\n       \"4  Jack     farmer\\n\",\n       \"5   Ted   designer\"\n      ]\n     },\n     \"execution_count\": 304,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"result\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## df.append\\n\",\n    \"두번째 데이터프레임을 첫번째 데이터프레임의 새로운 로우(행)로 합칩니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 305,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"l1 = [{'name': 'John', 'job': \\\"teacher\\\"},\\n\",\n    \"      {'name': 'Nate', 'job': \\\"student\\\"},\\n\",\n    \"      {'name': 'Fred', 'job': 
\\\"developer\\\"}]\\n\",\n    \"\\n\",\n    \"l2 = [{'name': 'Ed', 'job': \\\"dentist\\\"},\\n\",\n    \"      {'name': 'Jack', 'job': \\\"farmer\\\"},\\n\",\n    \"      {'name': 'Ted', 'job': \\\"designer\\\"}]\\n\",\n    \"         \\n\",\n    \"df1 = pd.DataFrame(l1, columns = ['name', 'job'])\\n\",\n    \"df2 = pd.DataFrame(l2, columns = ['name', 'job'])\\n\",\n    \"result = df1.append(df2, ignore_index=True)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 306,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>name</th>\\n\",\n       \"      <th>job</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Fred</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>Ed</td>\\n\",\n       \"      <td>dentist</td>\\n\",\n       \"  
  </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>Jack</td>\\n\",\n       \"      <td>farmer</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>5</th>\\n\",\n       \"      <td>Ted</td>\\n\",\n       \"      <td>designer</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   name        job\\n\",\n       \"0  John    teacher\\n\",\n       \"1  Nate    student\\n\",\n       \"2  Fred  developer\\n\",\n       \"3    Ed    dentist\\n\",\n       \"4  Jack     farmer\\n\",\n       \"5   Ted   designer\"\n      ]\n     },\n     \"execution_count\": 306,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"result\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## pd.concat\\n\",\n    \"두번째 데이터프레임을 첫번째 데이터프레임의 새로운 컬럼(열)으로 합칩니다.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 307,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"l1 = [{'name': 'John', 'job': \\\"teacher\\\"},\\n\",\n    \"      {'name': 'Nate', 'job': \\\"student\\\"},\\n\",\n    \"      {'name': 'Jack', 'job': \\\"developer\\\"}]\\n\",\n    \"\\n\",\n    \"l2 = [{'age': 25, 'country': \\\"U.S\\\"},\\n\",\n    \"      {'age': 30, 'country': \\\"U.K\\\"},\\n\",\n    \"      {'age': 45, 'country': \\\"Korea\\\"}]\\n\",\n    \"         \\n\",\n    \"df1 = pd.DataFrame(l1, columns = ['name', 'job'])\\n\",\n    \"df2 = pd.DataFrame(l2, columns = ['age', 'country'])\\n\",\n    \"result = pd.concat([df1, df2], axis=1, ignore_index=True)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 308,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr 
th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <th>3</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>John</td>\\n\",\n       \"      <td>teacher</td>\\n\",\n       \"      <td>25</td>\\n\",\n       \"      <td>U.S</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>Nate</td>\\n\",\n       \"      <td>student</td>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>U.K</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>Jack</td>\\n\",\n       \"      <td>developer</td>\\n\",\n       \"      <td>45</td>\\n\",\n       \"      <td>Korea</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"      0          1   2      3\\n\",\n       \"0  John    teacher  25    U.S\\n\",\n       \"1  Nate    student  30    U.K\\n\",\n       \"2  Jack  developer  45  Korea\"\n      ]\n     },\n     \"execution_count\": 308,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"result\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    
\"# 두개의 리스트를 묶어서 데이터프레임으로 생성하기\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 309,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/html\": [\n       \"<div>\\n\",\n       \"<style scoped>\\n\",\n       \"    .dataframe tbody tr th:only-of-type {\\n\",\n       \"        vertical-align: middle;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe tbody tr th {\\n\",\n       \"        vertical-align: top;\\n\",\n       \"    }\\n\",\n       \"\\n\",\n       \"    .dataframe thead th {\\n\",\n       \"        text-align: right;\\n\",\n       \"    }\\n\",\n       \"</style>\\n\",\n       \"<table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \"    <tr style=\\\"text-align: right;\\\">\\n\",\n       \"      <th></th>\\n\",\n       \"      <th>label</th>\\n\",\n       \"      <th>prediction</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>0</th>\\n\",\n       \"      <td>1</td>\\n\",\n       \"      <td>1</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>1</th>\\n\",\n       \"      <td>2</td>\\n\",\n       \"      <td>2</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>2</th>\\n\",\n       \"      <td>3</td>\\n\",\n       \"      <td>2</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>3</th>\\n\",\n       \"      <td>4</td>\\n\",\n       \"      <td>5</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <th>4</th>\\n\",\n       \"      <td>5</td>\\n\",\n       \"      <td>5</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table>\\n\",\n       \"</div>\"\n      ],\n      \"text/plain\": [\n       \"   label  prediction\\n\",\n       \"0      1           1\\n\",\n       \"1      2           2\\n\",\n       \"2      3           
2\\n\",\n       \"3      4           5\\n\",\n       \"4      5           5\"\n      ]\n     },\n     \"execution_count\": 309,\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"label = [1,2,3,4,5]\\n\",\n    \"prediction = [1,2,2,5,5]\\n\",\n    \"\\n\",\n    \"comparison = pd.DataFrame(\\n\",\n    \"    {'label': label,\\n\",\n    \"     'prediction': prediction\\n\",\n    \"    })\\n\",\n    \"\\n\",\n    \"comparison\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": []\n  }\n ],\n \"metadata\": {\n  \"kernelspec\": {\n   \"display_name\": \"Python 3\",\n   \"language\": \"python\",\n   \"name\": \"python3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.6.4\"\n  }\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 2\n}\n"
  }
]