Full Code of guipsamora/pandas_exercises for AI

master daf0fd2c7c34 cached

103 files

79.8 MB

996.6k tokens

1 requests

Download .txt

Showing preview only (3,984K chars total). Download the full file or copy to clipboard to get everything.

Repository: guipsamora/pandas_exercises
Branch: master
Commit: daf0fd2c7c34
Files: 103
Total size: 79.8 MB

Directory structure:
gitextract_4ujte8zq/

├── .github/
│   └── FUNDING.yml
├── .gitignore
├── 01_Getting_&_Knowing_Your_Data/
│   ├── Chipotle/
│   │   ├── Exercise_with_Solutions.ipynb
│   │   ├── Exercises.ipynb
│   │   └── Solutions.ipynb
│   ├── Occupation/
│   │   ├── Exercise_with_Solution.ipynb
│   │   ├── Exercises.ipynb
│   │   └── Solutions.ipynb
│   └── World_Food_Facts/
│       ├── Exercises.ipynb
│       ├── Exercises_with_solutions.ipynb
│       └── Solutions.ipynb
├── 02_Filtering_&_Sorting/
│   ├── Chipotle/
│   │   ├── Exercises.ipynb
│   │   ├── Exercises_with_solutions.ipynb
│   │   └── Solutions.ipynb
│   ├── Euro12/
│   │   ├── Euro_2012_stats_TEAM.csv
│   │   ├── Exercises.ipynb
│   │   ├── Exercises_with_Solutions.ipynb
│   │   └── Solutions.ipynb
│   └── Fictional_Army/
│       ├── Exercise.ipynb
│       ├── Exercise_with_solutions.ipynb
│       └── Solutions.ipynb
├── 03_Grouping/
│   ├── Alcohol_Consumption/
│   │   ├── Exercise.ipynb
│   │   ├── Exercise_with_solutions.ipynb
│   │   └── Solutions.ipynb
│   ├── Occupation/
│   │   ├── Exercise.ipynb
│   │   ├── Exercises_with_solutions.ipynb
│   │   └── Solutions.ipynb
│   └── Regiment/
│       ├── Exercises.ipynb
│       ├── Exercises_solutions.ipynb
│       └── Solutions.ipynb
├── 04_Apply/
│   ├── Students_Alcohol_Consumption/
│   │   ├── Exercises.ipynb
│   │   ├── Exercises_with_solutions.ipynb
│   │   ├── Solutions.ipynb
│   │   └── student-mat.csv
│   └── US_Crime_Rates/
│       ├── Exercises.ipynb
│       ├── Exercises_with_solutions.ipynb
│       ├── Solutions.ipynb
│       └── US_Crime_Rates_1960_2014.csv
├── 05_Merge/
│   ├── Auto_MPG/
│   │   ├── Exercises.ipynb
│   │   ├── Exercises_with_solutions.ipynb
│   │   ├── Solutions.ipynb
│   │   ├── cars1.csv
│   │   └── cars2.csv
│   ├── Fictitous_Names/
│   │   ├── Exercises.ipynb
│   │   ├── Exercises_with_solutions.ipynb
│   │   └── Solutions.ipynb
│   └── Housing_Market/
│       ├── Exercises.ipynb
│       ├── Exercises_with_solutions.ipynb
│       └── Solutions.ipynb
├── 06_Stats/
│   ├── US_Baby_Names/
│   │   ├── Exercises.ipynb
│   │   ├── Exercises_with_solutions.ipynb
│   │   ├── Solutions.ipynb
│   │   └── US_Baby_Names_right.csv
│   └── Wind_Stats/
│       ├── Exercises.ipynb
│       ├── Exercises_with_solutions.ipynb
│       ├── Solutions.ipynb
│       ├── wind.data
│       └── wind.desc
├── 07_Visualization/
│   ├── Chipotle/
│   │   ├── Exercise_with_Solutions.ipynb
│   │   ├── Exercises.ipynb
│   │   └── Solutions.ipynb
│   ├── Online_Retail/
│   │   ├── Exercises.ipynb
│   │   ├── Exercises_with_solutions_code.ipynb
│   │   ├── Online_Retail.csv
│   │   └── Solutions.ipynb
│   ├── Scores/
│   │   ├── Exercises.ipynb
│   │   ├── Exercises_with_solutions_code.ipynb
│   │   └── Solutions.ipynb
│   ├── Tips/
│   │   ├── Exercises.ipynb
│   │   ├── Exercises_with_code_and_solutions.ipynb
│   │   ├── Solutions.ipynb
│   │   └── tips.csv
│   └── Titanic_Disaster/
│       ├── Exercises.ipynb
│       ├── Exercises_code_with_solutions.ipynb
│       ├── Solutions.ipynb
│       └── train.csv
├── 08_Creating_Series_and_DataFrames/
│   └── Pokemon/
│       ├── Exercises-with-solutions-and-code.ipynb
│       ├── Exercises.ipynb
│       └── Solutions.ipynb
├── 09_Time_Series/
│   ├── Apple_Stock/
│   │   ├── Exercises-with-solutions-code.ipynb
│   │   ├── Exercises.ipynb
│   │   ├── Solutions.ipynb
│   │   └── appl_1980_2014.csv
│   ├── Getting_Financial_Data/
│   │   ├── Exercises.ipynb
│   │   ├── Exercises_solutions.ipynb
│   │   ├── Exercises_with_solutions_and_code.ipynb
│   │   └── Solutions.ipynb
│   └── Investor_Flow_of_Funds_US/
│       ├── Exercises.ipynb
│       ├── Exercises_with_code_and_solutions.ipynb
│       └── Solutions.ipynb
├── 10_Deleting/
│   ├── Iris/
│   │   ├── Exercises.ipynb
│   │   ├── Exercises_with_solutions_and_code.ipynb
│   │   └── Solutions.ipynb
│   └── Wine/
│       ├── Exercises.ipynb
│       ├── Exercises_code_and_solutions.ipynb
│       └── Solutions.ipynb
├── 11_Indexing/
│   └── Exercises.ipynb
├── CODE_OF_CONDUCT.md
├── LICENSE
├── README.md
├── Template/
│   ├── Exercises.ipynb
│   └── Solutions.ipynb
└── requirements.txt

================================================
FILE CONTENTS
================================================

================================================
FILE: .github/FUNDING.yml
================================================
# These are supported funding model platforms

github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
patreon: # Replace with a single Patreon username
open_collective: # Replace with a single Open Collective username
ko_fi: # Replace with a single Ko-fi username
tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
liberapay: # Replace with a single Liberapay username
issuehunt: # Replace with a single IssueHunt username
otechie: # Replace with a single Otechie username
custom: ['paypal.me/guisamora']


================================================
FILE: .gitignore
================================================
.ipynb_checkpoints
.Rproj
.Rproj.user
.python

================================================
FILE: 01_Getting_&_Knowing_Your_Data/Chipotle/Exercise_with_Solutions.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Ex2 - Getting and Knowing your Data\n",
    "\n",
    "Check out [Chipotle Exercises Video Tutorial](https://www.youtube.com/watch?v=lpuYZ5EUyS8&list=PLgJhDSE2ZLxaY_DigHeiIDC1cD09rXgJv&index=2) to watch a data scientist go through the exercises"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This time we are going to pull data directly from the internet.\n",
    "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n",
    "\n",
    "### Step 1. Import the necessary libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 3. Assign it to a variable called chipo."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "url = 'https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv'\n",
    "    \n",
    "chipo = pd.read_csv(url, sep = '\\t')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 4. See the first 10 entries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false,
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>order_id</th>\n",
       "      <th>quantity</th>\n",
       "      <th>item_name</th>\n",
       "      <th>choice_description</th>\n",
       "      <th>item_price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Chips and Fresh Tomato Salsa</td>\n",
       "      <td>NaN</td>\n",
       "      <td>$2.39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Izze</td>\n",
       "      <td>[Clementine]</td>\n",
       "      <td>$3.39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Nantucket Nectar</td>\n",
       "      <td>[Apple]</td>\n",
       "      <td>$3.39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Chips and Tomatillo-Green Chili Salsa</td>\n",
       "      <td>NaN</td>\n",
       "      <td>$2.39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>Chicken Bowl</td>\n",
       "      <td>[Tomatillo-Red Chili Salsa (Hot), [Black Beans...</td>\n",
       "      <td>$16.98</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>Chicken Bowl</td>\n",
       "      <td>[Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...</td>\n",
       "      <td>$10.98</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>Side of Chips</td>\n",
       "      <td>NaN</td>\n",
       "      <td>$1.69</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>Steak Burrito</td>\n",
       "      <td>[Tomatillo Red Chili Salsa, [Fajita Vegetables...</td>\n",
       "      <td>$11.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>Steak Soft Tacos</td>\n",
       "      <td>[Tomatillo Green Chili Salsa, [Pinto Beans, Ch...</td>\n",
       "      <td>$9.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>Steak Burrito</td>\n",
       "      <td>[Fresh Tomato Salsa, [Rice, Black Beans, Pinto...</td>\n",
       "      <td>$9.25</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   order_id  quantity                              item_name  \\\n",
       "0         1         1           Chips and Fresh Tomato Salsa   \n",
       "1         1         1                                   Izze   \n",
       "2         1         1                       Nantucket Nectar   \n",
       "3         1         1  Chips and Tomatillo-Green Chili Salsa   \n",
       "4         2         2                           Chicken Bowl   \n",
       "5         3         1                           Chicken Bowl   \n",
       "6         3         1                          Side of Chips   \n",
       "7         4         1                          Steak Burrito   \n",
       "8         4         1                       Steak Soft Tacos   \n",
       "9         5         1                          Steak Burrito   \n",
       "\n",
       "                                  choice_description item_price  \n",
       "0                                                NaN     $2.39   \n",
       "1                                       [Clementine]     $3.39   \n",
       "2                                            [Apple]     $3.39   \n",
       "3                                                NaN     $2.39   \n",
       "4  [Tomatillo-Red Chili Salsa (Hot), [Black Beans...    $16.98   \n",
       "5  [Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...    $10.98   \n",
       "6                                                NaN     $1.69   \n",
       "7  [Tomatillo Red Chili Salsa, [Fajita Vegetables...    $11.75   \n",
       "8  [Tomatillo Green Chili Salsa, [Pinto Beans, Ch...     $9.25   \n",
       "9  [Fresh Tomato Salsa, [Rice, Black Beans, Pinto...     $9.25   "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "chipo.head(10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 5. What is the number of observations in the dataset?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4622"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Solution 1\n",
    "\n",
    "chipo.shape[0]  # entries <= 4622 observations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 4622 entries, 0 to 4621\n",
      "Data columns (total 5 columns):\n",
      "order_id              4622 non-null int64\n",
      "quantity              4622 non-null int64\n",
      "item_name             4622 non-null object\n",
      "choice_description    3376 non-null object\n",
      "item_price            4622 non-null object\n",
      "dtypes: int64(2), object(3)\n",
      "memory usage: 180.6+ KB\n"
     ]
    }
   ],
   "source": [
    "# Solution 2\n",
    "\n",
    "chipo.info() # entries <= 4622 observations"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 6. What is the number of columns in the dataset?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "chipo.shape[1]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 7. Print the name of all the columns."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index([u'order_id', u'quantity', u'item_name', u'choice_description',\n",
       "       u'item_price'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "chipo.columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 8. How is the dataset indexed?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RangeIndex(start=0, stop=4622, step=1)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "chipo.index"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 9. Which was the most-ordered item? "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>order_id</th>\n",
       "      <th>quantity</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>item_name</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Chicken Bowl</th>\n",
       "      <td>713926</td>\n",
       "      <td>761</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              order_id  quantity\n",
       "item_name                       \n",
       "Chicken Bowl    713926       761"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "c = chipo.groupby('item_name')\n",
    "c = c.sum()\n",
    "c = c.sort_values(['quantity'], ascending=False)\n",
    "c.head(1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 10. For the most-ordered item, how many items were ordered?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>order_id</th>\n",
       "      <th>quantity</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>item_name</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Chicken Bowl</th>\n",
       "      <td>713926</td>\n",
       "      <td>761</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              order_id  quantity\n",
       "item_name                       \n",
       "Chicken Bowl    713926       761"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "c = chipo.groupby('item_name')\n",
    "c = c.sum()\n",
    "c = c.sort_values(['quantity'], ascending=False)\n",
    "c.head(1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 11. What was the most ordered item in the choice_description column?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>order_id</th>\n",
       "      <th>quantity</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>choice_description</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>[Diet Coke]</th>\n",
       "      <td>123455</td>\n",
       "      <td>159</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                    order_id  quantity\n",
       "choice_description                    \n",
       "[Diet Coke]           123455       159"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "c = chipo.groupby('choice_description').sum()\n",
    "c = c.sort_values(['quantity'], ascending=False)\n",
    "c.head(1)\n",
    "# Diet Coke 159"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 12. How many items were orderd in total?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4972"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "total_items_orders = chipo.quantity.sum()\n",
    "total_items_orders"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 13. Turn the item price into a float"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Step 13.a. Check the item price type"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dtype('O')"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "chipo.item_price.dtype"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Step 13.b. Create a lambda function and change the type of item price"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "dollarizer = lambda x: float(x[1:-1])\n",
    "chipo.item_price = chipo.item_price.apply(dollarizer)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Step 13.c. Check the item price type"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dtype('float64')"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "chipo.item_price.dtype"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 14. How much was the revenue for the period in the dataset?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Revenue was: $39237.02\n"
     ]
    }
   ],
   "source": [
    "revenue = (chipo['quantity']* chipo['item_price']).sum()\n",
    "\n",
    "print('Revenue was: $' + str(np.round(revenue,2)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 15. How many orders were made in the period?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1834"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "orders = chipo.order_id.value_counts().count()\n",
    "orders"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 16. What is the average revenue amount per order?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "21.394231188658654"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Solution 1\n",
    "\n",
    "chipo['revenue'] = chipo['quantity'] * chipo['item_price']\n",
    "order_grouped = chipo.groupby(by=['order_id']).sum()\n",
    "order_grouped.mean()['revenue']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "21.394231188658654"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Solution 2\n",
    "\n",
    "chipo.groupby('order_id')['revenue'].sum().mean()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 17. How many different items are sold?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "50"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "chipo.item_name.value_counts().count()"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}


================================================
FILE: 01_Getting_&_Knowing_Your_Data/Chipotle/Exercises.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Ex2 - Getting and Knowing your Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This time we are going to pull data directly from the internet.\n",
    "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n",
    "\n",
    "### Step 1. Import the necessary libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 3. Assign it to a variable called chipo."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 4. See the first 10 entries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "scrolled": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 5. What is the number of observations in the dataset?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Solution 1\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Solution 2\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 6. What is the number of columns in the dataset?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 7. Print the name of all the columns."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 8. How is the dataset indexed?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 9. Which was the most-ordered item? "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 10. For the most-ordered item, how many items were ordered?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 11. What was the most ordered item in the choice_description column?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 12. How many items were orderd in total?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 13. Turn the item price into a float"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Step 13.a. Check the item price type"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Step 13.b. Create a lambda function and change the type of item price"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Step 13.c. Check the item price type"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 14. How much was the revenue for the period in the dataset?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 15. How many orders were made in the period?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 16. What is the average revenue amount per order?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Solution 1\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Solution 2\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 17. How many different items are sold?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}


================================================
FILE: 01_Getting_&_Knowing_Your_Data/Chipotle/Solutions.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Ex2 - Getting and Knowing your Data\n",
    "\n",
    "Check out [Chipotle Exercises Video Tutorial](https://www.youtube.com/watch?v=lpuYZ5EUyS8&list=PLgJhDSE2ZLxaY_DigHeiIDC1cD09rXgJv&index=2) to watch a data scientist go through the exercises"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This time we are going to pull data directly from the internet.\n",
    "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n",
    "\n",
    "### Step 1. Import the necessary libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 3. Assign it to a variable called chipo."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 4. See the first 10 entries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false,
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>order_id</th>\n",
       "      <th>quantity</th>\n",
       "      <th>item_name</th>\n",
       "      <th>choice_description</th>\n",
       "      <th>item_price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Chips and Fresh Tomato Salsa</td>\n",
       "      <td>NaN</td>\n",
       "      <td>$2.39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Izze</td>\n",
       "      <td>[Clementine]</td>\n",
       "      <td>$3.39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Nantucket Nectar</td>\n",
       "      <td>[Apple]</td>\n",
       "      <td>$3.39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Chips and Tomatillo-Green Chili Salsa</td>\n",
       "      <td>NaN</td>\n",
       "      <td>$2.39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>Chicken Bowl</td>\n",
       "      <td>[Tomatillo-Red Chili Salsa (Hot), [Black Beans...</td>\n",
       "      <td>$16.98</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>Chicken Bowl</td>\n",
       "      <td>[Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...</td>\n",
       "      <td>$10.98</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>Side of Chips</td>\n",
       "      <td>NaN</td>\n",
       "      <td>$1.69</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>Steak Burrito</td>\n",
       "      <td>[Tomatillo Red Chili Salsa, [Fajita Vegetables...</td>\n",
       "      <td>$11.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>Steak Soft Tacos</td>\n",
       "      <td>[Tomatillo Green Chili Salsa, [Pinto Beans, Ch...</td>\n",
       "      <td>$9.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>Steak Burrito</td>\n",
       "      <td>[Fresh Tomato Salsa, [Rice, Black Beans, Pinto...</td>\n",
       "      <td>$9.25</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   order_id  quantity                              item_name  \\\n",
       "0         1         1           Chips and Fresh Tomato Salsa   \n",
       "1         1         1                                   Izze   \n",
       "2         1         1                       Nantucket Nectar   \n",
       "3         1         1  Chips and Tomatillo-Green Chili Salsa   \n",
       "4         2         2                           Chicken Bowl   \n",
       "5         3         1                           Chicken Bowl   \n",
       "6         3         1                          Side of Chips   \n",
       "7         4         1                          Steak Burrito   \n",
       "8         4         1                       Steak Soft Tacos   \n",
       "9         5         1                          Steak Burrito   \n",
       "\n",
       "                                  choice_description item_price  \n",
       "0                                                NaN     $2.39   \n",
       "1                                       [Clementine]     $3.39   \n",
       "2                                            [Apple]     $3.39   \n",
       "3                                                NaN     $2.39   \n",
       "4  [Tomatillo-Red Chili Salsa (Hot), [Black Beans...    $16.98   \n",
       "5  [Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...    $10.98   \n",
       "6                                                NaN     $1.69   \n",
       "7  [Tomatillo Red Chili Salsa, [Fajita Vegetables...    $11.75   \n",
       "8  [Tomatillo Green Chili Salsa, [Pinto Beans, Ch...     $9.25   \n",
       "9  [Fresh Tomato Salsa, [Rice, Black Beans, Pinto...     $9.25   "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 5. What is the number of observations in the dataset?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4622"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Solution 1\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 4622 entries, 0 to 4621\n",
      "Data columns (total 5 columns):\n",
      "order_id              4622 non-null int64\n",
      "quantity              4622 non-null int64\n",
      "item_name             4622 non-null object\n",
      "choice_description    3376 non-null object\n",
      "item_price            4622 non-null object\n",
      "dtypes: int64(2), object(3)\n",
      "memory usage: 180.6+ KB\n"
     ]
    }
   ],
   "source": [
    "# Solution 2\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 6. What is the number of columns in the dataset?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 7. Print the name of all the columns."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index([u'order_id', u'quantity', u'item_name', u'choice_description',\n",
       "       u'item_price'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 8. How is the dataset indexed?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RangeIndex(start=0, stop=4622, step=1)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 9. Which was the most-ordered item? "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>order_id</th>\n",
       "      <th>quantity</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>item_name</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Chicken Bowl</th>\n",
       "      <td>713926</td>\n",
       "      <td>761</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              order_id  quantity\n",
       "item_name                       \n",
       "Chicken Bowl    713926       761"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 10. For the most-ordered item, how many items were ordered?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>order_id</th>\n",
       "      <th>quantity</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>item_name</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Chicken Bowl</th>\n",
       "      <td>713926</td>\n",
       "      <td>761</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              order_id  quantity\n",
       "item_name                       \n",
       "Chicken Bowl    713926       761"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 11. What was the most ordered item in the choice_description column?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>order_id</th>\n",
       "      <th>quantity</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>choice_description</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>[Diet Coke]</th>\n",
       "      <td>123455</td>\n",
       "      <td>159</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                    order_id  quantity\n",
       "choice_description                    \n",
       "[Diet Coke]           123455       159"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 12. How many items were orderd in total?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4972"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 13. Turn the item price into a float"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Step 13.a. Check the item price type"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dtype('O')"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Step 13.b. Create a lambda function and change the type of item price"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Step 13.c. Check the item price type"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dtype('float64')"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 14. How much was the revenue for the period in the dataset?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Revenue was: $39237.02\n"
     ]
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 15. How many orders were made in the period?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1834"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 16. What is the average revenue amount per order?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "21.394231188658654"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Solution 1\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "21.394231188658654"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Solution 2\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 17. How many different items are sold?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "50"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}


================================================
FILE: 01_Getting_&_Knowing_Your_Data/Occupation/Exercise_with_Solution.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Ex3 - Getting and Knowing your Data\n",
    "\n",
    "Check out [Occupation Exercises Video Tutorial](https://www.youtube.com/watch?v=W8AB5s-L3Rw&list=PLgJhDSE2ZLxaY_DigHeiIDC1cD09rXgJv&index=4) to watch a data scientist go through the exercises"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This time we are going to pull data directly from the internet.\n",
    "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n",
    "\n",
    "### Step 1. Import the necessary libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/u.user). "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 3. Assign it to a variable called users and use the 'user_id' as index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "users = pd.read_csv('https://raw.githubusercontent.com/justmarkham/DAT8/master/data/u.user', \n",
    "                      sep='|', index_col='user_id')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 4. See the first 25 entries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>gender</th>\n",
       "      <th>occupation</th>\n",
       "      <th>zip_code</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>user_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>24</td>\n",
       "      <td>M</td>\n",
       "      <td>technician</td>\n",
       "      <td>85711</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>53</td>\n",
       "      <td>F</td>\n",
       "      <td>other</td>\n",
       "      <td>94043</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>23</td>\n",
       "      <td>M</td>\n",
       "      <td>writer</td>\n",
       "      <td>32067</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>24</td>\n",
       "      <td>M</td>\n",
       "      <td>technician</td>\n",
       "      <td>43537</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>33</td>\n",
       "      <td>F</td>\n",
       "      <td>other</td>\n",
       "      <td>15213</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>42</td>\n",
       "      <td>M</td>\n",
       "      <td>executive</td>\n",
       "      <td>98101</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>57</td>\n",
       "      <td>M</td>\n",
       "      <td>administrator</td>\n",
       "      <td>91344</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>36</td>\n",
       "      <td>M</td>\n",
       "      <td>administrator</td>\n",
       "      <td>05201</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>29</td>\n",
       "      <td>M</td>\n",
       "      <td>student</td>\n",
       "      <td>01002</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>53</td>\n",
       "      <td>M</td>\n",
       "      <td>lawyer</td>\n",
       "      <td>90703</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>39</td>\n",
       "      <td>F</td>\n",
       "      <td>other</td>\n",
       "      <td>30329</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>28</td>\n",
       "      <td>F</td>\n",
       "      <td>other</td>\n",
       "      <td>06405</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>47</td>\n",
       "      <td>M</td>\n",
       "      <td>educator</td>\n",
       "      <td>29206</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>45</td>\n",
       "      <td>M</td>\n",
       "      <td>scientist</td>\n",
       "      <td>55106</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>49</td>\n",
       "      <td>F</td>\n",
       "      <td>educator</td>\n",
       "      <td>97301</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>21</td>\n",
       "      <td>M</td>\n",
       "      <td>entertainment</td>\n",
       "      <td>10309</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>30</td>\n",
       "      <td>M</td>\n",
       "      <td>programmer</td>\n",
       "      <td>06355</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>35</td>\n",
       "      <td>F</td>\n",
       "      <td>other</td>\n",
       "      <td>37212</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>40</td>\n",
       "      <td>M</td>\n",
       "      <td>librarian</td>\n",
       "      <td>02138</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>42</td>\n",
       "      <td>F</td>\n",
       "      <td>homemaker</td>\n",
       "      <td>95660</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>26</td>\n",
       "      <td>M</td>\n",
       "      <td>writer</td>\n",
       "      <td>30068</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>25</td>\n",
       "      <td>M</td>\n",
       "      <td>writer</td>\n",
       "      <td>40206</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>30</td>\n",
       "      <td>F</td>\n",
       "      <td>artist</td>\n",
       "      <td>48197</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>21</td>\n",
       "      <td>F</td>\n",
       "      <td>artist</td>\n",
       "      <td>94533</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>39</td>\n",
       "      <td>M</td>\n",
       "      <td>engineer</td>\n",
       "      <td>55107</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         age gender     occupation zip_code\n",
       "user_id                                    \n",
       "1         24      M     technician    85711\n",
       "2         53      F          other    94043\n",
       "3         23      M         writer    32067\n",
       "4         24      M     technician    43537\n",
       "5         33      F          other    15213\n",
       "6         42      M      executive    98101\n",
       "7         57      M  administrator    91344\n",
       "8         36      M  administrator    05201\n",
       "9         29      M        student    01002\n",
       "10        53      M         lawyer    90703\n",
       "11        39      F          other    30329\n",
       "12        28      F          other    06405\n",
       "13        47      M       educator    29206\n",
       "14        45      M      scientist    55106\n",
       "15        49      F       educator    97301\n",
       "16        21      M  entertainment    10309\n",
       "17        30      M     programmer    06355\n",
       "18        35      F          other    37212\n",
       "19        40      M      librarian    02138\n",
       "20        42      F      homemaker    95660\n",
       "21        26      M         writer    30068\n",
       "22        25      M         writer    40206\n",
       "23        30      F         artist    48197\n",
       "24        21      F         artist    94533\n",
       "25        39      M       engineer    55107"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "users.head(25)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 5. See the last 10 entries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>gender</th>\n",
       "      <th>occupation</th>\n",
       "      <th>zip_code</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>user_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>934</th>\n",
       "      <td>61</td>\n",
       "      <td>M</td>\n",
       "      <td>engineer</td>\n",
       "      <td>22902</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>935</th>\n",
       "      <td>42</td>\n",
       "      <td>M</td>\n",
       "      <td>doctor</td>\n",
       "      <td>66221</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>936</th>\n",
       "      <td>24</td>\n",
       "      <td>M</td>\n",
       "      <td>other</td>\n",
       "      <td>32789</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>937</th>\n",
       "      <td>48</td>\n",
       "      <td>M</td>\n",
       "      <td>educator</td>\n",
       "      <td>98072</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>938</th>\n",
       "      <td>38</td>\n",
       "      <td>F</td>\n",
       "      <td>technician</td>\n",
       "      <td>55038</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>939</th>\n",
       "      <td>26</td>\n",
       "      <td>F</td>\n",
       "      <td>student</td>\n",
       "      <td>33319</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>940</th>\n",
       "      <td>32</td>\n",
       "      <td>M</td>\n",
       "      <td>administrator</td>\n",
       "      <td>02215</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>941</th>\n",
       "      <td>20</td>\n",
       "      <td>M</td>\n",
       "      <td>student</td>\n",
       "      <td>97229</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>942</th>\n",
       "      <td>48</td>\n",
       "      <td>F</td>\n",
       "      <td>librarian</td>\n",
       "      <td>78209</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>943</th>\n",
       "      <td>22</td>\n",
       "      <td>M</td>\n",
       "      <td>student</td>\n",
       "      <td>77841</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         age gender     occupation zip_code\n",
       "user_id                                    \n",
       "934       61      M       engineer    22902\n",
       "935       42      M         doctor    66221\n",
       "936       24      M          other    32789\n",
       "937       48      M       educator    98072\n",
       "938       38      F     technician    55038\n",
       "939       26      F        student    33319\n",
       "940       32      M  administrator    02215\n",
       "941       20      M        student    97229\n",
       "942       48      F      librarian    78209\n",
       "943       22      M        student    77841"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "users.tail(10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 6. What is the number of observations in the dataset?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "943"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "users.shape[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 7. What is the number of columns in the dataset?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "users.shape[1]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 8. Print the name of all the columns."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['age', 'gender', 'occupation', 'zip_code'], dtype='object')"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "users.columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 9. How is the dataset indexed?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Int64Index([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,\n",
       "            ...\n",
       "            934, 935, 936, 937, 938, 939, 940, 941, 942, 943],\n",
       "           dtype='int64', name='user_id', length=943)"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# \"the index\" (aka \"the labels\")\n",
    "users.index"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 10. What is the data type of each column?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "age            int64\n",
       "gender        object\n",
       "occupation    object\n",
       "zip_code      object\n",
       "dtype: object"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "users.dtypes"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 11. Print only the occupation column"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "user_id\n",
       "1         technician\n",
       "2              other\n",
       "3             writer\n",
       "4         technician\n",
       "5              other\n",
       "6          executive\n",
       "7      administrator\n",
       "8      administrator\n",
       "9            student\n",
       "10            lawyer\n",
       "11             other\n",
       "12             other\n",
       "13          educator\n",
       "14         scientist\n",
       "15          educator\n",
       "16     entertainment\n",
       "17        programmer\n",
       "18             other\n",
       "19         librarian\n",
       "20         homemaker\n",
       "21            writer\n",
       "22            writer\n",
       "23            artist\n",
       "24            artist\n",
       "25          engineer\n",
       "26          engineer\n",
       "27         librarian\n",
       "28            writer\n",
       "29        programmer\n",
       "30           student\n",
       "           ...      \n",
       "914            other\n",
       "915    entertainment\n",
       "916         engineer\n",
       "917          student\n",
       "918        scientist\n",
       "919            other\n",
       "920           artist\n",
       "921          student\n",
       "922    administrator\n",
       "923          student\n",
       "924            other\n",
       "925         salesman\n",
       "926    entertainment\n",
       "927       programmer\n",
       "928          student\n",
       "929        scientist\n",
       "930        scientist\n",
       "931         educator\n",
       "932         educator\n",
       "933          student\n",
       "934         engineer\n",
       "935           doctor\n",
       "936            other\n",
       "937         educator\n",
       "938       technician\n",
       "939          student\n",
       "940    administrator\n",
       "941          student\n",
       "942        librarian\n",
       "943          student\n",
       "Name: occupation, Length: 943, dtype: object"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "users.occupation\n",
    "\n",
    "#or\n",
    "\n",
    "users['occupation']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 12. How many different occupations are in this dataset?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "21"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "users.occupation.nunique()\n",
    "#or by using value_counts() which returns the count of unique elements\n",
    "#users.occupation.value_counts().count()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 13. What is the most frequent occupation?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'student'"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Because \"most\" is asked\n",
    "users.occupation.value_counts().head(1).index[0]\n",
    "\n",
    "#or\n",
    "#to have the top 5\n",
    "\n",
    "# users.occupation.value_counts().head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 14. Summarize the DataFrame."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>943.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>34.051962</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>12.192740</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>7.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>25.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>31.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>43.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>73.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              age\n",
       "count  943.000000\n",
       "mean    34.051962\n",
       "std     12.192740\n",
       "min      7.000000\n",
       "25%     25.000000\n",
       "50%     31.000000\n",
       "75%     43.000000\n",
       "max     73.000000"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "users.describe() #Notice: by default, only the numeric columns are returned. "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 15. Summarize all the columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>gender</th>\n",
       "      <th>occupation</th>\n",
       "      <th>zip_code</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>943.000000</td>\n",
       "      <td>943</td>\n",
       "      <td>943</td>\n",
       "      <td>943</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>unique</th>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>21</td>\n",
       "      <td>795</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>top</th>\n",
       "      <td>NaN</td>\n",
       "      <td>M</td>\n",
       "      <td>student</td>\n",
       "      <td>55414</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>freq</th>\n",
       "      <td>NaN</td>\n",
       "      <td>670</td>\n",
       "      <td>196</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>34.051962</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>12.192740</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>7.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>25.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>31.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>43.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>73.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               age gender occupation zip_code\n",
       "count   943.000000    943        943      943\n",
       "unique         NaN      2         21      795\n",
       "top            NaN      M    student    55414\n",
       "freq           NaN    670        196        9\n",
       "mean     34.051962    NaN        NaN      NaN\n",
       "std      12.192740    NaN        NaN      NaN\n",
       "min       7.000000    NaN        NaN      NaN\n",
       "25%      25.000000    NaN        NaN      NaN\n",
       "50%      31.000000    NaN        NaN      NaN\n",
       "75%      43.000000    NaN        NaN      NaN\n",
       "max      73.000000    NaN        NaN      NaN"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "users.describe(include = \"all\") #Notice: By default, only the numeric columns are returned."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 16. Summarize only the occupation column"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count         943\n",
       "unique         21\n",
       "top       student\n",
       "freq          196\n",
       "Name: occupation, dtype: object"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "users.occupation.describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 17. What is the mean age of users?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "34"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "round(users.age.mean())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 18. What is the age with least occurrence?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "11    1\n",
       "10    1\n",
       "73    1\n",
       "66    1\n",
       "7     1\n",
       "Name: age, dtype: int64"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "users.age.value_counts().tail() #7, 10, 11, 66 and 73 years -> only 1 occurrence"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}


================================================
FILE: 01_Getting_&_Knowing_Your_Data/Occupation/Exercises.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Ex3 - Getting and Knowing your Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This time we are going to pull data directly from the internet.\n",
    "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n",
    "\n",
    "### Step 1. Import the necessary libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/u.user). "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 3. Assign it to a variable called users and use the 'user_id' as index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 4. See the first 25 entries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 5. See the last 10 entries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 6. What is the number of observations in the dataset?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 7. What is the number of columns in the dataset?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 8. Print the name of all the columns."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 9. How is the dataset indexed?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 10. What is the data type of each column?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 11. Print only the occupation column"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 12. How many different occupations are in this dataset?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 13. What is the most frequent occupation?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 14. Summarize the DataFrame."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 15. Summarize all the columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 16. Summarize only the occupation column"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 17. What is the mean age of users?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 18. What is the age with least occurrence?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}


================================================
FILE: 01_Getting_&_Knowing_Your_Data/Occupation/Solutions.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Ex3 - Getting and Knowing your Data\n",
    "\n",
    "Check out [Occupation Exercises Video Tutorial](https://www.youtube.com/watch?v=W8AB5s-L3Rw&list=PLgJhDSE2ZLxaY_DigHeiIDC1cD09rXgJv&index=4) to watch a data scientist go through the exercises"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This time we are going to pull data directly from the internet.\n",
    "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n",
    "\n",
    "### Step 1. Import the necessary libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/u.user). "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 3. Assign it to a variable called users and use the 'user_id' as index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 4. See the first 25 entries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>gender</th>\n",
       "      <th>occupation</th>\n",
       "      <th>zip_code</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>user_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>24</td>\n",
       "      <td>M</td>\n",
       "      <td>technician</td>\n",
       "      <td>85711</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>53</td>\n",
       "      <td>F</td>\n",
       "      <td>other</td>\n",
       "      <td>94043</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>23</td>\n",
       "      <td>M</td>\n",
       "      <td>writer</td>\n",
       "      <td>32067</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>24</td>\n",
       "      <td>M</td>\n",
       "      <td>technician</td>\n",
       "      <td>43537</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>33</td>\n",
       "      <td>F</td>\n",
       "      <td>other</td>\n",
       "      <td>15213</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>42</td>\n",
       "      <td>M</td>\n",
       "      <td>executive</td>\n",
       "      <td>98101</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>57</td>\n",
       "      <td>M</td>\n",
       "      <td>administrator</td>\n",
       "      <td>91344</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>36</td>\n",
       "      <td>M</td>\n",
       "      <td>administrator</td>\n",
       "      <td>05201</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>29</td>\n",
       "      <td>M</td>\n",
       "      <td>student</td>\n",
       "      <td>01002</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>53</td>\n",
       "      <td>M</td>\n",
       "      <td>lawyer</td>\n",
       "      <td>90703</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>39</td>\n",
       "      <td>F</td>\n",
       "      <td>other</td>\n",
       "      <td>30329</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>28</td>\n",
       "      <td>F</td>\n",
       "      <td>other</td>\n",
       "      <td>06405</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>47</td>\n",
       "      <td>M</td>\n",
       "      <td>educator</td>\n",
       "      <td>29206</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>45</td>\n",
       "      <td>M</td>\n",
       "      <td>scientist</td>\n",
       "      <td>55106</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>49</td>\n",
       "      <td>F</td>\n",
       "      <td>educator</td>\n",
       "      <td>97301</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>21</td>\n",
       "      <td>M</td>\n",
       "      <td>entertainment</td>\n",
       "      <td>10309</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>30</td>\n",
       "      <td>M</td>\n",
       "      <td>programmer</td>\n",
       "      <td>06355</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>35</td>\n",
       "      <td>F</td>\n",
       "      <td>other</td>\n",
       "      <td>37212</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>40</td>\n",
       "      <td>M</td>\n",
       "      <td>librarian</td>\n",
       "      <td>02138</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>42</td>\n",
       "      <td>F</td>\n",
       "      <td>homemaker</td>\n",
       "      <td>95660</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>26</td>\n",
       "      <td>M</td>\n",
       "      <td>writer</td>\n",
       "      <td>30068</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>25</td>\n",
       "      <td>M</td>\n",
       "      <td>writer</td>\n",
       "      <td>40206</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>30</td>\n",
       "      <td>F</td>\n",
       "      <td>artist</td>\n",
       "      <td>48197</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>21</td>\n",
       "      <td>F</td>\n",
       "      <td>artist</td>\n",
       "      <td>94533</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>39</td>\n",
       "      <td>M</td>\n",
       "      <td>engineer</td>\n",
       "      <td>55107</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         age gender     occupation zip_code\n",
       "user_id                                    \n",
       "1         24      M     technician    85711\n",
       "2         53      F          other    94043\n",
       "3         23      M         writer    32067\n",
       "4         24      M     technician    43537\n",
       "5         33      F          other    15213\n",
       "6         42      M      executive    98101\n",
       "7         57      M  administrator    91344\n",
       "8         36      M  administrator    05201\n",
       "9         29      M        student    01002\n",
       "10        53      M         lawyer    90703\n",
       "11        39      F          other    30329\n",
       "12        28      F          other    06405\n",
       "13        47      M       educator    29206\n",
       "14        45      M      scientist    55106\n",
       "15        49      F       educator    97301\n",
       "16        21      M  entertainment    10309\n",
       "17        30      M     programmer    06355\n",
       "18        35      F          other    37212\n",
       "19        40      M      librarian    02138\n",
       "20        42      F      homemaker    95660\n",
       "21        26      M         writer    30068\n",
       "22        25      M         writer    40206\n",
       "23        30      F         artist    48197\n",
       "24        21      F         artist    94533\n",
       "25        39      M       engineer    55107"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 5. See the last 10 entries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>gender</th>\n",
       "      <th>occupation</th>\n",
       "      <th>zip_code</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>user_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>934</th>\n",
       "      <td>61</td>\n",
       "      <td>M</td>\n",
       "      <td>engineer</td>\n",
       "      <td>22902</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>935</th>\n",
       "      <td>42</td>\n",
       "      <td>M</td>\n",
       "      <td>doctor</td>\n",
       "      <td>66221</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>936</th>\n",
       "      <td>24</td>\n",
       "      <td>M</td>\n",
       "      <td>other</td>\n",
       "      <td>32789</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>937</th>\n",
       "      <td>48</td>\n",
       "      <td>M</td>\n",
       "      <td>educator</td>\n",
       "      <td>98072</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>938</th>\n",
       "      <td>38</td>\n",
       "      <td>F</td>\n",
       "      <td>technician</td>\n",
       "      <td>55038</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>939</th>\n",
       "      <td>26</td>\n",
       "      <td>F</td>\n",
       "      <td>student</td>\n",
       "      <td>33319</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>940</th>\n",
       "      <td>32</td>\n",
       "      <td>M</td>\n",
       "      <td>administrator</td>\n",
       "      <td>02215</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>941</th>\n",
       "      <td>20</td>\n",
       "      <td>M</td>\n",
       "      <td>student</td>\n",
       "      <td>97229</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>942</th>\n",
       "      <td>48</td>\n",
       "      <td>F</td>\n",
       "      <td>librarian</td>\n",
       "      <td>78209</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>943</th>\n",
       "      <td>22</td>\n",
       "      <td>M</td>\n",
       "      <td>student</td>\n",
       "      <td>77841</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         age gender     occupation zip_code\n",
       "user_id                                    \n",
       "934       61      M       engineer    22902\n",
       "935       42      M         doctor    66221\n",
       "936       24      M          other    32789\n",
       "937       48      M       educator    98072\n",
       "938       38      F     technician    55038\n",
       "939       26      F        student    33319\n",
       "940       32      M  administrator    02215\n",
       "941       20      M        student    97229\n",
       "942       48      F      librarian    78209\n",
       "943       22      M        student    77841"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 6. What is the number of observations in the dataset?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "943"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 7. What is the number of columns in the dataset?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 8. Print the name of all the columns."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['age', 'gender', 'occupation', 'zip_code'], dtype='object')"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 9. How is the dataset indexed?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Int64Index([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,\n",
       "            ...\n",
       "            934, 935, 936, 937, 938, 939, 940, 941, 942, 943],\n",
       "           dtype='int64', name='user_id', length=943)"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# \"the index\" (aka \"the labels\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 10. What is the data type of each column?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "age            int64\n",
       "gender        object\n",
       "occupation    object\n",
       "zip_code      object\n",
       "dtype: object"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 11. Print only the occupation column"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "user_id\n",
       "1         technician\n",
       "2              other\n",
       "3             writer\n",
       "4         technician\n",
       "5              other\n",
       "6          executive\n",
       "7      administrator\n",
       "8      administrator\n",
       "9            student\n",
       "10            lawyer\n",
       "11             other\n",
       "12             other\n",
       "13          educator\n",
       "14         scientist\n",
       "15          educator\n",
       "16     entertainment\n",
       "17        programmer\n",
       "18             other\n",
       "19         librarian\n",
       "20         homemaker\n",
       "21            writer\n",
       "22            writer\n",
       "23            artist\n",
       "24            artist\n",
       "25          engineer\n",
       "26          engineer\n",
       "27         librarian\n",
       "28            writer\n",
       "29        programmer\n",
       "30           student\n",
       "           ...      \n",
       "914            other\n",
       "915    entertainment\n",
       "916         engineer\n",
       "917          student\n",
       "918        scientist\n",
       "919            other\n",
       "920           artist\n",
       "921          student\n",
       "922    administrator\n",
       "923          student\n",
       "924            other\n",
       "925         salesman\n",
       "926    entertainment\n",
       "927       programmer\n",
       "928          student\n",
       "929        scientist\n",
       "930        scientist\n",
       "931         educator\n",
       "932         educator\n",
       "933          student\n",
       "934         engineer\n",
       "935           doctor\n",
       "936            other\n",
       "937         educator\n",
       "938       technician\n",
       "939          student\n",
       "940    administrator\n",
       "941          student\n",
       "942        librarian\n",
       "943          student\n",
       "Name: occupation, Length: 943, dtype: object"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 12. How many different occupations are in this dataset?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "21"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 13. What is the most frequent occupation?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'student'"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 14. Summarize the DataFrame."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>943.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>34.051962</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>12.192740</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>7.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>25.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>31.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>43.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>73.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              age\n",
       "count  943.000000\n",
       "mean    34.051962\n",
       "std     12.192740\n",
       "min      7.000000\n",
       "25%     25.000000\n",
       "50%     31.000000\n",
       "75%     43.000000\n",
       "max     73.000000"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 15. Summarize all the columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>gender</th>\n",
       "      <th>occupation</th>\n",
       "      <th>zip_code</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>943.000000</td>\n",
       "      <td>943</td>\n",
       "      <td>943</td>\n",
       "      <td>943</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>unique</th>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>21</td>\n",
       "      <td>795</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>top</th>\n",
       "      <td>NaN</td>\n",
       "      <td>M</td>\n",
       "      <td>student</td>\n",
       "      <td>55414</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>freq</th>\n",
       "      <td>NaN</td>\n",
       "      <td>670</td>\n",
       "      <td>196</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>34.051962</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>12.192740</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>7.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>25.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>31.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>43.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>73.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               age gender occupation zip_code\n",
       "count   943.000000    943        943      943\n",
       "unique         NaN      2         21      795\n",
       "top            NaN      M    student    55414\n",
       "freq           NaN    670        196        9\n",
       "mean     34.051962    NaN        NaN      NaN\n",
       "std      12.192740    NaN        NaN      NaN\n",
       "min       7.000000    NaN        NaN      NaN\n",
       "25%      25.000000    NaN        NaN      NaN\n",
       "50%      31.000000    NaN        NaN      NaN\n",
       "75%      43.000000    NaN        NaN      NaN\n",
       "max      73.000000    NaN        NaN      NaN"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 16. Summarize only the occupation column"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count         943\n",
       "unique         21\n",
       "top       student\n",
       "freq          196\n",
       "Name: occupation, dtype: object"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 17. What is the mean age of users?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "34"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 18. What is the age with least occurrence?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "11    1\n",
       "10    1\n",
       "73    1\n",
       "66    1\n",
       "7     1\n",
       "Name: age, dtype: int64"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}


================================================
FILE: 01_Getting_&_Knowing_Your_Data/World_Food_Facts/Exercises.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Exercise 1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 1. Go to https://www.kaggle.com/openfoodfacts/world-food-facts/data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 2. Download the dataset to your computer and unzip it."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 3. Use the tsv file and assign it to a dataframe called food"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 4. See the first 5 entries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 5. What is the number of observations in the dataset?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 6. What is the number of columns in the dataset?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 7. Print the name of all the columns."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 8. What is the name of 105th column?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 9. What is the type of the observations of the 105th column?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 10. How is the dataset indexed?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 11. What is the product name of the 19th observation?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}


================================================
FILE: 01_Getting_&_Knowing_Your_Data/World_Food_Facts/Exercises_with_solutions.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Ex1 - Getting and knowing your Data\n",
    "Check out [World Food Facts Exercises Video Tutorial](https://youtu.be/_jCSK4cMcVw) to watch a data scientist go through the exercises"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 1. Go to https://www.kaggle.com/openfoodfacts/world-food-facts/data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "###  Step 2. Download the dataset to your computer and unzip it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 3. Use the tsv file and assign it to a dataframe called food"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "//anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2717: DtypeWarning: Columns (0,3,5,19,20,24,25,26,27,28,36,37,38,39,48) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  interactivity=interactivity, compiler=compiler, result=result)\n"
     ]
    }
   ],
   "source": [
    "food = pd.read_csv('~/Desktop/en.openfoodfacts.org.products.tsv', sep='\\t')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 4. See the first 5 entries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>code</th>\n",
       "      <th>url</th>\n",
       "      <th>creator</th>\n",
       "      <th>created_t</th>\n",
       "      <th>created_datetime</th>\n",
       "      <th>last_modified_t</th>\n",
       "      <th>last_modified_datetime</th>\n",
       "      <th>product_name</th>\n",
       "      <th>generic_name</th>\n",
       "      <th>quantity</th>\n",
       "      <th>...</th>\n",
       "      <th>fruits-vegetables-nuts_100g</th>\n",
       "      <th>fruits-vegetables-nuts-estimate_100g</th>\n",
       "      <th>collagen-meat-protein-ratio_100g</th>\n",
       "      <th>cocoa_100g</th>\n",
       "      <th>chlorophyl_100g</th>\n",
       "      <th>carbon-footprint_100g</th>\n",
       "      <th>nutrition-score-fr_100g</th>\n",
       "      <th>nutrition-score-uk_100g</th>\n",
       "      <th>glycemic-index_100g</th>\n",
       "      <th>water-hardness_100g</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3087</td>\n",
       "      <td>http://world-en.openfoodfacts.org/product/0000...</td>\n",
       "      <td>openfoodfacts-contributors</td>\n",
       "      <td>1474103866</td>\n",
       "      <td>2016-09-17T09:17:46Z</td>\n",
       "      <td>1474103893</td>\n",
       "      <td>2016-09-17T09:18:13Z</td>\n",
       "      <td>Farine de blé noir</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1kg</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4530</td>\n",
       "      <td>http://world-en.openfoodfacts.org/product/0000...</td>\n",
       "      <td>usda-ndb-import</td>\n",
       "      <td>1489069957</td>\n",
       "      <td>2017-03-09T14:32:37Z</td>\n",
       "      <td>1489069957</td>\n",
       "      <td>2017-03-09T14:32:37Z</td>\n",
       "      <td>Banana Chips Sweetened (Whole)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>14.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4559</td>\n",
       "      <td>http://world-en.openfoodfacts.org/product/0000...</td>\n",
       "      <td>usda-ndb-import</td>\n",
       "      <td>1489069957</td>\n",
       "      <td>2017-03-09T14:32:37Z</td>\n",
       "      <td>1489069957</td>\n",
       "      <td>2017-03-09T14:32:37Z</td>\n",
       "      <td>Peanuts</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>16087</td>\n",
       "      <td>http://world-en.openfoodfacts.org/product/0000...</td>\n",
       "      <td>usda-ndb-import</td>\n",
       "      <td>1489055731</td>\n",
       "      <td>2017-03-09T10:35:31Z</td>\n",
       "      <td>1489055731</td>\n",
       "      <td>2017-03-09T10:35:31Z</td>\n",
       "      <td>Organic Salted Nut Mix</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>12.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>16094</td>\n",
       "      <td>http://world-en.openfoodfacts.org/product/0000...</td>\n",
       "      <td>usda-ndb-import</td>\n",
       "      <td>1489055653</td>\n",
       "      <td>2017-03-09T10:34:13Z</td>\n",
       "      <td>1489055653</td>\n",
       "      <td>2017-03-09T10:34:13Z</td>\n",
       "      <td>Organic Polenta</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 163 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    code                                                url  \\\n",
       "0   3087  http://world-en.openfoodfacts.org/product/0000...   \n",
       "1   4530  http://world-en.openfoodfacts.org/product/0000...   \n",
       "2   4559  http://world-en.openfoodfacts.org/product/0000...   \n",
       "3  16087  http://world-en.openfoodfacts.org/product/0000...   \n",
       "4  16094  http://world-en.openfoodfacts.org/product/0000...   \n",
       "\n",
       "                      creator   created_t      created_datetime  \\\n",
       "0  openfoodfacts-contributors  1474103866  2016-09-17T09:17:46Z   \n",
       "1             usda-ndb-import  1489069957  2017-03-09T14:32:37Z   \n",
       "2             usda-ndb-import  1489069957  2017-03-09T14:32:37Z   \n",
       "3             usda-ndb-import  1489055731  2017-03-09T10:35:31Z   \n",
       "4             usda-ndb-import  1489055653  2017-03-09T10:34:13Z   \n",
       "\n",
       "  last_modified_t last_modified_datetime                    product_name  \\\n",
       "0      1474103893   2016-09-17T09:18:13Z              Farine de blé noir   \n",
       "1      1489069957   2017-03-09T14:32:37Z  Banana Chips Sweetened (Whole)   \n",
       "2      1489069957   2017-03-09T14:32:37Z                         Peanuts   \n",
       "3      1489055731   2017-03-09T10:35:31Z          Organic Salted Nut Mix   \n",
       "4      1489055653   2017-03-09T10:34:13Z                 Organic Polenta   \n",
       "\n",
       "  generic_name quantity         ...         fruits-vegetables-nuts_100g  \\\n",
       "0          NaN      1kg         ...                                 NaN   \n",
       "1          NaN      NaN         ...                                 NaN   \n",
       "2          NaN      NaN         ...                                 NaN   \n",
       "3          NaN      NaN         ...                                 NaN   \n",
       "4          NaN      NaN         ...                                 NaN   \n",
       "\n",
       "  fruits-vegetables-nuts-estimate_100g collagen-meat-protein-ratio_100g  \\\n",
       "0                                  NaN                              NaN   \n",
       "1                                  NaN                              NaN   \n",
       "2                                  NaN                              NaN   \n",
       "3                                  NaN                              NaN   \n",
       "4                                  NaN                              NaN   \n",
       "\n",
       "  cocoa_100g chlorophyl_100g carbon-footprint_100g nutrition-score-fr_100g  \\\n",
       "0        NaN             NaN                   NaN                     NaN   \n",
       "1        NaN             NaN                   NaN                    14.0   \n",
       "2        NaN             NaN                   NaN                     0.0   \n",
       "3        NaN             NaN                   NaN                    12.0   \n",
       "4        NaN             NaN                   NaN                     NaN   \n",
       "\n",
       "  nutrition-score-uk_100g glycemic-index_100g water-hardness_100g  \n",
       "0                     NaN                 NaN                 NaN  \n",
       "1                    14.0                 NaN                 NaN  \n",
       "2                     0.0                 NaN                 NaN  \n",
       "3                    12.0                 NaN                 NaN  \n",
       "4                     NaN                 NaN                 NaN  \n",
       "\n",
       "[5 rows x 163 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "food.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 5. What is the number of observations in the dataset?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(356027, 163)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "food.shape #will give you both (observations/rows, columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "356027"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "food.shape[0] #will give you only the observations/rows number"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 6. What is the number of columns in the dataset?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(356027, 163)\n",
      "163\n",
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 356027 entries, 0 to 356026\n",
      "Columns: 163 entries, code to water-hardness_100g\n",
      "dtypes: float64(107), object(56)\n",
      "memory usage: 442.8+ MB\n"
     ]
    }
   ],
   "source": [
    "print(food.shape) #will give you both (observations/rows, columns)\n",
    "print(food.shape[1]) #will give you only the columns number\n",
    "\n",
    "#OR\n",
    "\n",
    "food.info() #Columns: 163 entries"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 7. Print the name of all the columns."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index([u'code', u'url', u'creator', u'created_t', u'created_datetime',\n",
       "       u'last_modified_t', u'last_modified_datetime', u'product_name',\n",
       "       u'generic_name', u'quantity',\n",
       "       ...\n",
       "       u'fruits-vegetables-nuts_100g', u'fruits-vegetables-nuts-estimate_100g',\n",
       "       u'collagen-meat-protein-ratio_100g', u'cocoa_100g', u'chlorophyl_100g',\n",
       "       u'carbon-footprint_100g', u'nutrition-score-fr_100g',\n",
       "       u'nutrition-score-uk_100g', u'glycemic-index_100g',\n",
       "       u'water-hardness_100g'],\n",
       "      dtype='object', length=163)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "food.columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 8. What is the name of 105th column?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'-glucose_100g'"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "food.columns[104]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 9. What is the type of the observations of the 105th column?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dtype('float64')"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "food.dtypes['-glucose_100g']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 10. How is the dataset indexed?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RangeIndex(start=0, stop=356027, step=1)"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "food.index"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 11. What is the product name of the 19th observation?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Lotus Organic Brown Jasmine Rice'"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "food.values[18][7]"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}


================================================
FILE: 01_Getting_&_Knowing_Your_Data/World_Food_Facts/Solutions.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Ex1 - Getting and knowing your Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 1. Go to https://www.kaggle.com/openfoodfacts/world-food-facts/data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "###  Step 2. Download the dataset to your computer and unzip it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 3. Use the tsv file and assign it to a dataframe called food"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "//anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2723: DtypeWarning: Columns (0,3,5,27,36) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  interactivity=interactivity, compiler=compiler, result=result)\n"
     ]
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 4. See the first 5 entries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>code</th>\n",
       "      <th>url</th>\n",
       "      <th>creator</th>\n",
       "      <th>created_t</th>\n",
       "      <th>created_datetime</th>\n",
       "      <th>last_modified_t</th>\n",
       "      <th>last_modified_datetime</th>\n",
       "      <th>product_name</th>\n",
       "      <th>generic_name</th>\n",
       "      <th>quantity</th>\n",
       "      <th>...</th>\n",
       "      <th>fruits-vegetables-nuts_100g</th>\n",
       "      <th>fruits-vegetables-nuts-estimate_100g</th>\n",
       "      <th>collagen-meat-protein-ratio_100g</th>\n",
       "      <th>cocoa_100g</th>\n",
       "      <th>chlorophyl_100g</th>\n",
       "      <th>carbon-footprint_100g</th>\n",
       "      <th>nutrition-score-fr_100g</th>\n",
       "      <th>nutrition-score-uk_100g</th>\n",
       "      <th>glycemic-index_100g</th>\n",
       "      <th>water-hardness_100g</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3087</td>\n",
       "      <td>http://world-en.openfoodfacts.org/product/0000...</td>\n",
       "      <td>openfoodfacts-contributors</td>\n",
       "      <td>1474103866</td>\n",
       "      <td>2016-09-17T09:17:46Z</td>\n",
       "      <td>1474103893</td>\n",
       "      <td>2016-09-17T09:18:13Z</td>\n",
       "      <td>Farine de blé noir</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1kg</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4530</td>\n",
       "      <td>http://world-en.openfoodfacts.org/product/0000...</td>\n",
       "      <td>usda-ndb-import</td>\n",
       "      <td>1489069957</td>\n",
       "      <td>2017-03-09T14:32:37Z</td>\n",
       "      <td>1489069957</td>\n",
       "      <td>2017-03-09T14:32:37Z</td>\n",
       "      <td>Banana Chips Sweetened (Whole)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>14.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4559</td>\n",
       "      <td>http://world-en.openfoodfacts.org/product/0000...</td>\n",
       "      <td>usda-ndb-import</td>\n",
       "      <td>1489069957</td>\n",
       "      <td>2017-03-09T14:32:37Z</td>\n",
       "      <td>1489069957</td>\n",
       "      <td>2017-03-09T14:32:37Z</td>\n",
       "      <td>Peanuts</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>16087</td>\n",
       "      <td>http://world-en.openfoodfacts.org/product/0000...</td>\n",
       "      <td>usda-ndb-import</td>\n",
       "      <td>1489055731</td>\n",
       "      <td>2017-03-09T10:35:31Z</td>\n",
       "      <td>1489055731</td>\n",
       "      <td>2017-03-09T10:35:31Z</td>\n",
       "      <td>Organic Salted Nut Mix</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>12.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>16094</td>\n",
       "      <td>http://world-en.openfoodfacts.org/product/0000...</td>\n",
       "      <td>usda-ndb-import</td>\n",
       "      <td>1489055653</td>\n",
       "      <td>2017-03-09T10:34:13Z</td>\n",
       "      <td>1489055653</td>\n",
       "      <td>2017-03-09T10:34:13Z</td>\n",
       "      <td>Organic Polenta</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 163 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    code                                                url  \\\n",
       "0   3087  http://world-en.openfoodfacts.org/product/0000...   \n",
       "1   4530  http://world-en.openfoodfacts.org/product/0000...   \n",
       "2   4559  http://world-en.openfoodfacts.org/product/0000...   \n",
       "3  16087  http://world-en.openfoodfacts.org/product/0000...   \n",
       "4  16094  http://world-en.openfoodfacts.org/product/0000...   \n",
       "\n",
       "                      creator   created_t      created_datetime  \\\n",
       "0  openfoodfacts-contributors  1474103866  2016-09-17T09:17:46Z   \n",
       "1             usda-ndb-import  1489069957  2017-03-09T14:32:37Z   \n",
       "2             usda-ndb-import  1489069957  2017-03-09T14:32:37Z   \n",
       "3             usda-ndb-import  1489055731  2017-03-09T10:35:31Z   \n",
       "4             usda-ndb-import  1489055653  2017-03-09T10:34:13Z   \n",
       "\n",
       "  last_modified_t last_modified_datetime                    product_name  \\\n",
       "0      1474103893   2016-09-17T09:18:13Z              Farine de blé noir   \n",
       "1      1489069957   2017-03-09T14:32:37Z  Banana Chips Sweetened (Whole)   \n",
       "2      1489069957   2017-03-09T14:32:37Z                         Peanuts   \n",
       "3      1489055731   2017-03-09T10:35:31Z          Organic Salted Nut Mix   \n",
       "4      1489055653   2017-03-09T10:34:13Z                 Organic Polenta   \n",
       "\n",
       "  generic_name quantity         ...         fruits-vegetables-nuts_100g  \\\n",
       "0          NaN      1kg         ...                                 NaN   \n",
       "1          NaN      NaN         ...                                 NaN   \n",
       "2          NaN      NaN         ...                                 NaN   \n",
       "3          NaN      NaN         ...                                 NaN   \n",
       "4          NaN      NaN         ...                                 NaN   \n",
       "\n",
       "  fruits-vegetables-nuts-estimate_100g collagen-meat-protein-ratio_100g  \\\n",
       "0                                  NaN                              NaN   \n",
       "1                                  NaN                              NaN   \n",
       "2                                  NaN                              NaN   \n",
       "3                                  NaN                              NaN   \n",
       "4                                  NaN                              NaN   \n",
       "\n",
       "  cocoa_100g chlorophyl_100g carbon-footprint_100g nutrition-score-fr_100g  \\\n",
       "0        NaN             NaN                   NaN                     NaN   \n",
       "1        NaN             NaN                   NaN                    14.0   \n",
       "2        NaN             NaN                   NaN                     0.0   \n",
       "3        NaN             NaN                   NaN                    12.0   \n",
       "4        NaN             NaN                   NaN                     NaN   \n",
       "\n",
       "  nutrition-score-uk_100g glycemic-index_100g water-hardness_100g  \n",
       "0                     NaN                 NaN                 NaN  \n",
       "1                    14.0                 NaN                 NaN  \n",
       "2                     0.0                 NaN                 NaN  \n",
       "3                    12.0                 NaN                 NaN  \n",
       "4                     NaN                 NaN                 NaN  \n",
       "\n",
       "[5 rows x 163 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 5. What is the number of observations in the dataset?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(356027, 163)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "356027"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 6. What is the number of columns in the dataset?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(356027, 163)\n",
      "163\n",
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 356027 entries, 0 to 356026\n",
      "Columns: 163 entries, code to water-hardness_100g\n",
      "dtypes: float64(107), object(56)\n",
      "memory usage: 442.8+ MB\n"
     ]
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 7. Print the name of all the columns."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['code', 'url', 'creator', 'created_t', 'created_datetime',\n",
       "       'last_modified_t', 'last_modified_datetime', 'product_name',\n",
       "       'generic_name', 'quantity',\n",
       "       ...\n",
       "       'fruits-vegetables-nuts_100g', 'fruits-vegetables-nuts-estimate_100g',\n",
       "       'collagen-meat-protein-ratio_100g', 'cocoa_100g', 'chlorophyl_100g',\n",
       "       'carbon-footprint_100g', 'nutrition-score-fr_100g',\n",
       "       'nutrition-score-uk_100g', 'glycemic-index_100g',\n",
       "       'water-hardness_100g'],\n",
       "      dtype='object', length=163)"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 8. What is the name of 105th column?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'-glucose_100g'"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 9. What is the type of the observations of the 105th column?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dtype('float64')"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 10. How is the dataset indexed?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RangeIndex(start=0, stop=356027, step=1)"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 11. What is the product name of the 19th observation?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Lotus Organic Brown Jasmine Rice'"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}


================================================
FILE: 02_Filtering_&_Sorting/Chipotle/Exercises.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Ex1 - Filtering and Sorting Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This time we are going to pull data directly from the internet.\n",
    "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n",
    "\n",
    "### Step 1. Import the necessary libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 3. Assign it to a variable called chipo."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 4. How many products cost more than $10.00?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 5. What is the price of each item? \n",
    "###### print a data frame with only three columns item_name choice_description and product_price"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 6. Sort by the name of the item"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 7. What was the quantity of the most expensive item ordered?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 8. How many times was a Veggie Salad Bowl ordered?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 9. How many times did someone order more than one Canned Soda?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}


================================================
FILE: 02_Filtering_&_Sorting/Chipotle/Exercises_with_solutions.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Ex1 - Filtering and Sorting Data\n",
    "\n",
    "Check out [Chipotle Exercises Video Tutorial](https://youtu.be/ZZPiWZpdekA) to watch a data scientist go through the exercises"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This time we are going to pull data directly from the internet.\n",
    "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n",
    "\n",
    "### Step 1. Import the necessary libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 3. Assign it to a variable called chipo."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "url = 'https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv'\n",
    "\n",
    "chipo = pd.read_csv(url, sep = '\\t')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 4. How many products cost more than $10.00?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>order_id</th>\n",
       "      <th>quantity</th>\n",
       "      <th>item_name</th>\n",
       "      <th>choice_description</th>\n",
       "      <th>item_price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>162</th>\n",
       "      <td>73</td>\n",
       "      <td>2</td>\n",
       "      <td>Canned Soda</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>$2.18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>200</th>\n",
       "      <td>89</td>\n",
       "      <td>1</td>\n",
       "      <td>Canned Soda</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>$1.09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>317</th>\n",
       "      <td>138</td>\n",
       "      <td>1</td>\n",
       "      <td>Canned Soda</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>$1.09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>350</th>\n",
       "      <td>150</td>\n",
       "      <td>2</td>\n",
       "      <td>Canned Soda</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>$2.18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>370</th>\n",
       "      <td>160</td>\n",
       "      <td>1</td>\n",
       "      <td>Canned Soda</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>$1.09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>779</th>\n",
       "      <td>321</td>\n",
       "      <td>1</td>\n",
       "      <td>Canned Soda</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>$1.09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1216</th>\n",
       "      <td>496</td>\n",
       "      <td>1</td>\n",
       "      <td>Canned Soda</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>$1.09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1662</th>\n",
       "      <td>672</td>\n",
       "      <td>1</td>\n",
       "      <td>Canned Soda</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>$1.09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1953</th>\n",
       "      <td>790</td>\n",
       "      <td>1</td>\n",
       "      <td>Canned Soda</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>$1.09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2135</th>\n",
       "      <td>859</td>\n",
       "      <td>2</td>\n",
       "      <td>Canned Soda</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>$2.18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2544</th>\n",
       "      <td>1009</td>\n",
       "      <td>1</td>\n",
       "      <td>Canned Soda</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>$1.09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2850</th>\n",
       "      <td>1132</td>\n",
       "      <td>1</td>\n",
       "      <td>Canned Soda</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>$1.09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3592</th>\n",
       "      <td>1440</td>\n",
       "      <td>2</td>\n",
       "      <td>Canned Soda</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>$2.18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3793</th>\n",
       "      <td>1518</td>\n",
       "      <td>1</td>\n",
       "      <td>Canned Soda</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>$1.09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4008</th>\n",
       "      <td>1604</td>\n",
       "      <td>1</td>\n",
       "      <td>Canned Soda</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>$1.09</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      order_id  quantity    item_name choice_description item_price\n",
       "162         73         2  Canned Soda        [Diet Coke]     $2.18 \n",
       "200         89         1  Canned Soda        [Diet Coke]     $1.09 \n",
       "317        138         1  Canned Soda        [Diet Coke]     $1.09 \n",
       "350        150         2  Canned Soda        [Diet Coke]     $2.18 \n",
       "370        160         1  Canned Soda        [Diet Coke]     $1.09 \n",
       "779        321         1  Canned Soda        [Diet Coke]     $1.09 \n",
       "1216       496         1  Canned Soda        [Diet Coke]     $1.09 \n",
       "1662       672         1  Canned Soda        [Diet Coke]     $1.09 \n",
       "1953       790         1  Canned Soda        [Diet Coke]     $1.09 \n",
       "2135       859         2  Canned Soda        [Diet Coke]     $2.18 \n",
       "2544      1009         1  Canned Soda        [Diet Coke]     $1.09 \n",
       "2850      1132         1  Canned Soda        [Diet Coke]     $1.09 \n",
       "3592      1440         2  Canned Soda        [Diet Coke]     $2.18 \n",
       "3793      1518         1  Canned Soda        [Diet Coke]     $1.09 \n",
       "4008      1604         1  Canned Soda        [Diet Coke]     $1.09 "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# the item price column is actullay the price of the product multiplied by the quantity\n",
    "chipo.loc[(chipo[\"choice_description\"] == '[Diet Coke]') & (chipo[\"item_name\"] == \"Canned Soda\")]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>order_id</th>\n",
       "      <th>quantity</th>\n",
       "      <th>item_name</th>\n",
       "      <th>choice_description</th>\n",
       "      <th>item_price</th>\n",
       "      <th>product_price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Chips and Fresh Tomato Salsa</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.39</td>\n",
       "      <td>2.39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Izze</td>\n",
       "      <td>[Clementine]</td>\n",
       "      <td>3.39</td>\n",
       "      <td>3.39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Nantucket Nectar</td>\n",
       "      <td>[Apple]</td>\n",
       "      <td>3.39</td>\n",
       "      <td>3.39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Chips and Tomatillo-Green Chili Salsa</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.39</td>\n",
       "      <td>2.39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>Chicken Bowl</td>\n",
       "      <td>[Tomatillo-Red Chili Salsa (Hot), [Black Beans...</td>\n",
       "      <td>16.98</td>\n",
       "      <td>8.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4617</th>\n",
       "      <td>1833</td>\n",
       "      <td>1</td>\n",
       "      <td>Steak Burrito</td>\n",
       "      <td>[Fresh Tomato Salsa, [Rice, Black Beans, Sour ...</td>\n",
       "      <td>11.75</td>\n",
       "      <td>11.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4618</th>\n",
       "      <td>1833</td>\n",
       "      <td>1</td>\n",
       "      <td>Steak Burrito</td>\n",
       "      <td>[Fresh Tomato Salsa, [Rice, Sour Cream, Cheese...</td>\n",
       "      <td>11.75</td>\n",
       "      <td>11.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4619</th>\n",
       "      <td>1834</td>\n",
       "      <td>1</td>\n",
       "      <td>Chicken Salad Bowl</td>\n",
       "      <td>[Fresh Tomato Salsa, [Fajita Vegetables, Pinto...</td>\n",
       "      <td>11.25</td>\n",
       "      <td>11.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4620</th>\n",
       "      <td>1834</td>\n",
       "      <td>1</td>\n",
       "      <td>Chicken Salad Bowl</td>\n",
       "      <td>[Fresh Tomato Salsa, [Fajita Vegetables, Lettu...</td>\n",
       "      <td>8.75</td>\n",
       "      <td>8.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4621</th>\n",
       "      <td>1834</td>\n",
       "      <td>1</td>\n",
       "      <td>Chicken Salad Bowl</td>\n",
       "      <td>[Fresh Tomato Salsa, [Fajita Vegetables, Pinto...</td>\n",
       "      <td>8.75</td>\n",
       "      <td>8.75</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>4622 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      order_id  quantity                              item_name  \\\n",
       "0            1         1           Chips and Fresh Tomato Salsa   \n",
       "1            1         1                                   Izze   \n",
       "2            1         1                       Nantucket Nectar   \n",
       "3            1         1  Chips and Tomatillo-Green Chili Salsa   \n",
       "4            2         2                           Chicken Bowl   \n",
       "...        ...       ...                                    ...   \n",
       "4617      1833         1                          Steak Burrito   \n",
       "4618      1833         1                          Steak Burrito   \n",
       "4619      1834         1                     Chicken Salad Bowl   \n",
       "4620      1834         1                     Chicken Salad Bowl   \n",
       "4621      1834         1                     Chicken Salad Bowl   \n",
       "\n",
       "                                     choice_description  item_price  \\\n",
       "0                                                   NaN        2.39   \n",
       "1                                          [Clementine]        3.39   \n",
       "2                                               [Apple]        3.39   \n",
       "3                                                   NaN        2.39   \n",
       "4     [Tomatillo-Red Chili Salsa (Hot), [Black Beans...       16.98   \n",
       "...                                                 ...         ...   \n",
       "4617  [Fresh Tomato Salsa, [Rice, Black Beans, Sour ...       11.75   \n",
       "4618  [Fresh Tomato Salsa, [Rice, Sour Cream, Cheese...       11.75   \n",
       "4619  [Fresh Tomato Salsa, [Fajita Vegetables, Pinto...       11.25   \n",
       "4620  [Fresh Tomato Salsa, [Fajita Vegetables, Lettu...        8.75   \n",
       "4621  [Fresh Tomato Salsa, [Fajita Vegetables, Pinto...        8.75   \n",
       "\n",
       "      product_price  \n",
       "0              2.39  \n",
       "1              3.39  \n",
       "2              3.39  \n",
       "3              2.39  \n",
       "4              8.49  \n",
       "...             ...  \n",
       "4617          11.75  \n",
       "4618          11.75  \n",
       "4619          11.25  \n",
       "4620           8.75  \n",
       "4621           8.75  \n",
       "\n",
       "[4622 rows x 6 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# adding a new column representing the price of each single product in float\n",
    "chipo[\"item_price\"] = chipo[\"item_price\"].str.replace(\"$\", \"\", regex=False).astype(float)\n",
    "chipo[\"product_price\"] = chipo[\"item_price\"] / chipo[\"quantity\"]\n",
    "chipo"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>order_id</th>\n",
       "      <th>quantity</th>\n",
       "      <th>item_name</th>\n",
       "      <th>choice_description</th>\n",
       "      <th>item_price</th>\n",
       "      <th>product_price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>162</th>\n",
       "      <td>73</td>\n",
       "      <td>2</td>\n",
       "      <td>Canned Soda</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>2.18</td>\n",
       "      <td>1.09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>200</th>\n",
       "      <td>89</td>\n",
       "      <td>1</td>\n",
       "      <td>Canned Soda</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>1.09</td>\n",
       "      <td>1.09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>317</th>\n",
       "      <td>138</td>\n",
       "      <td>1</td>\n",
       "      <td>Canned Soda</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>1.09</td>\n",
       "      <td>1.09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>350</th>\n",
       "      <td>150</td>\n",
       "      <td>2</td>\n",
       "      <td>Canned Soda</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>2.18</td>\n",
       "      <td>1.09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>370</th>\n",
       "      <td>160</td>\n",
       "      <td>1</td>\n",
       "      <td>Canned Soda</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>1.09</td>\n",
       "      <td>1.09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>779</th>\n",
       "      <td>321</td>\n",
       "      <td>1</td>\n",
       "      <td>Canned Soda</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>1.09</td>\n",
       "      <td>1.09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1216</th>\n",
       "      <td>496</td>\n",
       "      <td>1</td>\n",
       "      <td>Canned Soda</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>1.09</td>\n",
       "      <td>1.09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1662</th>\n",
       "      <td>672</td>\n",
       "      <td>1</td>\n",
       "      <td>Canned Soda</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>1.09</td>\n",
       "      <td>1.09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1953</th>\n",
       "      <td>790</td>\n",
       "      <td>1</td>\n",
       "      <td>Canned Soda</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>1.09</td>\n",
       "      <td>1.09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2135</th>\n",
       "      <td>859</td>\n",
       "      <td>2</td>\n",
       "      <td>Canned Soda</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>2.18</td>\n",
       "      <td>1.09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2544</th>\n",
       "      <td>1009</td>\n",
       "      <td>1</td>\n",
       "      <td>Canned Soda</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>1.09</td>\n",
       "      <td>1.09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2850</th>\n",
       "      <td>1132</td>\n",
       "      <td>1</td>\n",
       "      <td>Canned Soda</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>1.09</td>\n",
       "      <td>1.09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3592</th>\n",
       "      <td>1440</td>\n",
       "      <td>2</td>\n",
       "      <td>Canned Soda</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>2.18</td>\n",
       "      <td>1.09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3793</th>\n",
       "      <td>1518</td>\n",
       "      <td>1</td>\n",
       "      <td>Canned Soda</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>1.09</td>\n",
       "      <td>1.09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4008</th>\n",
       "      <td>1604</td>\n",
       "      <td>1</td>\n",
       "      <td>Canned Soda</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>1.09</td>\n",
       "      <td>1.09</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      order_id  quantity    item_name choice_description  item_price  \\\n",
       "162         73         2  Canned Soda        [Diet Coke]        2.18   \n",
       "200         89         1  Canned Soda        [Diet Coke]        1.09   \n",
       "317        138         1  Canned Soda        [Diet Coke]        1.09   \n",
       "350        150         2  Canned Soda        [Diet Coke]        2.18   \n",
       "370        160         1  Canned Soda        [Diet Coke]        1.09   \n",
       "779        321         1  Canned Soda        [Diet Coke]        1.09   \n",
       "1216       496         1  Canned Soda        [Diet Coke]        1.09   \n",
       "1662       672         1  Canned Soda        [Diet Coke]        1.09   \n",
       "1953       790         1  Canned Soda        [Diet Coke]        1.09   \n",
       "2135       859         2  Canned Soda        [Diet Coke]        2.18   \n",
       "2544      1009         1  Canned Soda        [Diet Coke]        1.09   \n",
       "2850      1132         1  Canned Soda        [Diet Coke]        1.09   \n",
       "3592      1440         2  Canned Soda        [Diet Coke]        2.18   \n",
       "3793      1518         1  Canned Soda        [Diet Coke]        1.09   \n",
       "4008      1604         1  Canned Soda        [Diet Coke]        1.09   \n",
       "\n",
       "      product_price  \n",
       "162            1.09  \n",
       "200            1.09  \n",
       "317            1.09  \n",
       "350            1.09  \n",
       "370            1.09  \n",
       "779            1.09  \n",
       "1216           1.09  \n",
       "1662           1.09  \n",
       "1953           1.09  \n",
       "2135           1.09  \n",
       "2544           1.09  \n",
       "2850           1.09  \n",
       "3592           1.09  \n",
       "3793           1.09  \n",
       "4008           1.09  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#checking everything is correct\n",
    "chipo.loc[(chipo[\"choice_description\"] == '[Diet Coke]') & (chipo[\"item_name\"] == \"Canned Soda\")]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# removing duplicated products\n",
    "filtered_chipo=chipo.drop_duplicates(['item_name','choice_description'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# filtering products that costs more than $10\n",
    "filtered_chipo = filtered_chipo.loc[ filtered_chipo[\"product_price\"]>10.0 , [\"item_name\",\"choice_description\",\"product_price\"] ].reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "the number of products that cost more than $10.00 is 707\n"
     ]
    }
   ],
   "source": [
    "print(f\"the number of products that cost more than $10.00 is {filtered_chipo.shape[0]}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 5. What is the price of each item? \n",
    "###### print a data frame with only three columns item_name choice_description and product_price"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>item_name</th>\n",
       "      <th>choice_description</th>\n",
       "      <th>product_price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Chicken Bowl</td>\n",
       "      <td>[Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...</td>\n",
       "      <td>10.98</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Steak Burrito</td>\n",
       "      <td>[Tomatillo Red Chili Salsa, [Fajita Vegetables...</td>\n",
       "      <td>11.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Chicken Bowl</td>\n",
       "      <td>[Fresh Tomato Salsa, [Fajita Vegetables, Rice,...</td>\n",
       "      <td>11.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Chicken Burrito</td>\n",
       "      <td>[[Tomatillo-Green Chili Salsa (Medium), Tomati...</td>\n",
       "      <td>10.98</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Barbacoa Bowl</td>\n",
       "      <td>[Roasted Chili Corn Salsa, [Fajita Vegetables,...</td>\n",
       "      <td>11.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>702</th>\n",
       "      <td>Carnitas Bowl</td>\n",
       "      <td>[Roasted Chili Corn Salsa, [Rice, Sour Cream, ...</td>\n",
       "      <td>11.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>703</th>\n",
       "      <td>Barbacoa Bowl</td>\n",
       "      <td>[Roasted Chili Corn Salsa, [Pinto Beans, Sour ...</td>\n",
       "      <td>11.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>704</th>\n",
       "      <td>Steak Burrito</td>\n",
       "      <td>[Tomatillo Green Chili Salsa, [Rice, Cheese, S...</td>\n",
       "      <td>11.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>705</th>\n",
       "      <td>Steak Burrito</td>\n",
       "      <td>[Fresh Tomato Salsa, [Rice, Sour Cream, Cheese...</td>\n",
       "      <td>11.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>706</th>\n",
       "      <td>Veggie Burrito</td>\n",
       "      <td>[Tomatillo Green Chili Salsa, [Rice, Fajita Ve...</td>\n",
       "      <td>11.25</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>707 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "           item_name                                 choice_description  \\\n",
       "0       Chicken Bowl  [Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...   \n",
       "1      Steak Burrito  [Tomatillo Red Chili Salsa, [Fajita Vegetables...   \n",
       "2       Chicken Bowl  [Fresh Tomato Salsa, [Fajita Vegetables, Rice,...   \n",
       "3    Chicken Burrito  [[Tomatillo-Green Chili Salsa (Medium), Tomati...   \n",
       "4      Barbacoa Bowl  [Roasted Chili Corn Salsa, [Fajita Vegetables,...   \n",
       "..               ...                                                ...   \n",
       "702    Carnitas Bowl  [Roasted Chili Corn Salsa, [Rice, Sour Cream, ...   \n",
       "703    Barbacoa Bowl  [Roasted Chili Corn Salsa, [Pinto Beans, Sour ...   \n",
       "704    Steak Burrito  [Tomatillo Green Chili Salsa, [Rice, Cheese, S...   \n",
       "705    Steak Burrito  [Fresh Tomato Salsa, [Rice, Sour Cream, Cheese...   \n",
       "706   Veggie Burrito  [Tomatillo Green Chili Salsa, [Rice, Fajita Ve...   \n",
       "\n",
       "     product_price  \n",
       "0            10.98  \n",
       "1            11.75  \n",
       "2            11.25  \n",
       "3            10.98  \n",
       "4            11.75  \n",
       "..             ...  \n",
       "702          11.75  \n",
       "703          11.75  \n",
       "704          11.75  \n",
       "705          11.75  \n",
       "706          11.25  \n",
       "\n",
       "[707 rows x 3 columns]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "filtered_chipo[[\"item_name\",\"choice_description\",\"product_price\"]]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 6. Sort by the name of the item"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>order_id</th>\n",
       "      <th>quantity</th>\n",
       "      <th>item_name</th>\n",
       "      <th>choice_description</th>\n",
       "      <th>item_price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3389</th>\n",
       "      <td>1360</td>\n",
       "      <td>2</td>\n",
       "      <td>6 Pack Soft Drink</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>12.98</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>341</th>\n",
       "      <td>148</td>\n",
       "      <td>1</td>\n",
       "      <td>6 Pack Soft Drink</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>6.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1849</th>\n",
       "      <td>749</td>\n",
       "      <td>1</td>\n",
       "      <td>6 Pack Soft Drink</td>\n",
       "      <td>[Coke]</td>\n",
       "      <td>6.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1860</th>\n",
       "      <td>754</td>\n",
       "      <td>1</td>\n",
       "      <td>6 Pack Soft Drink</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>6.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2713</th>\n",
       "      <td>1076</td>\n",
       "      <td>1</td>\n",
       "      <td>6 Pack Soft Drink</td>\n",
       "      <td>[Coke]</td>\n",
       "      <td>6.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3422</th>\n",
       "      <td>1373</td>\n",
       "      <td>1</td>\n",
       "      <td>6 Pack Soft Drink</td>\n",
       "      <td>[Coke]</td>\n",
       "      <td>6.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>553</th>\n",
       "      <td>230</td>\n",
       "      <td>1</td>\n",
       "      <td>6 Pack Soft Drink</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>6.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1916</th>\n",
       "      <td>774</td>\n",
       "      <td>1</td>\n",
       "      <td>6 Pack Soft Drink</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>6.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1922</th>\n",
       "      <td>776</td>\n",
       "      <td>1</td>\n",
       "      <td>6 Pack Soft Drink</td>\n",
       "      <td>[Coke]</td>\n",
       "      <td>6.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1937</th>\n",
       "      <td>784</td>\n",
       "      <td>1</td>\n",
       "      <td>6 Pack Soft Drink</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>6.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3836</th>\n",
       "      <td>1537</td>\n",
       "      <td>1</td>\n",
       "      <td>6 Pack Soft Drink</td>\n",
       "      <td>[Coke]</td>\n",
       "      <td>6.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>298</th>\n",
       "      <td>129</td>\n",
       "      <td>1</td>\n",
       "      <td>6 Pack Soft Drink</td>\n",
       "      <td>[Sprite]</td>\n",
       "      <td>6.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1976</th>\n",
       "      <td>798</td>\n",
       "      <td>1</td>\n",
       "      <td>6 Pack Soft Drink</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>6.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1167</th>\n",
       "      <td>481</td>\n",
       "      <td>1</td>\n",
       "      <td>6 Pack Soft Drink</td>\n",
       "      <td>[Coke]</td>\n",
       "      <td>6.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3875</th>\n",
       "      <td>1554</td>\n",
       "      <td>1</td>\n",
       "      <td>6 Pack Soft Drink</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>6.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1124</th>\n",
       "      <td>465</td>\n",
       "      <td>1</td>\n",
       "      <td>6 Pack Soft Drink</td>\n",
       "      <td>[Coke]</td>\n",
       "      <td>6.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3886</th>\n",
       "      <td>1558</td>\n",
       "      <td>1</td>\n",
       "      <td>6 Pack Soft Drink</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>6.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2108</th>\n",
       "      <td>849</td>\n",
       "      <td>1</td>\n",
       "      <td>6 Pack Soft Drink</td>\n",
       "      <td>[Coke]</td>\n",
       "      <td>6.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3010</th>\n",
       "      <td>1196</td>\n",
       "      <td>1</td>\n",
       "      <td>6 Pack Soft Drink</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>6.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4535</th>\n",
       "      <td>1803</td>\n",
       "      <td>1</td>\n",
       "      <td>6 Pack Soft Drink</td>\n",
       "      <td>[Lemonade]</td>\n",
       "      <td>6.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4169</th>\n",
       "      <td>1664</td>\n",
       "      <td>1</td>\n",
       "      <td>6 Pack Soft Drink</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>6.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4174</th>\n",
       "      <td>1666</td>\n",
       "      <td>1</td>\n",
       "      <td>6 Pack Soft Drink</td>\n",
       "      <td>[Coke]</td>\n",
       "      <td>6.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4527</th>\n",
       "      <td>1800</td>\n",
       "      <td>1</td>\n",
       "      <td>6 Pack Soft Drink</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>6.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4522</th>\n",
       "      <td>1798</td>\n",
       "      <td>1</td>\n",
       "      <td>6 Pack Soft Drink</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>6.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3806</th>\n",
       "      <td>1525</td>\n",
       "      <td>1</td>\n",
       "      <td>6 Pack Soft Drink</td>\n",
       "      <td>[Sprite]</td>\n",
       "      <td>6.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2389</th>\n",
       "      <td>949</td>\n",
       "      <td>1</td>\n",
       "      <td>6 Pack Soft Drink</td>\n",
       "      <td>[Coke]</td>\n",
       "      <td>6.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3132</th>\n",
       "      <td>1248</td>\n",
       "      <td>1</td>\n",
       "      <td>6 Pack Soft Drink</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>6.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3141</th>\n",
       "      <td>1253</td>\n",
       "      <td>1</td>\n",
       "      <td>6 Pack Soft Drink</td>\n",
       "      <td>[Lemonade]</td>\n",
       "      <td>6.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>639</th>\n",
       "      <td>264</td>\n",
       "      <td>1</td>\n",
       "      <td>6 Pack Soft Drink</td>\n",
       "      <td>[Diet Coke]</td>\n",
       "      <td>6.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1026</th>\n",
       "      <td>422</td>\n",
       "      <td>1</td>\n",
       "      <td>6 Pack Soft Drink</td>\n",
       "      <td>[Sprite]</td>\n",
       "      <td>6.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2996</th>\n",
       "      <td>1192</td>\n",
       "      <td>1</td>\n",
       "      <td>Veggie Salad</td>\n",
       "      <td>[Roasted Chili Corn Salsa (Medium), [Black Bea...</td>\n",
       "      <td>8.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3163</th>\n",
       "      <td>1263</td>\n",
       "      <td>1</td>\n",
       "      <td>Veggie Salad</td>\n",
       "      <td>[[Fresh Tomato Salsa (Mild), Roasted Chili Cor...</td>\n",
       "      <td>8.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4084</th>\n",
       "      <td>1635</td>\n",
       "      <td>1</td>\n",
       "      <td>Veggie Salad</td>\n",
       "      <td>[[Fresh Tomato Salsa (Mild), Roasted Chili Cor...</td>\n",
       "      <td>8.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1694</th>\n",
       "      <td>686</td>\n",
       "      <td>1</td>\n",
       "      <td>Veggie Salad</td>\n",
       "      <td>[[Fresh Tomato Salsa (Mild), Roasted Chili Cor...</td>\n",
       "      <td>8.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2756</th>\n",
       "      <td>1094</td>\n",
       "      <td>1</td>\n",
       "      <td>Veggie Salad</td>\n",
       "      <td>[[Tomatillo-Green Chili Salsa (Medium), Roaste...</td>\n",
       "      <td>8.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4201</th>\n",
       "      <td>1677</td>\n",
       "      <td>1</td>\n",
       "      <td>Veggie Salad Bowl</td>\n",
       "      <td>[Fresh Tomato Salsa, [Fajita Vegetables, Black...</td>\n",
       "      <td>11.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1884</th>\n",
       "      <td>760</td>\n",
       "      <td>1</td>\n",
       "      <td>Veggie Salad Bowl</td>\n",
       "      <td>[Fresh Tomato Salsa, [Fajita Vegetables, Rice,...</td>\n",
       "      <td>11.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>455</th>\n",
       "      <td>195</td>\n",
       "      <td>1</td>\n",
       "      <td>Veggie Salad Bowl</td>\n",
       "      <td>[Fresh Tomato Salsa, [Fajita Vegetables, Rice,...</td>\n",
       "      <td>11.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3223</th>\n",
       "      <td>1289</td>\n",
       "      <td>1</td>\n",
       "      <td>Veggie Salad Bowl</td>\n",
       "      <td>[Tomatillo Red Chili Salsa, [Fajita Vegetables...</td>\n",
       "      <td>11.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2223</th>\n",
       "      <td>896</td>\n",
       "      <td>1</td>\n",
       "      <td>Veggie Salad Bowl</td>\n",
       "      <td>[Roasted Chili Corn Salsa, Fajita Vegetables]</td>\n",
       "      <td>8.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2269</th>\n",
       "      <td>913</td>\n",
       "      <td>1</td>\n",
       "      <td>Veggie Salad Bowl</td>\n",
       "      <td>[Fresh Tomato Salsa, [Fajita Vegetables, Rice,...</td>\n",
       "      <td>8.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4541</th>\n",
       "      <td>1805</td>\n",
       "      <td>1</td>\n",
       "      <td>Veggie Salad Bowl</td>\n",
       "      <td>[Tomatillo Green Chili Salsa, [Fajita Vegetabl...</td>\n",
       "      <td>8.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3293</th>\n",
       "      <td>1321</td>\n",
       "      <td>1</td>\n",
       "      <td>Veggie Salad Bowl</td>\n",
       "      <td>[Fresh Tomato Salsa, [Rice, Black Beans, Chees...</td>\n",
       "      <td>8.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>186</th>\n",
       "      <td>83</td>\n",
       "      <td>1</td>\n",
       "      <td>Veggie Salad Bowl</td>\n",
       "      <td>[Fresh Tomato Salsa, [Fajita Vegetables, Rice,...</td>\n",
       "      <td>11.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>960</th>\n",
       "      <td>394</td>\n",
       "      <td>1</td>\n",
       "      <td>Veggie Salad Bowl</td>\n",
       "      <td>[Fresh Tomato Salsa, [Fajita Vegetables, Lettu...</td>\n",
       "      <td>8.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1316</th>\n",
       "      <td>536</td>\n",
       "      <td>1</td>\n",
       "      <td>Veggie Salad Bowl</td>\n",
       "      <td>[Fresh Tomato Salsa, [Fajita Vegetables, Rice,...</td>\n",
       "      <td>8.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2156</th>\n",
       "      <td>869</td>\n",
       "      <td>1</td>\n",
       "      <td>Veggie Salad Bowl</td>\n",
       "      <td>[Tomatillo Red Chili Salsa, [Fajita Vegetables...</td>\n",
       "      <td>11.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4261</th>\n",
       "      <td>1700</td>\n",
       "      <td>1</td>\n",
       "      <td>Veggie Salad Bowl</td>\n",
       "      <td>[Fresh Tomato Salsa, [Fajita Vegetables, Rice,...</td>\n",
       "      <td>11.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>295</th>\n",
       "      <td>128</td>\n",
       "      <td>1</td>\n",
       "      <td>Veggie Salad Bowl</td>\n",
       "      <td>[Fresh Tomato Salsa, [Fajita Vegetables, Lettu...</td>\n",
       "      <td>11.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4573</th>\n",
       "      <td>1818</td>\n",
       "      <td>1</td>\n",
       "      <td>Veggie Salad Bowl</td>\n",
       "      <td>[Fresh Tomato Salsa, [Fajita Vegetables, Pinto...</td>\n",
       "      <td>8.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2683</th>\n",
       "      <td>1066</td>\n",
       "      <td>1</td>\n",
       "      <td>Veggie Salad Bowl</td>\n",
       "      <td>[Roasted Chili Corn Salsa, [Fajita Vegetables,...</td>\n",
       "      <td>8.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>496</th>\n",
       "      <td>207</td>\n",
       "      <td>1</td>\n",
       "      <td>Veggie Salad Bowl</td>\n",
       "      <td>[Fresh Tomato Salsa, [Rice, Lettuce, Guacamole...</td>\n",
       "      <td>11.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4109</th>\n",
       "      <td>1646</td>\n",
       "      <td>1</td>\n",
       "      <td>Veggie Salad Bowl</td>\n",
       "      <td>[Tomatillo Red Chili Salsa, [Fajita Vegetables...</td>\n",
       "      <td>11.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>738</th>\n",
       "      <td>304</td>\n",
       "      <td>1</td>\n",
       "      <td>Veggie Soft Tacos</td>\n",
       "      <td>[Tomatillo Red Chili Salsa, [Fajita Vegetables...</td>\n",
       "      <td>11.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3889</th>\n",
       "      <td>1559</td>\n",
       "      <td>2</td>\n",
       "      <td>Veggie Soft Tacos</td>\n",
       "      <td>[Fresh Tomato Salsa (Mild), [Black Beans, Rice...</td>\n",
       "      <td>16.98</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2384</th>\n",
       "      <td>948</td>\n",
       "      <td>1</td>\n",
       "      <td>Veggie Soft Tacos</td>\n",
       "      <td>[Roasted Chili Corn Salsa, [Fajita Vegetables,...</td>\n",
       "      <td>8.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>781</th>\n",
       "      <td>322</td>\n",
       "      <td>1</td>\n",
       "      <td>Veggie Soft Tacos</td>\n",
       "      <td>[Fresh Tomato Salsa, [Black Beans, Cheese, Sou...</td>\n",
       "      <td>8.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2851</th>\n",
       "      <td>1132</td>\n",
       "      <td>1</td>\n",
       "      <td>Veggie Soft Tacos</td>\n",
       "      <td>[Roasted Chili Corn Salsa (Medium), [Black Bea...</td>\n",
       "      <td>8.49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1699</th>\n",
       "      <td>688</td>\n",
       "      <td>1</td>\n",
       "      <td>Veggie Soft Tacos</td>\n",
       "      <td>[Fresh Tomato Salsa, [Fajita Vegetables, Rice,...</td>\n",
       "      <td>11.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1395</th>\n",
       "      <td>567</td>\n",
       "      <td>1</td>\n",
       "      <td>Veggie Soft Tacos</td>\n",
       "      <td>[Fresh Tomato Salsa (Mild), [Pinto Beans, Rice...</td>\n",
       "      <td>8.49</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>4622 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      order_id  quantity          item_name  \\\n",
       "3389      1360         2  6 Pack Soft Drink   \n",
       "341        148         1  6 Pack Soft Drink   \n",
       "1849       749         1  6 Pack Soft Drink   \n",
       "1860       754         1  6 Pack Soft Drink   \n",
       "2713      1076         1  6 Pack Soft Drink   \n",
       "3422      1373         1  6 Pack Soft Drink   \n",
       "553        230         1  6 Pack Soft Drink   \n",
       "1916       774         1  6 Pack Soft Drink   \n",
       "1922       776         1  6 Pack Soft Drink   \n",
       "1937       784         1  6 Pack Soft Drink   \n",
       "3836      1537         1  6 Pack Soft Drink   \n",
       "298        129         1  6 Pack Soft Drink   \n",
       "1976       798         1  6 Pack Soft Drink   \n",
       "1167       481         1  6 Pack Soft Drink   \n",
       "3875      1554         1  6 Pack Soft Drink   \n",
       "1124       465         1  6 Pack Soft Drink   \n",
       "3886      1558         1  6 Pack Soft Drink   \n",
       "2108       849         1  6 Pack Soft Drink   \n",
       "3010      1196         1  6 Pack Soft Drink   \n",
       "4535      1803         1  6 Pack Soft Drink   \n",
       "4169      1664         1  6 Pack Soft Drink   \n",
       "4174      1666         1  6 Pack Soft Drink   \n",
       "4527      1800         1  6 Pack Soft Drink   \n",
       "4522      1798         1  6 Pack Soft Drink   \n",
       "3806      1525         1  6 Pack Soft Drink   \n",
       "2389       949         1  6 Pack Soft Drink   \n",
       "3132      1248         1  6 Pack Soft Drink   \n",
       "3141      1253         1  6 Pack Soft Drink   \n",
       "639        264         1  6 Pack Soft Drink   \n",
       "1026       422         1  6 Pack Soft Drink   \n",
       "...        ...       ...                ...   \n",
       "2996      1192         1       Veggie Salad   \n",
       "3163      1263         1       Veggie Salad   \n",
       "4084      1635         1       Veggie Salad   \n",
       "1694       686         1       Veggie Salad   \n",
       "2756      1094         1       Veggie Salad   \n",
       "4201      1677         1  Veggie Salad Bowl   \n",
       "1884       760         1  Veggie Salad Bowl   \n",
       "455        195         1  Veggie Salad Bowl   \n",
       "3223      1289         1  Veggie Salad Bowl   \n",
       "2223       896         1  Veggie Salad Bowl   \n",
       "2269       913         1  Veggie Salad Bowl   \n",
       "4541      1805         1  Veggie Salad Bowl   \n",
       "3293      1321         1  Veggie Salad Bowl   \n",
       "186         83         1  Veggie Salad Bowl   \n",
       "960        394         1  Veggie Salad Bowl   \n",
       "1316       536         1  Veggie Salad Bowl   \n",
       "2156       869         1  Veggie Salad Bowl   \n",
       "4261      1700         1  Veggie Salad Bowl   \n",
       "295        128         1  Veggie Salad Bowl   \n",
       "4573      1818         1  Veggie Salad Bowl   \n",
       "2683      1066         1  Veggie Salad Bowl   \n",
       "496        207         1  Veggie Salad Bowl   \n",
       "4109      1646         1  Veggie Salad Bowl   \n",
       "738        304         1  Veggie Soft Tacos   \n",
       "3889      1559         2  Veggie Soft Tacos   \n",
       "2384       948         1  Veggie Soft Tacos   \n",
       "781        322         1  Veggie Soft Tacos   \n",
       "2851      1132         1  Veggie Soft Tacos   \n",
       "1699       688         1  Veggie Soft Tacos   \n",
       "1395       567         1  Veggie Soft Tacos   \n",
       "\n",
       "                                     choice_description  item_price  \n",
       "3389                                        [Diet Coke]       12.98  \n",
       "341                                         [Diet Coke]        6.49  \n",
       "1849                                             [Coke]        6.49  \n",
       "1860                                        [Diet Coke]        6.49  \n",
       "2713                                             [Coke]        6.49  \n",
       "3422                                             [Coke]        6.49  \n",
       "553                                         [Diet Coke]        6.49  \n",
       "1916                                        [Diet Coke]        6.49  \n",
       "1922                                             [Coke]        6.49  \n",
       "1937                                        [Diet Coke]        6.49  \n",
       "3836                                             [Coke]        6.49  \n",
       "298                                            [Sprite]        6.49  \n",
       "1976                                        [Diet Coke]        6.49  \n",
       "1167                                             [Coke]        6.49  \n",
       "3875                                        [Diet Coke]        6.49  \n",
       "1124                                             [Coke]        6.49  \n",
       "3886                                        [Diet Coke]        6.49  \n",
       "2108                                             [Coke]        6.49  \n",
       "3010                                        [Diet Coke]        6.49  \n",
       "4535                                         [Lemonade]        6.49  \n",
       "4169                                        [Diet Coke]        6.49  \n",
       "4174                                             [Coke]        6.49  \n",
       "4527                                        [Diet Coke]        6.49  \n",
       "4522                                        [Diet Coke]        6.49  \n",
       "3806                                           [Sprite]        6.49  \n",
       "2389                                             [Coke]        6.49  \n",
       "3132                                        [Diet Coke]        6.49  \n",
       "3141                                         [Lemonade]        6.49  \n",
       "639                                         [Diet Coke]        6.49  \n",
       "1026                                           [Sprite]        6.49  \n",
       "...                                                 ...         ...  \n",
       "2996  [Roasted Chili Corn Salsa (Medium), [Black Bea...        8.49  \n",
       "3163  [[Fresh Tomato Salsa (Mild), Roasted Chili Cor...        8.49  \n",
       "4084  [[Fresh Tomato Salsa (Mild), Roasted Chili Cor...        8.49  \n",
       "1694  [[Fresh Tomato Salsa (Mild), Roasted Chili Cor...        8.49  \n",
       "2756  [[Tomatillo-Green Chili Salsa (Medium), Roaste...        8.49  \n",
       "4201  [Fresh Tomato Salsa, [Fajita Vegetables, Black...       11.25  \n",
       "1884  [Fresh Tomato Sals

Download .txt

gitextract_4ujte8zq/

├── .github/
│   └── FUNDING.yml
├── .gitignore
├── 01_Getting_&_Knowing_Your_Data/
│   ├── Chipotle/
│   │   ├── Exercise_with_Solutions.ipynb
│   │   ├── Exercises.ipynb
│   │   └── Solutions.ipynb
│   ├── Occupation/
│   │   ├── Exercise_with_Solution.ipynb
│   │   ├── Exercises.ipynb
│   │   └── Solutions.ipynb
│   └── World_Food_Facts/
│       ├── Exercises.ipynb
│       ├── Exercises_with_solutions.ipynb
│       └── Solutions.ipynb
├── 02_Filtering_&_Sorting/
│   ├── Chipotle/
│   │   ├── Exercises.ipynb
│   │   ├── Exercises_with_solutions.ipynb
│   │   └── Solutions.ipynb
│   ├── Euro12/
│   │   ├── Euro_2012_stats_TEAM.csv
│   │   ├── Exercises.ipynb
│   │   ├── Exercises_with_Solutions.ipynb
│   │   └── Solutions.ipynb
│   └── Fictional_Army/
│       ├── Exercise.ipynb
│       ├── Exercise_with_solutions.ipynb
│       └── Solutions.ipynb
├── 03_Grouping/
│   ├── Alcohol_Consumption/
│   │   ├── Exercise.ipynb
│   │   ├── Exercise_with_solutions.ipynb
│   │   └── Solutions.ipynb
│   ├── Occupation/
│   │   ├── Exercise.ipynb
│   │   ├── Exercises_with_solutions.ipynb
│   │   └── Solutions.ipynb
│   └── Regiment/
│       ├── Exercises.ipynb
│       ├── Exercises_solutions.ipynb
│       └── Solutions.ipynb
├── 04_Apply/
│   ├── Students_Alcohol_Consumption/
│   │   ├── Exercises.ipynb
│   │   ├── Exercises_with_solutions.ipynb
│   │   ├── Solutions.ipynb
│   │   └── student-mat.csv
│   └── US_Crime_Rates/
│       ├── Exercises.ipynb
│       ├── Exercises_with_solutions.ipynb
│       ├── Solutions.ipynb
│       └── US_Crime_Rates_1960_2014.csv
├── 05_Merge/
│   ├── Auto_MPG/
│   │   ├── Exercises.ipynb
│   │   ├── Exercises_with_solutions.ipynb
│   │   ├── Solutions.ipynb
│   │   ├── cars1.csv
│   │   └── cars2.csv
│   ├── Fictitous_Names/
│   │   ├── Exercises.ipynb
│   │   ├── Exercises_with_solutions.ipynb
│   │   └── Solutions.ipynb
│   └── Housing_Market/
│       ├── Exercises.ipynb
│       ├── Exercises_with_solutions.ipynb
│       └── Solutions.ipynb
├── 06_Stats/
│   ├── US_Baby_Names/
│   │   ├── Exercises.ipynb
│   │   ├── Exercises_with_solutions.ipynb
│   │   ├── Solutions.ipynb
│   │   └── US_Baby_Names_right.csv
│   └── Wind_Stats/
│       ├── Exercises.ipynb
│       ├── Exercises_with_solutions.ipynb
│       ├── Solutions.ipynb
│       ├── wind.data
│       └── wind.desc
├── 07_Visualization/
│   ├── Chipotle/
│   │   ├── Exercise_with_Solutions.ipynb
│   │   ├── Exercises.ipynb
│   │   └── Solutions.ipynb
│   ├── Online_Retail/
│   │   ├── Exercises.ipynb
│   │   ├── Exercises_with_solutions_code.ipynb
│   │   ├── Online_Retail.csv
│   │   └── Solutions.ipynb
│   ├── Scores/
│   │   ├── Exercises.ipynb
│   │   ├── Exercises_with_solutions_code.ipynb
│   │   └── Solutions.ipynb
│   ├── Tips/
│   │   ├── Exercises.ipynb
│   │   ├── Exercises_with_code_and_solutions.ipynb
│   │   ├── Solutions.ipynb
│   │   └── tips.csv
│   └── Titanic_Disaster/
│       ├── Exercises.ipynb
│       ├── Exercises_code_with_solutions.ipynb
│       ├── Solutions.ipynb
│       └── train.csv
├── 08_Creating_Series_and_DataFrames/
│   └── Pokemon/
│       ├── Exercises-with-solutions-and-code.ipynb
│       ├── Exercises.ipynb
│       └── Solutions.ipynb
├── 09_Time_Series/
│   ├── Apple_Stock/
│   │   ├── Exercises-with-solutions-code.ipynb
│   │   ├── Exercises.ipynb
│   │   ├── Solutions.ipynb
│   │   └── appl_1980_2014.csv
│   ├── Getting_Financial_Data/
│   │   ├── Exercises.ipynb
│   │   ├── Exercises_solutions.ipynb
│   │   ├── Exercises_with_solutions_and_code.ipynb
│   │   └── Solutions.ipynb
│   └── Investor_Flow_of_Funds_US/
│       ├── Exercises.ipynb
│       ├── Exercises_with_code_and_solutions.ipynb
│       └── Solutions.ipynb
├── 10_Deleting/
│   ├── Iris/
│   │   ├── Exercises.ipynb
│   │   ├── Exercises_with_solutions_and_code.ipynb
│   │   └── Solutions.ipynb
│   └── Wine/
│       ├── Exercises.ipynb
│       ├── Exercises_code_and_solutions.ipynb
│       └── Solutions.ipynb
├── 11_Indexing/
│   └── Exercises.ipynb
├── CODE_OF_CONDUCT.md
├── LICENSE
├── README.md
├── Template/
│   ├── Exercises.ipynb
│   └── Solutions.ipynb
└── requirements.txt

Download .json

Condensed preview — 103 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (4,238K chars).

[
  {
    "path": ".github/FUNDING.yml",
    "chars": 663,
    "preview": "# These are supported funding model platforms\n\ngithub: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [u"
  },
  {
    "path": ".gitignore",
    "chars": 45,
    "preview": ".ipynb_checkpoints\n.Rproj\n.Rproj.user\n.python"
  },
  {
    "path": "01_Getting_&_Knowing_Your_Data/Chipotle/Exercise_with_Solutions.ipynb",
    "chars": 19002,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Ex2 - Getting and Knowing your Da"
  },
  {
    "path": "01_Getting_&_Knowing_Your_Data/Chipotle/Exercises.ipynb",
    "chars": 6531,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Ex2 - Getting and Knowing your Da"
  },
  {
    "path": "01_Getting_&_Knowing_Your_Data/Chipotle/Solutions.ipynb",
    "chars": 17410,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Ex2 - Getting and Knowing your Da"
  },
  {
    "path": "01_Getting_&_Knowing_Your_Data/Occupation/Exercise_with_Solution.ipynb",
    "chars": 28841,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Ex3 - Getting and Knowing your Da"
  },
  {
    "path": "01_Getting_&_Knowing_Your_Data/Occupation/Exercises.ipynb",
    "chars": 5656,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Ex3 - Getting and Knowing your Da"
  },
  {
    "path": "01_Getting_&_Knowing_Your_Data/Occupation/Solutions.ipynb",
    "chars": 27663,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Ex3 - Getting and Knowing your Da"
  },
  {
    "path": "01_Getting_&_Knowing_Your_Data/World_Food_Facts/Exercises.ipynb",
    "chars": 3366,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Exercise 1\"\n   ]\n  },\n  {\n   \"cel"
  },
  {
    "path": "01_Getting_&_Knowing_Your_Data/World_Food_Facts/Exercises_with_solutions.ipynb",
    "chars": 16518,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Ex1 - Getting and knowing your Da"
  },
  {
    "path": "01_Getting_&_Knowing_Your_Data/World_Food_Facts/Solutions.ipynb",
    "chars": 16008,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Ex1 - Getting and knowing your Da"
  },
  {
    "path": "02_Filtering_&_Sorting/Chipotle/Exercises.ipynb",
    "chars": 3390,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Ex1 - Filtering and Sorting Data\""
  },
  {
    "path": "02_Filtering_&_Sorting/Chipotle/Exercises_with_solutions.ipynb",
    "chars": 57340,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Ex1 - Filtering and Sorting Data\\"
  },
  {
    "path": "02_Filtering_&_Sorting/Chipotle/Solutions.ipynb",
    "chars": 15832,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Ex1 - Filtering and Sorting Data\""
  },
  {
    "path": "02_Filtering_&_Sorting/Euro12/Euro_2012_stats_TEAM.csv",
    "chars": 2319,
    "preview": "Team,Goals,Shots on target,Shots off target,Shooting Accuracy,% Goals-to-shots,Total shots (inc. Blocked),Hit Woodwork,P"
  },
  {
    "path": "02_Filtering_&_Sorting/Euro12/Exercises.ipynb",
    "chars": 4617,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Ex2 - Filtering and Sorting Data\""
  },
  {
    "path": "02_Filtering_&_Sorting/Euro12/Exercises_with_Solutions.ipynb",
    "chars": 74656,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Ex2 - Filtering and Sorting Data\\"
  },
  {
    "path": "02_Filtering_&_Sorting/Euro12/Solutions.ipynb",
    "chars": 73461,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Filtering and Sorting Data\"\n   ]\n"
  },
  {
    "path": "02_Filtering_&_Sorting/Fictional_Army/Exercise.ipynb",
    "chars": 6810,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Fictional Army - Filtering and So"
  },
  {
    "path": "02_Filtering_&_Sorting/Fictional_Army/Exercise_with_solutions.ipynb",
    "chars": 57943,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Fictional Army - Filtering and So"
  },
  {
    "path": "02_Filtering_&_Sorting/Fictional_Army/Solutions.ipynb",
    "chars": 82994,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Fictional Army - Filtering and So"
  },
  {
    "path": "03_Grouping/Alcohol_Consumption/Exercise.ipynb",
    "chars": 2912,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Ex - GroupBy\"\n   ]\n  },\n  {\n   \"c"
  },
  {
    "path": "03_Grouping/Alcohol_Consumption/Exercise_with_solutions.ipynb",
    "chars": 21397,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Ex - GroupBy\\n\",\n    \"\\n\",\n    \"C"
  },
  {
    "path": "03_Grouping/Alcohol_Consumption/Solutions.ipynb",
    "chars": 15623,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# GroupBy\"\n   ]\n  },\n  {\n   \"cell_t"
  },
  {
    "path": "03_Grouping/Occupation/Exercise.ipynb",
    "chars": 2916,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Occupation\"\n   ]\n  },\n  {\n   \"cel"
  },
  {
    "path": "03_Grouping/Occupation/Exercises_with_solutions.ipynb",
    "chars": 17659,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Occupation\\n\",\n    \"\\n\",\n    \"Che"
  },
  {
    "path": "03_Grouping/Occupation/Solutions.ipynb",
    "chars": 15912,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Occupation\"\n   ]\n  },\n  {\n   \"cel"
  },
  {
    "path": "03_Grouping/Regiment/Exercises.ipynb",
    "chars": 4511,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Regiment\"\n   ]\n  },\n  {\n   \"cell_"
  },
  {
    "path": "03_Grouping/Regiment/Exercises_solutions.ipynb",
    "chars": 21449,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Regiment\\n\",\n    \"\\n\",\n    \"Check"
  },
  {
    "path": "03_Grouping/Regiment/Solutions.ipynb",
    "chars": 20410,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Regiment\"\n   ]\n  },\n  {\n   \"cell_"
  },
  {
    "path": "04_Apply/Students_Alcohol_Consumption/Exercises.ipynb",
    "chars": 4016,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Student Alcohol Consumption\"\n   ]"
  },
  {
    "path": "04_Apply/Students_Alcohol_Consumption/Exercises_with_solutions.ipynb",
    "chars": 35752,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Student Alcohol Consumption\"\n   ]"
  },
  {
    "path": "04_Apply/Students_Alcohol_Consumption/Solutions.ipynb",
    "chars": 34690,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Student Alcohol Consumption\"\n   ]"
  },
  {
    "path": "04_Apply/Students_Alcohol_Consumption/student-mat.csv",
    "chars": 41983,
    "preview": "school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,reason,guardian,traveltime,studytime,failures,schoolsup,famsu"
  },
  {
    "path": "04_Apply/US_Crime_Rates/Exercises.ipynb",
    "chars": 3536,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# United States - Crime Rates - 196"
  },
  {
    "path": "04_Apply/US_Crime_Rates/Exercises_with_solutions.ipynb",
    "chars": 28899,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# United States - Crime Rates - 196"
  },
  {
    "path": "04_Apply/US_Crime_Rates/Solutions.ipynb",
    "chars": 24507,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# United States - Crime Rates - 196"
  },
  {
    "path": "04_Apply/US_Crime_Rates/US_Crime_Rates_1960_2014.csv",
    "chars": 5052,
    "preview": "Year,Population,Total,Violent,Property,Murder,Forcible_Rape,Robbery,Aggravated_assault,Burglary,Larceny_Theft,Vehicle_Th"
  },
  {
    "path": "05_Merge/Auto_MPG/Exercises.ipynb",
    "chars": 3136,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# MPG Cars\"\n   ]\n  },\n  {\n   \"cell_"
  },
  {
    "path": "05_Merge/Auto_MPG/Exercises_with_solutions.ipynb",
    "chars": 49654,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# MPG Cars\\n\",\n    \"\\n\",\n    \"Check"
  },
  {
    "path": "05_Merge/Auto_MPG/Solutions.ipynb",
    "chars": 48988,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# MPG Cars\\n\",\n    \"\\n\",\n    \"Check"
  },
  {
    "path": "05_Merge/Auto_MPG/cars1.csv",
    "chars": 10163,
    "preview": "mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car,,,,,\r18.0,8,307,130,3504,12.0,70,1,chevrolet "
  },
  {
    "path": "05_Merge/Auto_MPG/cars2.csv",
    "chars": 9383,
    "preview": "mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car\r33.0,4,91,53,1795,17.4,76,3,honda civic\r20.0,"
  },
  {
    "path": "05_Merge/Fictitous_Names/Exercises.ipynb",
    "chars": 4224,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Fictitious Names\"\n   ]\n  },\n  {\n "
  },
  {
    "path": "05_Merge/Fictitous_Names/Exercises_with_solutions.ipynb",
    "chars": 22388,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Fictitious Names\\n\",\n    \"\\n\",\n  "
  },
  {
    "path": "05_Merge/Fictitous_Names/Solutions.ipynb",
    "chars": 21472,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Fictitious Names\"\n   ]\n  },\n  {\n "
  },
  {
    "path": "05_Merge/Housing_Market/Exercises.ipynb",
    "chars": 2983,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Housing Market\"\n   ]\n  },\n  {\n   "
  },
  {
    "path": "05_Merge/Housing_Market/Exercises_with_solutions.ipynb",
    "chars": 28022,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Housing Market\"\n   ]\n  },\n  {\n   "
  },
  {
    "path": "05_Merge/Housing_Market/Solutions.ipynb",
    "chars": 27113,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Housing Market\"\n   ]\n  },\n  {\n   "
  },
  {
    "path": "06_Stats/US_Baby_Names/Exercises.ipynb",
    "chars": 4333,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# US - Baby Names\"\n   ]\n  },\n  {\n  "
  },
  {
    "path": "06_Stats/US_Baby_Names/Exercises_with_solutions.ipynb",
    "chars": 21900,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# US - Baby Names\\n\",\n    \"\\n\",\n   "
  },
  {
    "path": "06_Stats/US_Baby_Names/Solutions.ipynb",
    "chars": 26094,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# US - Baby Names\"\n   ]\n  },\n  {\n  "
  },
  {
    "path": "06_Stats/Wind_Stats/Exercises.ipynb",
    "chars": 7677,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Wind Statistics\"\n   ]\n  },\n  {\n  "
  },
  {
    "path": "06_Stats/Wind_Stats/Exercises_with_solutions.ipynb",
    "chars": 156225,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Wind Statistics\\n\",\n    \"\\n\",\n   "
  },
  {
    "path": "06_Stats/Wind_Stats/Solutions.ipynb",
    "chars": 150406,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Wind Statistics\"\n   ]\n  },\n  {\n  "
  },
  {
    "path": "06_Stats/Wind_Stats/wind.data",
    "chars": 532576,
    "preview": "\nYr Mo Dy   RPT   VAL   ROS   KIL   SHA   BIR   DUB   CLA   MUL   CLO   BEL   MAL\n61  1  1 15.04 14.96 13.17  9.29   NaN"
  },
  {
    "path": "06_Stats/Wind_Stats/wind.desc",
    "chars": 954,
    "preview": "wind   daily average wind speeds for 1961-1978 at 12 synoptic meteorological \n       stations in the Republic of Ireland"
  },
  {
    "path": "07_Visualization/Chipotle/Exercise_with_Solutions.ipynb",
    "chars": 41708,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Visualizing Chipotle's Data\\n\",\n "
  },
  {
    "path": "07_Visualization/Chipotle/Exercises.ipynb",
    "chars": 2776,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Visualizing Chipotle's Data\"\n   ]"
  },
  {
    "path": "07_Visualization/Chipotle/Solutions.ipynb",
    "chars": 39609,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Visualizing Chipotle's Data\"\n   ]"
  },
  {
    "path": "07_Visualization/Online_Retail/Exercises.ipynb",
    "chars": 9954,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Online Retails Purchase\"\n   ]\n  }"
  },
  {
    "path": "07_Visualization/Online_Retail/Exercises_with_solutions_code.ipynb",
    "chars": 143779,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Online Retails Purchase\"\n   ]\n  }"
  },
  {
    "path": "07_Visualization/Online_Retail/Solutions.ipynb",
    "chars": 134987,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Online Retails Purchase\"\n   ]\n  }"
  },
  {
    "path": "07_Visualization/Scores/Exercises.ipynb",
    "chars": 4791,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Scores\"\n   ]\n  },\n  {\n   \"cell_ty"
  },
  {
    "path": "07_Visualization/Scores/Exercises_with_solutions_code.ipynb",
    "chars": 34564,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Scores\"\n   ]\n  },\n  {\n   \"cell_ty"
  },
  {
    "path": "07_Visualization/Scores/Solutions.ipynb",
    "chars": 33526,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Scores\"\n   ]\n  },\n  {\n   \"cell_ty"
  },
  {
    "path": "07_Visualization/Tips/Exercises.ipynb",
    "chars": 4483,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Tips\"\n   ]\n  },\n  {\n   \"cell_type"
  },
  {
    "path": "07_Visualization/Tips/Exercises_with_code_and_solutions.ipynb",
    "chars": 202769,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Tips\\n\",\n    \"\\n\",\n    \"Check out"
  },
  {
    "path": "07_Visualization/Tips/Solutions.ipynb",
    "chars": 201049,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Tips\"\n   ]\n  },\n  {\n   \"cell_type"
  },
  {
    "path": "07_Visualization/Tips/tips.csv",
    "chars": 8810,
    "preview": ",total_bill,tip,sex,smoker,day,time,size\n0,16.99,1.01,Female,No,Sun,Dinner,2\n1,10.34,1.66,Male,No,Sun,Dinner,3\n2,21.01,3"
  },
  {
    "path": "07_Visualization/Titanic_Disaster/Exercises.ipynb",
    "chars": 3110,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Visualizing the Titanic Disaster\""
  },
  {
    "path": "07_Visualization/Titanic_Disaster/Exercises_code_with_solutions.ipynb",
    "chars": 67354,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Visualizing the Titanic Disaster\\"
  },
  {
    "path": "07_Visualization/Titanic_Disaster/Solutions.ipynb",
    "chars": 64405,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Visualizing the Titanic Disaster\""
  },
  {
    "path": "07_Visualization/Titanic_Disaster/train.csv",
    "chars": 60302,
    "preview": "PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked\n1,0,3,\"Braund, Mr. Owen Harris\",male,22,"
  },
  {
    "path": "08_Creating_Series_and_DataFrames/Pokemon/Exercises-with-solutions-and-code.ipynb",
    "chars": 9505,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Pokemon\"\n   ]\n  },\n  {\n   \"cell_t"
  },
  {
    "path": "08_Creating_Series_and_DataFrames/Pokemon/Exercises.ipynb",
    "chars": 4567,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Pokemon\"\n   ]\n  },\n  {\n   \"cell_t"
  },
  {
    "path": "08_Creating_Series_and_DataFrames/Pokemon/Solutions.ipynb",
    "chars": 8825,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Pokemon\"\n   ]\n  },\n  {\n   \"cell_t"
  },
  {
    "path": "09_Time_Series/Apple_Stock/Exercises-with-solutions-code.ipynb",
    "chars": 49776,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Apple Stock\\n\",\n    \"\\n\",\n    \"Ch"
  },
  {
    "path": "09_Time_Series/Apple_Stock/Exercises.ipynb",
    "chars": 4441,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Apple Stock\"\n   ]\n  },\n  {\n   \"ce"
  },
  {
    "path": "09_Time_Series/Apple_Stock/Solutions.ipynb",
    "chars": 48659,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Apple Stock\"\n   ]\n  },\n  {\n   \"ce"
  },
  {
    "path": "09_Time_Series/Apple_Stock/appl_1980_2014.csv",
    "chars": 426412,
    "preview": "Date,Open,High,Low,Close,Volume,Adj Close\n2014-07-08,96.27,96.80,93.92,95.35,65130000,95.35\n2014-07-07,94.14,95.99,94.10"
  },
  {
    "path": "09_Time_Series/Getting_Financial_Data/Exercises.ipynb",
    "chars": 4459,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Getting Financial Data - Pandas D"
  },
  {
    "path": "09_Time_Series/Getting_Financial_Data/Exercises_solutions.ipynb",
    "chars": 36868,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Getting Financial Data - Pandas D"
  },
  {
    "path": "09_Time_Series/Getting_Financial_Data/Exercises_with_solutions_and_code.ipynb",
    "chars": 37832,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Getting Financial Data - Pandas D"
  },
  {
    "path": "09_Time_Series/Getting_Financial_Data/Solutions.ipynb",
    "chars": 35135,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Getting Financial Data - Pandas D"
  },
  {
    "path": "09_Time_Series/Investor_Flow_of_Funds_US/Exercises.ipynb",
    "chars": 3762,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Investor - Flow of Funds - US\"\n  "
  },
  {
    "path": "09_Time_Series/Investor_Flow_of_Funds_US/Exercises_with_code_and_solutions.ipynb",
    "chars": 40559,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Investor - Flow of Funds - US\\n\","
  },
  {
    "path": "09_Time_Series/Investor_Flow_of_Funds_US/Solutions.ipynb",
    "chars": 39820,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Investor - Flow of Funds - US\"\n  "
  },
  {
    "path": "10_Deleting/Iris/Exercises.ipynb",
    "chars": 4048,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Iris\"\n   ]\n  },\n  {\n   \"cell_type"
  },
  {
    "path": "10_Deleting/Iris/Exercises_with_solutions_and_code.ipynb",
    "chars": 45536,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Iris\\n\",\n    \"\\n\",\n    \"Check out"
  },
  {
    "path": "10_Deleting/Iris/Solutions.ipynb",
    "chars": 44640,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Iris\"\n   ]\n  },\n  {\n   \"cell_type"
  },
  {
    "path": "10_Deleting/Wine/Exercises.ipynb",
    "chars": 5615,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Wine\"\n   ]\n  },\n  {\n   \"cell_type"
  },
  {
    "path": "10_Deleting/Wine/Exercises_code_and_solutions.ipynb",
    "chars": 40716,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Wine\"\n   ]\n  },\n  {\n   \"cell_type"
  },
  {
    "path": "10_Deleting/Wine/Solutions.ipynb",
    "chars": 39579,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Wine\"\n   ]\n  },\n  {\n   \"cell_type"
  },
  {
    "path": "11_Indexing/Exercises.ipynb",
    "chars": 4851,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Ex - \"\n   ]\n  },\n  {\n   \"cell_typ"
  },
  {
    "path": "CODE_OF_CONDUCT.md",
    "chars": 3353,
    "preview": "# Contributor Covenant Code of Conduct\n\n## Our Pledge\n\nIn the interest of fostering an open and welcoming environment, w"
  },
  {
    "path": "LICENSE",
    "chars": 1515,
    "preview": "BSD 3-Clause License\n\nCopyright (c) 2018, Guilherme Samora\nAll rights reserved.\n\nRedistribution and use in source and bi"
  },
  {
    "path": "README.md",
    "chars": 5907,
    "preview": "# Pandas Exercises\n\nFed up with a ton of tutorials but no easy way to find exercises I decided to create a repo just wit"
  },
  {
    "path": "Template/Exercises.ipynb",
    "chars": 4819,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Ex - \"\n   ]\n  },\n  {\n   \"cell_typ"
  },
  {
    "path": "Template/Solutions.ipynb",
    "chars": 4819,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Ex - \"\n   ]\n  },\n  {\n   \"cell_typ"
  },
  {
    "path": "requirements.txt",
    "chars": 61,
    "preview": "numpy==1.22.0\nmatplotlib==2.0.2\nseaborn==0.8.1\npandas==0.23.4"
  }
]

// ... and 2 more files (download for full content)

About this extraction

This page contains the full source code of the guipsamora/pandas_exercises GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 103 files (79.8 MB), approximately 996.6k tokens. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Extract another repo