[
  {
    "path": "README.md",
    "content": "# Bet365 Scraper\n\nThis is a Python script to grab data for betting odds from the site https://bet365.com/ using Selenium.\n\n## Why Selenium?\n\nI would have preferred to use something like BeautifulSoup, but Bet365 has always been my go-to site for sportsbetting, and as far as I can tell there isn't a way to link directly to a market. For example, there is nothing along the lines of https://bet365.com/basketball/futures/nba/mvp as a way to get to the market for the NBA's Most Valuable Player. So instead, using Selenium we navigate there in Firefox from the home page.\n\n## How to use\n\nThe script uses geckodriver for Selenium. The path it expects to find it is 'C:\\Program Files\\geckodriver\\geckodriver.exe'.\n\nRun the SeleniumScraper.py file with three parameters (each exactly as they appear on the site):\n1) The sport (e.g. \"Basketball\")\n2) The market (e.g. \"NBA Futures 2018/19\")\n3) The particular bet (e.g. \"Regular Season MVP\")\n\nSo for example,\n``` python SeleniumScraper.py \"Basketball\" \"NBA Futures 2018/19\" \"Regular Season MVP\" ```\n"
  },
  {
    "path": "SeleniumScraper.py",
    "content": "from selenium import webdriver\nfrom selenium.webdriver.firefox.options import Options\nfrom datetime import datetime\nfrom sys import argv\n\n# The parameters, in order, are the names of the sport, market, and particular bet.\n# E.g. >python SeleniumScraper.py \"Basketball\" \"NBA Futures 2018/19\" \"Regular Season MVP\"\nSPORT = str(argv[1])\nMARKET = str(argv[2])\nBET = str(argv[3])\n\n# firefox_options = Options().add_argument(\"--headless\")\n# firefox_options.add_argument(\"--headless\")\n# driver = webdriver.Firefox(executable_path='C:\\Program Files\\geckodriver\\geckodriver.exe', options=firefox_options)\ndriver = webdriver.Firefox(executable_path='C:\\Program Files\\geckodriver\\geckodriver.exe')\n\nif __name__ == '__main__':\n    # teams = [\"ATL Hawks\",\n    #          \"BKN Nets\",\n    #          \"BOS Celtics\",\n    #          \"CHA Hornets\",\n    #          \"CHI Bulls\",\n    #          \"CLE Cavaliers\",\n    #          \"DAL Mavericks\",\n    #          \"DEN Nuggets\",\n    #          \"DET Pistons\",\n    #          \"GS Warriors\",\n    #          \"HOU Rockets\",\n    #          \"IND Pacers\",\n    #          \"LA Clippers\",\n    #          \"LA Lakers\",\n    #          \"MEM Grizzlies\",\n    #          \"MIA Heat\",\n    #          \"MIL Bucks\",\n    #          \"MIN Timberwolves\",\n    #          \"NO Pelicans\",\n    #          \"NY Knicks\",\n    #          \"OKC Thunder\",\n    #          \"ORL Magic\",\n    #          \"PHI 76ers\",\n    #          \"PHX Suns\",\n    #          \"POR Trail Blazers\",\n    #          \"SA Spurs\",\n    #          \"SAC Kings\",\n    #          \"TOR Raptors\",\n    #          \"UTA Jazz\",\n    #          \"WAS Wizards\"]\n\n    # All the ugly Try-Except blocks are to decrease the runtime of the script. Without them, the script hits errors\n    # with trying to access elements which haven't loaded on the page yet. The Try-Except loop basically just keeps\n    # trying to access the elements until they've loaded. The other solution I tried was to sleep() for a set time\n    # before trying to access the elements. 
However, it seriously reduced the runtime so I'm sticking with the ugliness.\n\n    driver.get('https://www.bet365.com/')\n    driver.find_element_by_link_text(\"English\").click()\n\n    # go to Basketball markets\n    while True:\n        try:\n            left_menu_div = driver.find_elements_by_class_name(\"wn-WebNavModule\")\n            left_menu = left_menu_div[0].find_elements_by_class_name(\"wn-Classification\")\n            break\n        except:\n            pass\n    for link in left_menu:\n        if link.text == SPORT:\n            link.click()\n            break\n\n    # go to Futures\n    while True:\n        try:\n            driver.find_elements_by_class_name(\"sl-LiveInPlayHeader_ButtonBarButton\")[1].click()\n            break\n        except:\n            pass\n\n    # find the bet\n    while True:\n        try:\n            nba_futures_div = driver.find_element_by_class_name(\"sm-MarketGroup\")\n            break\n        except:\n            pass\n    bets = nba_futures_div.find_elements_by_class_name(\"sm-CouponLink_Label\")\n    for bet in bets:\n        if bet.text == BET:\n            bet.click()\n            break\n\n    # grab data\n    while True:\n        try:\n            win_outright_div = driver.find_element_by_class_name(\"gl-MarketGroupContainer\")\n            teams = win_outright_div.find_elements_by_class_name(\"gl-Participant_Name\")\n            american_odds = win_outright_div.find_elements_by_class_name(\"gl-Participant_Odds\")\n            break\n        except:\n            pass\n\n    # make new list with decimal odds\n    num_teams = len(teams)\n    if num_teams == 30:\n        team_odds_pairs = []\n        for i in range(len(teams)):\n            sign = american_odds[i].text[0]\n            value = int(american_odds[i].text[1:])\n            if sign == '-':\n                new_odds = (100 / value) + 1.0\n            else:\n                new_odds = (value / 100) + 1.0\n\n            team_odds_pairs.append((teams[i].text, new_odds))\n        team_odds_pairs.sort()\n        driver.close()\n    else:\n        raise Exception(\"There is data for \" + str(num_teams) + \" rather than the expected 30!\")\n\n    # output the data\n    output_record = datetime.now().strftime(\"%Y-%m-%d\")\n    for pair in team_odds_pairs:\n        if pair[1].is_integer():\n            odds = int(pair[1])\n        else:\n            odds = round(pair[1], 9)\n        output_record += \",\" + str(odds)\n    print(output_record)\n    # print(datetime.now().strftime(\"%Y-%m-%d\"))\n    # for pair in team_odds_pairs:\n    #     print(str(pair[0]) + \",\" + str(pair[1]))\n    #     # print(pair[1])\n\n"
  },
  {
    "path": "VegasInsiderScraper.py",
    "content": "from requests import get\nfrom bs4 import BeautifulSoup\nimport sys\nfrom datetime import datetime\n\n\nclass VegasInsiderScraper:\n\n    teams = [\"Hawks\",\n             \"Nets\",\n             \"Celtics\",\n             \"Hornets\",\n             \"Bulls\",\n             \"Cavaliers\",\n             \"Mavericks\",\n             \"Nuggets\",\n             \"Pistons\",\n             \"Warriors\",\n             \"Rockets\",\n             \"Pacers\",\n             \"Clippers\",\n             \"Lakers\",\n             \"Grizzlies\",\n             \"Heat\",\n             \"Bucks\",\n             \"T-Wolves\",    # discrepancy (was Timberwolves)\n             \"Pelicans\",\n             \"Knicks\",\n             \"Thunder\",\n             \"Magic\",\n             \"76ers\",\n             \"Suns\",\n             \"Blazers\",\n             \"Spurs\",\n             \"Kings\",\n             \"Raptors\",\n             \"Jazz\",\n             \"Wizards\"]\n\n    def __init__(self, url, output_filename):\n        self.__output_filename = output_filename\n        self.__page = get(url)\n        if self.__page.status_code == 200:\n            self.__soup = BeautifulSoup(self.__page.content, \"html.parser\")\n        else:\n            sys.exit(1)\n        self.__team_odds_pairs = []\n\n    def scrape(self):\n        div = self.__soup.find(id=\"_\")\n        text = div.get_text().split('\\n\\n\\n\\n')\n        num_teams = int(len(text) / 2)\n        text = text[:num_teams*2]\n        for i in range(len(text)):\n            text[i] = text[i].lstrip()\n        self.__create_team_odds_list(text)\n        self.__append_to_csv()\n\n        # table = self.__soup.find_all('table', class_=\"table-wrapper cellTextNorm\")[0]\n        # rows = table.find_all('tr')[1:]\n        # num_teams = len(rows)\n        # if num_teams == 30:\n        #     self.__create_team_odds_list(rows)\n        #     self.__append_to_csv()\n        # else:\n        #     raise Exception(\"There is data for \" + str(num_teams) + \" rather than the expected 30\")\n\n    def __create_team_odds_list(self, text):\n        i = 0\n        while i < len(text):\n            team = text[i]\n            odds = self.__convert_to_decimal(text[i + 1])\n            self.__team_odds_pairs.append((team, odds))\n            i += 2\n        # for i in range(len(text)):\n        #     row = text[i].find_all('td')\n        #     team = row[0].get_text()\n        #     odds = self.__convert_to_decimal(row[1].get_text())\n        #     self.__team_odds_pairs.append((team, odds))\n        return self.__team_odds_pairs.sort()\n\n    def __convert_to_decimal(self, odds):\n        sign = odds[0]\n        value = int(odds[1:])\n        if sign == '-':\n            new_odds = (100 / value) + 1.0\n        else:\n            new_odds = (value / 100) + 1.0\n        # numerator = int(odds.split('/')[0])\n        # denominator = int(odds.split('/')[1])\n        # new_odds = numerator / denominator + 1\n        if new_odds.is_integer():\n            return str(int(new_odds))\n        else:\n            return str(round(new_odds, 9))\n\n    def __append_to_csv(self):\n        output_record = datetime.now().strftime(\"%Y-%m-%d\")\n        scraped_teams = \"DATE\"\n        for pair in self.__team_odds_pairs:\n            scraped_teams += \",\" + pair[0].split(\" \")[-1]\n        for odds in self.__create_list_of_scraped_teams(scraped_teams):\n            output_record += \",\" + odds\n        with open(self.__output_filename, \"a\") as out_file:\n            
out_file.write(output_record + \"\\n\")\n\n    def __create_list_of_scraped_teams(self, scraped_teams):\n        i = 0\n        odds_list = []\n        for team in self.teams:\n            if team not in scraped_teams:\n                odds_list.append(\"10000\")\n            else:\n                odds_list.append(self.__team_odds_pairs[i][1])\n                i += 1\n        return odds_list\n\n\n\nif __name__ == \"__main__\":\n    url = \"https://www.sportsbook.ag/sbk/sportsbook4/nba-betting/nba-futures-championship.sbk\"\n    filename = \"C:\\\\Users\\\\curti\\\\OneDrive\\\\Documents\\\\odds.txt\"\n    scraper = VegasInsiderScraper(url, filename)\n    scraper.scrape()\n"
  },
  {
    "path": "csv-json.py",
    "content": "import json\n\nif __name__ == \"__main__\":\n    with open(\"C:\\\\Users\\\\curti\\\\OneDrive\\\\Documents\\\\odds.txt\", \"r\") as in_file:\n        lines = in_file.readlines()\n\n    column_names = lines[0].strip().split(',')\n    column_names[0] = \"DATE\"\n    la_counter = 0  # For distinguishing between LAC and LAL\n    for i in range(len(column_names)):\n        column_names[i] = column_names[i].split(' ')[0]\n        if column_names[i] == \"LA\":\n            if la_counter == 0:\n                column_names[i] += \"C\"\n                la_counter += 1\n            else:\n                column_names[i] += \"L\"\n\n    records = []\n\n    for i in range(31):\n        new_column = []\n        for line in lines[1:]:\n            row_values = line.strip().split(',')\n            for j in range(len(row_values)):\n                if j == i:\n                    if i > 0:\n                        try:\n                            appendee = int(row_values[j])\n                        except ValueError:\n                            appendee = round(float(row_values[j]), 9)\n                    else:\n                        appendee = row_values[j]\n                    new_column.append(appendee)\n        records.append(new_column)\n\n    dictionary = dict(zip(column_names, records))\n\n    json_string = json.dumps(dictionary, indent=4)\n    print(json_string)\n\n    with open(\"C:\\\\Users\\\\curti\\\\OneDrive\\\\Documents\\\\odds.json\", \"w\") as out_file:\n        out_file.write(json_string)\n"
  }
]