Repository: cvidan/bet365-scraper
Branch: master
Commit: 2224bc6faaf3
Files: 4
Total size: 10.8 KB

Directory structure:
gitextract_rs9jnmxq/
├── README.md
├── SeleniumScraper.py
├── VegasInsiderScraper.py
└── csv-json.py

================================================
FILE CONTENTS
================================================

================================================
FILE: README.md
================================================
# Bet365 Scraper

This is a Python script that grabs betting-odds data from https://bet365.com/ using Selenium.

## Why Selenium?

I would have preferred to use something like BeautifulSoup, but Bet365 has always been my go-to site for sports betting, and as far as I can tell there isn't a way to link directly to a market. For example, there is nothing along the lines of https://bet365.com/basketball/futures/nba/mvp as a way to get to the market for the NBA's Most Valuable Player. So instead, the script uses Selenium to navigate there in Firefox, starting from the home page.

## How to use

The script uses geckodriver for Selenium and expects to find it at `C:\Program Files\geckodriver\geckodriver.exe`.

Run the SeleniumScraper.py file with three parameters (each exactly as it appears on the site):

1) The sport (e.g. "Basketball")
2) The market (e.g. "NBA Futures 2018/19")
3) The particular bet (e.g. "Regular Season MVP")

So, for example:

```
python SeleniumScraper.py "Basketball" "NBA Futures 2018/19" "Regular Season MVP"
```

================================================
FILE: SeleniumScraper.py
================================================
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from datetime import datetime
from sys import argv

# The parameters, in order, are the names of the sport, market, and particular bet.
# E.g. >python SeleniumScraper.py "Basketball" "NBA Futures 2018/19" "Regular Season MVP"
SPORT = str(argv[1])
MARKET = str(argv[2])
BET = str(argv[3])

# To run headless instead:
# firefox_options = Options()
# firefox_options.add_argument("--headless")
# driver = webdriver.Firefox(executable_path=r'C:\Program Files\geckodriver\geckodriver.exe', options=firefox_options)

# A raw string keeps backslashes like '\g' from being read as escape sequences.
driver = webdriver.Firefox(executable_path=r'C:\Program Files\geckodriver\geckodriver.exe')
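
# A possible alternative to the try/except polling in __main__ below: Selenium's
# explicit-wait API (WebDriverWait + expected_conditions) blocks until a locator
# matches or a timeout elapses. This helper is only a sketch of that approach;
# it isn't called anywhere in the script, and the 10-second timeout is an
# arbitrary assumption.
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


def wait_for_elements(class_name, timeout=10):
    """Wait until at least one element with class_name is present, then return them all."""
    return WebDriverWait(driver, timeout).until(
        EC.presence_of_all_elements_located((By.CLASS_NAME, class_name)))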

if __name__ == '__main__':
    # teams = ["ATL Hawks",
    #          "BKN Nets",
    #          "BOS Celtics",
    #          "CHA Hornets",
    #          "CHI Bulls",
    #          "CLE Cavaliers",
    #          "DAL Mavericks",
    #          "DEN Nuggets",
    #          "DET Pistons",
    #          "GS Warriors",
    #          "HOU Rockets",
    #          "IND Pacers",
    #          "LA Clippers",
    #          "LA Lakers",
    #          "MEM Grizzlies",
    #          "MIA Heat",
    #          "MIL Bucks",
    #          "MIN Timberwolves",
    #          "NO Pelicans",
    #          "NY Knicks",
    #          "OKC Thunder",
    #          "ORL Magic",
    #          "PHI 76ers",
    #          "PHX Suns",
    #          "POR Trail Blazers",
    #          "SA Spurs",
    #          "SAC Kings",
    #          "TOR Raptors",
    #          "UTA Jazz",
    #          "WAS Wizards"]

    # All the ugly try/except blocks are there to keep the runtime down while still
    # coping with elements that haven't loaded on the page yet. Without them, the
    # script hits errors trying to access elements before they exist; each loop just
    # keeps retrying until they've loaded. The other solution I tried was to sleep()
    # for a set time before accessing the elements, but that seriously increased the
    # runtime, so I'm sticking with the ugliness. (The wait_for_elements sketch above
    # is a third option.)
    driver.get('https://www.bet365.com/')
    driver.find_element_by_link_text("English").click()

    # go to Basketball markets
    while True:
        try:
            left_menu_div = driver.find_elements_by_class_name("wn-WebNavModule")
            left_menu = left_menu_div[0].find_elements_by_class_name("wn-Classification")
            break
        except:
            pass
    for link in left_menu:
        if link.text == SPORT:
            link.click()
            break

    # go to Futures
    while True:
        try:
            driver.find_elements_by_class_name("sl-LiveInPlayHeader_ButtonBarButton")[1].click()
            break
        except:
            pass

    # find the bet
    while True:
        try:
            nba_futures_div = driver.find_element_by_class_name("sm-MarketGroup")
            break
        except:
            pass
    bets = nba_futures_div.find_elements_by_class_name("sm-CouponLink_Label")
    for bet in bets:
        if bet.text == BET:
            bet.click()
            break

    # grab data
    while True:
        try:
            win_outright_div = driver.find_element_by_class_name("gl-MarketGroupContainer")
            teams = win_outright_div.find_elements_by_class_name("gl-Participant_Name")
            american_odds = win_outright_div.find_elements_by_class_name("gl-Participant_Odds")
            break
        except:
            pass

    # make a new list with decimal odds
    num_teams = len(teams)
    if num_teams == 30:
        team_odds_pairs = []
        for i in range(len(teams)):
            sign = american_odds[i].text[0]
            value = int(american_odds[i].text[1:])
            if sign == '-':
                new_odds = (100 / value) + 1.0
            else:
                new_odds = (value / 100) + 1.0
            team_odds_pairs.append((teams[i].text, new_odds))
        team_odds_pairs.sort()
        driver.close()
    else:
        driver.close()  # close the browser before bailing out
        raise Exception("There is data for " + str(num_teams) + " teams rather than the expected 30!")

    # output the data
    output_record = datetime.now().strftime("%Y-%m-%d")
    for pair in team_odds_pairs:
        if pair[1].is_integer():
            odds = int(pair[1])
        else:
            odds = round(pair[1], 9)
        output_record += "," + str(odds)
    print(output_record)

    # print(datetime.now().strftime("%Y-%m-%d"))
    # for pair in team_odds_pairs:
    #     print(str(pair[0]) + "," + str(pair[1]))
    #     # print(pair[1])

================================================
FILE: VegasInsiderScraper.py
================================================
from requests import get
from bs4 import BeautifulSoup
import sys
from datetime import datetime


class VegasInsiderScraper:
    teams = ["Hawks",
             "Nets",
             "Celtics",
             "Hornets",
             "Bulls",
             "Cavaliers",
             "Mavericks",
             "Nuggets",
             "Pistons",
             "Warriors",
             "Rockets",
             "Pacers",
             "Clippers",
             "Lakers",
             "Grizzlies",
             "Heat",
             "Bucks",
             "T-Wolves",  # discrepancy (was Timberwolves)
             "Pelicans",
             "Knicks",
             "Thunder",
             "Magic",
             "76ers",
             "Suns",
             "Blazers",
             "Spurs",
             "Kings",
             "Raptors",
             "Jazz",
             "Wizards"]

    def __init__(self, url, output_filename):
        self.__output_filename = output_filename
        self.__page = get(url)
        if self.__page.status_code == 200:
            self.__soup = BeautifulSoup(self.__page.content, "html.parser")
        else:
            sys.exit(1)
        self.__team_odds_pairs = []

    def scrape(self):
        div = self.__soup.find(id="_")
        text = div.get_text().split('\n\n\n\n')
        num_teams = int(len(text) / 2)
        text = text[:num_teams * 2]
        for i in range(len(text)):
            text[i] = text[i].lstrip()
        self.__create_team_odds_list(text)
        self.__append_to_csv()

        # table = self.__soup.find_all('table', class_="table-wrapper cellTextNorm")[0]
        # rows = table.find_all('tr')[1:]
        # num_teams = len(rows)
        # if num_teams == 30:
        #     self.__create_team_odds_list(rows)
        #     self.__append_to_csv()
        # else:
        #     raise Exception("There is data for " + str(num_teams) + " teams rather than the expected 30")

    def __create_team_odds_list(self, text):
        # text alternates team name, odds, team name, odds, ...
        i = 0
        while i < len(text):
            team = text[i]
            odds = self.__convert_to_decimal(text[i + 1])
            self.__team_odds_pairs.append((team, odds))
            i += 2
        # for i in range(len(text)):
        #     row = text[i].find_all('td')
        #     team = row[0].get_text()
        #     odds = self.__convert_to_decimal(row[1].get_text())
        #     self.__team_odds_pairs.append((team, odds))
        self.__team_odds_pairs.sort()

    def __convert_to_decimal(self, odds):
        sign = odds[0]
        value = int(odds[1:])
        if sign == '-':
            new_odds = (100 / value) + 1.0
        else:
            new_odds = (value / 100) + 1.0
        # numerator = int(odds.split('/')[0])
        # denominator = int(odds.split('/')[1])
        # new_odds = numerator / denominator + 1
        if new_odds.is_integer():
            return str(int(new_odds))
        else:
            return str(round(new_odds, 9))

    def __append_to_csv(self):
        output_record = datetime.now().strftime("%Y-%m-%d")
        scraped_teams = "DATE"
        for pair in self.__team_odds_pairs:
            scraped_teams += "," + pair[0].split(" ")[-1]
        for odds in self.__create_list_of_scraped_teams(scraped_teams):
            output_record += "," + odds
        with open(self.__output_filename, "a") as out_file:
            out_file.write(output_record + "\n")

    def __create_list_of_scraped_teams(self, scraped_teams):
        i = 0
        odds_list = []
        for team in self.teams:
            if team not in scraped_teams:
                odds_list.append("10000")  # placeholder odds for a team missing from the page
            else:
                odds_list.append(self.__team_odds_pairs[i][1])
                i += 1
        return odds_list
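
# Sanity checks for the American-to-decimal conversion used by __convert_to_decimal
# above (and by SeleniumScraper.py): a negative line -V pays 100/V per unit staked,
# a positive line +V pays V/100 per unit staked, and adding 1.0 folds the returned
# stake into the decimal price. These asserts are illustrative additions, not part
# of the original script.
assert round((100 / 150) + 1.0, 9) == 1.666666667  # "-150" -> 1.666666667
assert round((200 / 100) + 1.0, 9) == 3.0          # "+200" -> 3.0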

if __name__ == "__main__":
    url = "https://www.sportsbook.ag/sbk/sportsbook4/nba-betting/nba-futures-championship.sbk"
    filename = "C:\\Users\\curti\\OneDrive\\Documents\\odds.txt"
    scraper = VegasInsiderScraper(url, filename)
    scraper.scrape()

================================================
FILE: csv-json.py
================================================
import json

if __name__ == "__main__":
    with open("C:\\Users\\curti\\OneDrive\\Documents\\odds.txt", "r") as in_file:
        lines = in_file.readlines()

    # Shorten the column names to the city abbreviations from the header row.
    column_names = lines[0].strip().split(',')
    column_names[0] = "DATE"
    la_counter = 0  # for distinguishing between LAC and LAL
    for i in range(len(column_names)):
        column_names[i] = column_names[i].split(' ')[0]
        if column_names[i] == "LA":
            if la_counter == 0:
                column_names[i] += "C"
                la_counter += 1
            else:
                column_names[i] += "L"

    # Transpose the row-oriented CSV into 31 columns: DATE plus one per team.
    records = []
    for i in range(31):
        new_column = []
        for line in lines[1:]:
            row_values = line.strip().split(',')
            if i > 0:
                try:
                    appendee = int(row_values[i])
                except ValueError:
                    appendee = round(float(row_values[i]), 9)
            else:
                appendee = row_values[i]
            new_column.append(appendee)
        records.append(new_column)

    dictionary = dict(zip(column_names, records))
    json_string = json.dumps(dictionary, indent=4)
    print(json_string)
    with open("C:\\Users\\curti\\OneDrive\\Documents\\odds.json", "w") as out_file:
        out_file.write(json_string)
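
    # A more compact version of the transpose above, using the standard library's
    # csv module. Left commented out as a sketch, not part of the original script;
    # it assumes the same odds.txt layout, omits the LAC/LAL disambiguation, and
    # keeps the odds as strings rather than coercing them to int/float.
    #
    # import csv
    # with open("C:\\Users\\curti\\OneDrive\\Documents\\odds.txt", newline="") as f:
    #     header, *data = list(csv.reader(f))
    # columns = {name.split(' ')[0]: [row[i] for row in data]
    #            for i, name in enumerate(header)}
    # print(json.dumps(columns, indent=4))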