Repository: cvidan/bet365-scraper
Branch: master
Commit: 2224bc6faaf3
Files: 4
Total size: 10.8 KB

Directory structure:
gitextract_rs9jnmxq/
├── README.md
├── SeleniumScraper.py
├── VegasInsiderScraper.py
└── csv-json.py

================================================
FILE CONTENTS
================================================

================================================
FILE: README.md
================================================
# Bet365 Scraper

This is a Python script that grabs betting-odds data from https://bet365.com/ using Selenium.

## Why Selenium?

I would have preferred to use something like BeautifulSoup, but Bet365 has always been my go-to site for sports betting, and as far as I can tell there isn't a way to link directly to a market. For example, there is nothing along the lines of https://bet365.com/basketball/futures/nba/mvp as a way to get to the market for the NBA's Most Valuable Player. So instead, the script uses Selenium to navigate there in Firefox, starting from the home page.

## How to use

The script uses geckodriver for Selenium and expects to find it at `C:\Program Files\geckodriver\geckodriver.exe`.

Run the SeleniumScraper.py file with three parameters (each exactly as it appears on the site):

1) The sport (e.g. "Basketball")
2) The market (e.g. "NBA Futures 2018/19")
3) The particular bet (e.g. "Regular Season MVP")

So, for example:

```
python SeleniumScraper.py "Basketball" "NBA Futures 2018/19" "Regular Season MVP"
```

================================================
FILE: SeleniumScraper.py
================================================
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from datetime import datetime
from sys import argv

# The parameters, in order, are the names of the sport, market, and particular bet.
# E.g. >python SeleniumScraper.py "Basketball" "NBA Futures 2018/19" "Regular Season MVP"
SPORT = str(argv[1])
MARKET = str(argv[2])
BET = str(argv[3])

# To run headless instead:
# firefox_options = Options()
# firefox_options.add_argument("--headless")
# driver = webdriver.Firefox(executable_path=r'C:\Program Files\geckodriver\geckodriver.exe', options=firefox_options)

# A raw string keeps backslashes like '\g' from being read as escape sequences.
driver = webdriver.Firefox(executable_path=r'C:\Program Files\geckodriver\geckodriver.exe')
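
# A possible alternative to the try/except polling in __main__ below: Selenium's
# explicit-wait API (WebDriverWait + expected_conditions) blocks until a locator
# matches or a timeout elapses. This helper is only a sketch of that approach;
# it isn't called anywhere in the script, and the 10-second timeout is an
# arbitrary assumption.
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


def wait_for_elements(class_name, timeout=10):
    """Wait until at least one element with class_name is present, then return them all."""
    return WebDriverWait(driver, timeout).until(
        EC.presence_of_all_elements_located((By.CLASS_NAME, class_name)))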

if __name__ == '__main__':
    # teams = ["ATL Hawks",
    #          "BKN Nets",
    #          "BOS Celtics",
    #          "CHA Hornets",
    #          "CHI Bulls",
    #          "CLE Cavaliers",
    #          "DAL Mavericks",
    #          "DEN Nuggets",
    #          "DET Pistons",
    #          "GS Warriors",
    #          "HOU Rockets",
    #          "IND Pacers",
    #          "LA Clippers",
    #          "LA Lakers",
    #          "MEM Grizzlies",
    #          "MIA Heat",
    #          "MIL Bucks",
    #          "MIN Timberwolves",
    #          "NO Pelicans",
    #          "NY Knicks",
    #          "OKC Thunder",
    #          "ORL Magic",
    #          "PHI 76ers",
    #          "PHX Suns",
    #          "POR Trail Blazers",
    #          "SA Spurs",
    #          "SAC Kings",
    #          "TOR Raptors",
    #          "UTA Jazz",
    #          "WAS Wizards"]

    # All the ugly try/except blocks are there to keep the runtime down while still
    # coping with elements that haven't loaded on the page yet. Without them, the
    # script hits errors trying to access elements before they exist; each loop just
    # keeps retrying until they've loaded. The other solution I tried was to sleep()
    # for a set time before accessing the elements, but that seriously increased the
    # runtime, so I'm sticking with the ugliness. (The wait_for_elements sketch above
    # is a third option.)
    driver.get('https://www.bet365.com/')
    driver.find_element_by_link_text("English").click()

    # go to Basketball markets
    while True:
        try:
            left_menu_div = driver.find_elements_by_class_name("wn-WebNavModule")
            left_menu = left_menu_div[0].find_elements_by_class_name("wn-Classification")
            break
        except:
            pass
    for link in left_menu:
        if link.text == SPORT:
            link.click()
            break

    # go to Futures
    while True:
        try:
            driver.find_elements_by_class_name("sl-LiveInPlayHeader_ButtonBarButton")[1].click()
            break
        except:
            pass

    # find the bet
    while True:
        try:
            nba_futures_div = driver.find_element_by_class_name("sm-MarketGroup")
            break
        except:
            pass
    bets = nba_futures_div.find_elements_by_class_name("sm-CouponLink_Label")
    for bet in bets:
        if bet.text == BET:
            bet.click()
            break

    # grab data
    while True:
        try:
            win_outright_div = driver.find_element_by_class_name("gl-MarketGroupContainer")
            teams = win_outright_div.find_elements_by_class_name("gl-Participant_Name")
            american_odds = win_outright_div.find_elements_by_class_name("gl-Participant_Odds")
            break
        except:
            pass

    # make a new list with decimal odds
    num_teams = len(teams)
    if num_teams == 30:
        team_odds_pairs = []
        for i in range(len(teams)):
            sign = american_odds[i].text[0]
            value = int(american_odds[i].text[1:])
            if sign == '-':
                new_odds = (100 / value) + 1.0
            else:
                new_odds = (value / 100) + 1.0
            team_odds_pairs.append((teams[i].text, new_odds))
        team_odds_pairs.sort()
        driver.close()
    else:
        driver.close()  # close the browser before bailing out
        raise Exception("There is data for " + str(num_teams) + " teams rather than the expected 30!")

    # output the data
    output_record = datetime.now().strftime("%Y-%m-%d")
    for pair in team_odds_pairs:
        if pair[1].is_integer():
            odds = int(pair[1])
        else:
            odds = round(pair[1], 9)
        output_record += "," + str(odds)
    print(output_record)

    # print(datetime.now().strftime("%Y-%m-%d"))
    # for pair in team_odds_pairs:
    #     print(str(pair[0]) + "," + str(pair[1]))
    #     # print(pair[1])

================================================
FILE: VegasInsiderScraper.py
================================================
from requests import get
from bs4 import BeautifulSoup
import sys
from datetime import datetime


class VegasInsiderScraper:
    teams = ["Hawks",
             "Nets",
             "Celtics",
             "Hornets",
             "Bulls",
             "Cavaliers",
             "Mavericks",
             "Nuggets",
             "Pistons",
             "Warriors",
             "Rockets",
             "Pacers",
             "Clippers",
             "Lakers",
             "Grizzlies",
             "Heat",
             "Bucks",
             "T-Wolves",  # discrepancy (was Timberwolves)
             "Pelicans",
             "Knicks",
             "Thunder",
             "Magic",
             "76ers",
             "Suns",
             "Blazers",
             "Spurs",
             "Kings",
             "Raptors",
             "Jazz",
             "Wizards"]

    def __init__(self, url, output_filename):
        self.__output_filename = output_filename
        self.__page = get(url)
        if self.__page.status_code == 200:
            self.__soup = BeautifulSoup(self.__page.content, "html.parser")
        else:
            sys.exit(1)
        self.__team_odds_pairs = []

    def scrape(self):
        div = self.__soup.find(id="_")
        text = div.get_text().split('\n\n\n\n')
        num_teams = int(len(text) / 2)
        text = text[:num_teams * 2]
        for i in range(len(text)):
            text[i] = text[i].lstrip()
        self.__create_team_odds_list(text)
        self.__append_to_csv()

        # table = self.__soup.find_all('table', class_="table-wrapper cellTextNorm")[0]
        # rows = table.find_all('tr')[1:]
        # num_teams = len(rows)
        # if num_teams == 30:
        #     self.__create_team_odds_list(rows)
        #     self.__append_to_csv()
        # else:
        #     raise Exception("There is data for " + str(num_teams) + " teams rather than the expected 30")

    def __create_team_odds_list(self, text):
        # text alternates team name, odds, team name, odds, ...
        i = 0
        while i < len(text):
            team = text[i]
            odds = self.__convert_to_decimal(text[i + 1])
            self.__team_odds_pairs.append((team, odds))
            i += 2
        # for i in range(len(text)):
        #     row = text[i].find_all('td')
        #     team = row[0].get_text()
        #     odds = self.__convert_to_decimal(row[1].get_text())
        #     self.__team_odds_pairs.append((team, odds))
        self.__team_odds_pairs.sort()

    def __convert_to_decimal(self, odds):
        sign = odds[0]
        value = int(odds[1:])
        if sign == '-':
            new_odds = (100 / value) + 1.0
        else:
            new_odds = (value / 100) + 1.0
        # numerator = int(odds.split('/')[0])
        # denominator = int(odds.split('/')[1])
        # new_odds = numerator / denominator + 1
        if new_odds.is_integer():
            return str(int(new_odds))
        else:
            return str(round(new_odds, 9))

    def __append_to_csv(self):
        output_record = datetime.now().strftime("%Y-%m-%d")
        scraped_teams = "DATE"
        for pair in self.__team_odds_pairs:
            scraped_teams += "," + pair[0].split(" ")[-1]
        for odds in self.__create_list_of_scraped_teams(scraped_teams):
            output_record += "," + odds
        with open(self.__output_filename, "a") as out_file:
            out_file.write(output_record + "\n")

    def __create_list_of_scraped_teams(self, scraped_teams):
        i = 0
        odds_list = []
        for team in self.teams:
            if team not in scraped_teams:
                odds_list.append("10000")  # placeholder odds for a team missing from the page
            else:
                odds_list.append(self.__team_odds_pairs[i][1])
                i += 1
        return odds_list
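
# Sanity checks for the American-to-decimal conversion used by __convert_to_decimal
# above (and by SeleniumScraper.py): a negative line -V pays 100/V per unit staked,
# a positive line +V pays V/100 per unit staked, and adding 1.0 folds the returned
# stake into the decimal price. These asserts are illustrative additions, not part
# of the original script.
assert round((100 / 150) + 1.0, 9) == 1.666666667  # "-150" -> 1.666666667
assert round((200 / 100) + 1.0, 9) == 3.0          # "+200" -> 3.0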

if __name__ == "__main__":
    url = "https://www.sportsbook.ag/sbk/sportsbook4/nba-betting/nba-futures-championship.sbk"
    filename = "C:\\Users\\curti\\OneDrive\\Documents\\odds.txt"
    scraper = VegasInsiderScraper(url, filename)
    scraper.scrape()

================================================
FILE: csv-json.py
================================================
import json

if __name__ == "__main__":
    with open("C:\\Users\\curti\\OneDrive\\Documents\\odds.txt", "r") as in_file:
        lines = in_file.readlines()

    # Shorten the column names to the city abbreviations from the header row.
    column_names = lines[0].strip().split(',')
    column_names[0] = "DATE"
    la_counter = 0  # for distinguishing between LAC and LAL
    for i in range(len(column_names)):
        column_names[i] = column_names[i].split(' ')[0]
        if column_names[i] == "LA":
            if la_counter == 0:
                column_names[i] += "C"
                la_counter += 1
            else:
                column_names[i] += "L"

    # Transpose the row-oriented CSV into 31 columns: DATE plus one per team.
    records = []
    for i in range(31):
        new_column = []
        for line in lines[1:]:
            row_values = line.strip().split(',')
            if i > 0:
                try:
                    appendee = int(row_values[i])
                except ValueError:
                    appendee = round(float(row_values[i]), 9)
            else:
                appendee = row_values[i]
            new_column.append(appendee)
        records.append(new_column)

    dictionary = dict(zip(column_names, records))
    json_string = json.dumps(dictionary, indent=4)
    print(json_string)
    with open("C:\\Users\\curti\\OneDrive\\Documents\\odds.json", "w") as out_file:
        out_file.write(json_string)
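
    # A more compact version of the transpose above, using the standard library's
    # csv module. Left commented out as a sketch, not part of the original script;
    # it assumes the same odds.txt layout, omits the LAC/LAL disambiguation, and
    # keeps the odds as strings rather than coercing them to int/float.
    #
    # import csv
    # with open("C:\\Users\\curti\\OneDrive\\Documents\\odds.txt", newline="") as f:
    #     header, *data = list(csv.reader(f))
    # columns = {name.split(' ')[0]: [row[i] for row in data]
    #            for i, name in enumerate(header)}
    # print(json.dumps(columns, indent=4))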