Repository: mehranshakarami/AI_Spectrum Branch: main Commit: 88cd3d85f6a6 Files: 29 Total size: 29.4 KB Directory structure: gitextract_k_b5pfd1/ ├── .gitattributes ├── .gitignore ├── 2021/ │ └── Twitter_API/ │ ├── config.ini │ └── twitter_api.py ├── 2022/ │ ├── Math_plotter/ │ │ ├── math_plotter.py │ │ └── mathplotter/ │ │ ├── __init__.py │ │ ├── click_and_crop.py │ │ ├── latexPlotter.py │ │ ├── readEquations.py │ │ └── utils.py │ ├── Sentiment_Analysis/ │ │ └── tw-sentiment.py │ ├── Twitter_API/ │ │ ├── config.ini │ │ ├── twitter_data_search.py │ │ ├── twitter_data_stream.py │ │ └── twitter_data_users.py │ ├── Web_Scraping/ │ │ └── bs-amazon.py │ └── snscrape/ │ └── tweets.py ├── 2024/ │ ├── Multi-lingual sentiment analysis/ │ │ ├── main.py │ │ ├── readme.md │ │ ├── requirements.txt │ │ ├── sentiment.py │ │ ├── test_labels.csv │ │ ├── translate.py │ │ └── tweets.csv │ └── Twikit/ │ ├── config.ini │ ├── main.py │ ├── readme.md │ └── tweets.csv └── README.md ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitattributes ================================================ # Auto detect text files and perform LF normalization * text=auto ================================================ FILE: .gitignore ================================================ # Mac folder *DS_Store ================================================ FILE: 2021/Twitter_API/config.ini ================================================ [twitter] api_key = api_key_secret = access_token = access_token_secret = ================================================ FILE: 2021/Twitter_API/twitter_api.py ================================================ import tweepy import configparser import pandas as pd # read configs config = configparser.ConfigParser() config.read('config.ini') api_key = config['twitter']['api_key'] api_key_secret = config['twitter']['api_key_secret'] access_token = 
config['twitter']['access_token'] access_token_secret = config['twitter']['access_token_secret'] # authentication auth = tweepy.OAuthHandler(api_key, api_key_secret) auth.set_access_token(access_token, access_token_secret) api = tweepy.API(auth) public_tweets = api.home_timeline() # create dataframe columns = ['Time', 'User', 'Tweet'] data = [] for tweet in public_tweets: data.append([tweet.created_at, tweet.user.screen_name, tweet.text]) df = pd.DataFrame(data, columns=columns) df.to_csv('tweets.csv') ================================================ FILE: 2022/Math_plotter/math_plotter.py ================================================ from turtle import width from matplotlib.pyplot import axis from mathreader.api import * from mathreader.config import Configuration from mathreader.helpers.exceptions import * import base64 import numpy as np import cv2 import sys from PIL import ImageGrab from time import sleep from mathplotter.readEquations import find_equations, frame_change from mathplotter.click_and_crop import image_crop, add_text import matplotlib.pyplot as plt from mathplotter.latexPlotter import plot_eq import mathplotter.utils as utils import imutils # import colors for plot colors = utils.plotColors() cv2_color = colors[0] plt_color = colors[1] def hmp(cam=0, width=500, new_back=True): # plots inits # plt.ion() fig = plt.figure(figsize=(8, 5), tight_layout=True) ax = fig.gca() plt.pause(0.0001) configs = Configuration() hme_recognizer = HME_Recognizer() cap = cv2.VideoCapture(cam) if not cap.isOpened(): raise IOError("Cannot open webcam") # figs preparations if new_back: back, crop_box = image_crop(cam=cam, wind_name="background", width=width) cv2.imwrite("frame_background.jpg", back) with open("crop_box.txt", "w") as f: for line in crop_box: f.write(str(line)) f.write("\n") else: back = cv2.imread("frame_background.jpg") crop_box = [] with open("crop_box.txt", "r") as f: lines = f.readlines() for line in lines: crop_box.append(int(line)) x0, x1, y0, y1 
= crop_box frame_old = back.copy() # inits eq_old = None utils.initializeTrackbars() ocrVal = False # main loop while True: equations = [] # equations_parser = [] gotNewEquation = False if utils.valTrackbars()[-1] == 0: ocrVal = False _, frame = cap.read() frame = imutils.resize(frame, width=width) frame = frame[x0:x1, y0:y1] frameBW, equations_imgs, bboxes = find_equations(frame, back) frameBW_BGR = cv2.cvtColor( frameBW, cv2.COLOR_GRAY2BGR, ) try: if bboxes: for idx, bbox in enumerate(bboxes): x, y, w, h = bbox cv2.rectangle( frameBW_BGR, (x, y), (x + w, y + h), cv2_color[idx], 4 ) cv2.imshow("pic", frameBW_BGR) else: cv2.imshow("pic", frameBW_BGR) except: cv2.imshow("pic", frameBW_BGR) if cv2.waitKey(1) & 0xFF == 27: break # esc to quit if ( ocrVal == False and utils.valTrackbars()[-1] == 1 ): # if OCR == 1 ocrVal = True for idx, eq in enumerate(equations_imgs): cv2.imwrite("eq.png", eq) hme_recognizer.load_image("eq.png", data_type="path") try: proc_img = frameBW_BGR.copy() add_text(proc_img, "Detecting") cv2.imshow("pic", proc_img) print("Detecting") expression, img = hme_recognizer.recognize() # expression_parsed = hme_recognizer.expression_after_parser print("Latex: ", expression) if "=" in expression: equations.append(expression) # equations_parser.append(expression_parsed) eq_old = frameBW.copy() gotNewEquation = True except: pass if gotNewEquation: try: print(equations) ax, fig = plot_eq(equations, ax, fig) plt.pause(0.0001) except Exception as e: print(e) # if new_eq is None: frame_old = frame.copy() sleep(0.1) cap.release() cv2.destroyAllWindows() if __name__ == "__main__": hmp(cam=1, width=None, new_back=False) ================================================ FILE: 2022/Math_plotter/mathplotter/__init__.py ================================================ ================================================ FILE: 2022/Math_plotter/mathplotter/click_and_crop.py ================================================ # import the necessary packages import argparse 
def add_text(img, text):
    """Overlay ``text`` on ``img`` and return the result of ``cv2.putText``.

    The caption is always drawn at pixel (15, 105) in the triplex Hershey
    font, scale 0.7, colour (130, 3, 3), thickness 1, anti-aliased.
    """
    origin = (15, 105)
    colour = (130, 3, 3)
    return cv2.putText(
        img,
        text,
        origin,
        cv2.FONT_HERSHEY_TRIPLEX,
        0.7,
        colour,
        1,
        cv2.LINE_AA,
    )


def click_and_crop(event, x, y, flags, param):
    """Mouse callback that records a rectangular selection in module globals.

    ``param`` is used as the name of the window in which the finished
    rectangle is shown. ``flags`` is accepted because OpenCV passes it, but
    it is not used.

    * left button down  -> start a new selection at (x, y)
    * mouse move (while selecting) -> remember the current corner
    * left button up    -> close the selection and draw it on ``image``
    """
    global refPt, cropping, sel_rect_endpoint, image, Lclick, Rclick

    point = [x, y]

    if event == cv2.EVENT_LBUTTONDOWN:
        # A fresh press discards any earlier selection.
        refPt = [point]
        Lclick = True
        cropping = True
        return

    if event == cv2.EVENT_MOUSEMOVE and cropping:
        # Rubber-band corner, consumed by the drawing loop elsewhere.
        sel_rect_endpoint = [point]
        return

    if event != cv2.EVENT_LBUTTONUP:
        return

    # Button released: store the second corner and show the final box.
    refPt.append(point)
    cropping = False
    Rclick = True
    cv2.rectangle(image, refPt[0], refPt[1], (0, 255, 0), 2)
    cv2.imshow(param, image)
is pressed while captured: # display the image and wait for a keypress # cv2.imshow(wind_name, image) if not cropping and not Rclick and not Lclick: text = 'Draw a Box with Mouse, or "Esc": Quit' image_text = image.copy() image_text = add_text(image_text, text) cv2.imshow(wind_name, image_text) key = cv2.waitKey(1) & 0xFF if key == 27: return None # esc to quit elif cropping and sel_rect_endpoint: rect_cpy = image.copy() cv2.rectangle(rect_cpy, refPt[0], sel_rect_endpoint[0], (0, 255, 0), 1) cv2.imshow(wind_name, rect_cpy) elif Rclick and Lclick: text = '"c": Crop, "r": Reset Box, "Esc": Quit' image_text = image.copy() image_text = add_text(image_text, text) cv2.imshow(wind_name, image_text) key = cv2.waitKey(1) & 0xFF # if the 'r' key is pressed, reset the cropping region if key == ord("r"): image = clone.copy() cv2.imshow(wind_name, image) refPt = [] cropping = False sel_rect_endpoint = [] # if the 'c' key is pressed, break from the loop elif key == ord("c"): break elif key == 27: return None # if there are two reference points, then crop the region of interest # crop the image if len(refPt) == 2: crop_box = [ min(refPt[0][1], refPt[1][1]), max(refPt[0][1], refPt[1][1]), min(refPt[0][0], refPt[1][0]), max(refPt[0][0], refPt[1][0]), ] roi = clone[crop_box[0] : crop_box[1], crop_box[2] : crop_box[3]] # close all open windows # cv2.destroyAllWindows() cv2.destroyWindow(wind_name) return roi, crop_box if __name__ == "__main__": cam = 0 img_cropped, crop_box = image_crop(cam=cam, wind_name="background") cv2.imshow("background", img_cropped) cv2.waitKey(0) # cv2.imwrite("background.jpg", img_cropped) cap = cv2.VideoCapture(cam) _, frame = cap.read() x0, x1, y0, y1 = crop_box frame_cropped = frame[x0:x1, y0:y1] cv2.imshow("frame", frame_cropped) cv2.waitKey(0) # cv2.imwrite("frame.jpg", frame_cropped) ================================================ FILE: 2022/Math_plotter/mathplotter/latexPlotter.py ================================================ import numpy as np 
def check_axis(eq, ax, fig):
    """Make sure ``ax`` has the projection that ``eq`` needs.

    An equation containing the letter ``z`` needs a ``"3d"`` axis; any other
    equation needs a non-3d axis. When the current axis already matches it is
    returned untouched. Otherwise it is removed from ``fig`` and replaced by
    a new subplot of the right kind.

    Returns the tuple ``(ax, fig)``.
    """
    wants_3d = "z" in eq
    is_3d = ax.name == "3d"

    if wants_3d == is_3d:
        # Projection already fits the equation; nothing to rebuild.
        return ax, fig

    ax.remove()
    plt.pause(0.0001)
    if wants_3d:
        ax = fig.add_subplot(projection="3d")
    else:
        ax = fig.add_subplot()
    plt.pause(0.0001)
    return ax, fig
def to_bw(img):
    """Convert ``img`` to a dilated black-and-white image.

    The image is converted to grayscale and thresholded with Otsu's method.
    If the threshold Otsu picked is below the "Cut noise" trackbar value, a
    fixed threshold of 100 is used instead. The binary image is then dilated
    with a square kernel whose side is the "Text width" trackbar value.
    """
    # Read the trackbars once so both settings come from the same instant.
    settings = utils.valTrackbars()
    kernel_size, min_otsu_thresh = settings[0], settings[3]

    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    thresh, img_bw = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    if thresh < min_otsu_thresh:
        _, img_bw = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY)

    kernel_dilate = np.ones((kernel_size, kernel_size))
    return cv2.dilate(img_bw, kernel_dilate, iterations=1)


def find_equations(img, back_img):
    """Locate handwritten regions in ``img`` relative to ``back_img``.

    ``img`` is subtracted from ``back_img`` (saturating), binarised with
    :func:`to_bw`, and dilated with a rectangular kernel sized by the
    "Box width" / "Box hight" trackbars so nearby strokes merge into one blob.
    Each external contour of the result is treated as one equation.

    NOTE(review): assumes ``img`` and ``back_img`` have identical shape --
    confirm against the caller.

    Returns:
        ``(img_clean, equations, box_coords)`` where ``img_clean`` is the
        inverted binary image, ``equations`` is a list of crops of
        ``img_clean`` and ``box_coords`` the matching ``[x, y, w, h]`` boxes.
    """
    subtracted_img = cv2.subtract(back_img, img)
    img_bw = to_bw(subtracted_img)
    img_clean = cv2.bitwise_not(img_bw)

    # A structuring element needs positive dimensions; the trackbars may sit at 0.
    ker_morph_x, ker_morph_y = (max(1, size) for size in utils.valTrackbars()[1:3])
    kernel_morph = cv2.getStructuringElement(cv2.MORPH_RECT, (ker_morph_x, ker_morph_y))
    img_morph = cv2.morphologyEx(img_bw, cv2.MORPH_DILATE, kernel_morph)

    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x/4.x return
    # (contours, hierarchy); the contours are second-to-last in both layouts.
    contours = cv2.findContours(
        img_morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE
    )[-2]

    equations = []
    box_coords = []
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        box_coords.append([x, y, w, h])
        equations.append(img_clean[y : y + h, x : x + w])
    return img_clean, equations, box_coords


def frame_change(new_frame, old_frame):
    """Return True when ``new_frame`` differs from ``old_frame``.

    The absolute difference is used so that pixels that got brighter count
    as a change just like pixels that got darker (a plain saturating
    subtraction clips one of the two directions to zero). Colour differences
    are binarised with :func:`to_bw` before the comparison.
    """
    delta = cv2.absdiff(old_frame, new_frame)
    # A three-dimensional array means a multi-channel (colour) image.
    if len(delta.shape) == 3:
        delta = to_bw(delta)
    return np.linalg.norm(delta) != 0
def plotColors():
    """Return the drawing palettes as ``[cv2_color, plt_color]``.

    ``cv2_color`` is a list of 3-tuples used for OpenCV rectangles and
    ``plt_color`` a list of hex strings used for Matplotlib lines. Both
    lists have seven entries.
    """
    cv2_color = [
        (130, 3, 3),
        (0, 37, 184),
        (2, 99, 15),
        (70, 2, 99),
        (1, 140, 117),
        (138, 145, 1),
        (1, 120, 106),
    ]
    plt_color = [
        "#0025b8",
        "#820303",
        "#02630f",
        "#460263",
        "#018c75",
        "#8a9101",
        "#01786a",
    ]
    return [cv2_color, plt_color]


def nothing(x):
    """Do nothing; ``cv2.createTrackbar`` requires a callback."""
    pass


def initializeTrackbars(initVals=(1, 150, 50, 2)):
    """Create the "trackbars" window and its five sliders.

    Args:
        initVals: initial slider positions, indexed as
            ``[text width, box width, box height, cut noise]``. Any
            indexable sequence works; the default is an immutable tuple so
            it cannot be altered between calls.
    """
    # Separate window that hosts all sliders.
    cv2.namedWindow("trackbars")

    # (label, initial position, maximum) in creation order.
    sliders = (
        ("Cut noise", initVals[3], 20),
        ("Text width", initVals[0], 10),
        ("Box width", initVals[1], 300),
        ("Box hight", initVals[2], 300),
        ("OCR", 0, 1),
    )
    for label, start, maximum in sliders:
        cv2.createTrackbar(label, "trackbars", start, maximum, nothing)


def valTrackbars():
    """Return the current slider positions.

    Returns:
        ``[textWidth, boxWidth, boxHeight, cutNoise, ocr]``.
    """
    # The labels must match the ones used in initializeTrackbars exactly.
    labels = ("Text width", "Box width", "Box hight", "Cut noise", "OCR")
    return [cv2.getTrackbarPos(label, "trackbars") for label in labels]
AutoTokenizer, AutoModelForSequenceClassification from scipy.special import softmax # tweet = "@MehranShakarami today's cold @ home 😒 https://mehranshakarami.com" tweet = 'Great content! subscribed 😉' # precprcess tweet tweet_words = [] for word in tweet.split(' '): if word.startswith('@') and len(word) > 1: word = '@user' elif word.startswith('http'): word = "http" tweet_words.append(word) tweet_proc = " ".join(tweet_words) # load model and tokenizer roberta = "cardiffnlp/twitter-roberta-base-sentiment" model = AutoModelForSequenceClassification.from_pretrained(roberta) tokenizer = AutoTokenizer.from_pretrained(roberta) labels = ['Negative', 'Neutral', 'Positive'] # sentiment analysis encoded_tweet = tokenizer(tweet_proc, return_tensors='pt') # output = model(encoded_tweet['input_ids'], encoded_tweet['attention_mask']) output = model(**encoded_tweet) scores = output[0][0].detach().numpy() scores = softmax(scores) for i in range(len(scores)): l = labels[i] s = scores[i] print(l,s) ================================================ FILE: 2022/Twitter_API/config.ini ================================================ [twitter] api_key = api_key_secret = access_token = access_token_secret = ================================================ FILE: 2022/Twitter_API/twitter_data_search.py ================================================ import tweepy import configparser import pandas as pd # read configs config = configparser.ConfigParser() config.read('config.ini') api_key = config['twitter']['api_key'] api_key_secret = config['twitter']['api_key_secret'] access_token = config['twitter']['access_token'] access_token_secret = config['twitter']['access_token_secret'] # authentication auth = tweepy.OAuthHandler(api_key, api_key_secret) auth.set_access_token(access_token, access_token_secret) api = tweepy.API(auth) # user tweets # user = 'veritasium' # limit=300 # tweets = tweepy.Cursor(api.user_timeline, screen_name=user, count=200, tweet_mode='extended').items(limit) # search 
class Linstener(tweepy.Stream):
    """Stream listener that collects statuses and stops after ``limit`` of them."""

    # Number of statuses to collect before disconnecting.
    limit = 1

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``tweepy.Stream`` and start an empty buffer."""
        super().__init__(*args, **kwargs)
        # Per-instance list: a class-level list would be shared by every
        # listener created in the same process.
        self.tweets = []

    def on_status(self, status):
        """Store ``status`` and disconnect once enough have been collected."""
        self.tweets.append(status)
        # print(status.user.screen_name + ": " + status.text)
        # ">=" still fires if the count ever gets past the limit.
        if len(self.tweets) >= self.limit:
            self.disconnect()
2022/Twitter_API/twitter_data_users.py ================================================ import tweepy import configparser import pandas as pd # read configs config = configparser.ConfigParser() config.read('config.ini') api_key = config['twitter']['api_key'] api_key_secret = config['twitter']['api_key_secret'] access_token = config['twitter']['access_token'] access_token_secret = config['twitter']['access_token_secret'] # authentication auth = tweepy.OAuthHandler(api_key, api_key_secret) auth.set_access_token(access_token, access_token_secret) api = tweepy.API(auth) # user tweets user = 'veritasium' limit=300 tweets = tweepy.Cursor(api.user_timeline, screen_name=user, count=200, tweet_mode='extended').items(limit) # tweets = api.user_timeline(screen_name=user, count=limit, tweet_mode='extended') # create DataFrame columns = ['User', 'Tweet'] data = [] for tweet in tweets: data.append([tweet.user.screen_name, tweet.full_text]) df = pd.DataFrame(data, columns=columns) print(df) ================================================ FILE: 2022/Web_Scraping/bs-amazon.py ================================================ from bs4 import BeautifulSoup import requests import csv # get html url = "https://www.amazon.com/Best-Sellers-Books/zgbs/books" # change the user-agent value based on your web browser headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.82 Safari/537.36'} page = requests.get(url, headers=headers) soup = BeautifulSoup(page.content, 'html.parser') # get all books books = soup.find_all(id="gridItemRoot") csv_headers = ['Rank', 'Title', 'Author', 'Price'] with open('amazon_books.csv', 'w', encoding='utf-8', newline='') as f: writer = csv.writer(f) writer.writerow(csv_headers) for book in books: rank = book.find('span', class_='zg-bdg-text').text[1:] children = book.find('div', class_='zg-grid-general-faceout').div title = children.contents[1].text author = children.contents[2].text price = 
def read_tweets(file_path: str) -> list[dict[str, str]]:
    """Read a CSV file with a header row into a list of row dictionaries.

    The file is decoded as UTF-8 regardless of the platform's locale
    encoding, and a leading byte-order mark, if present, is dropped so it
    does not end up in the first column name. ``newline=""`` lets the csv
    module handle line endings itself, including newlines inside quoted
    fields.

    Args:
        file_path: path of the CSV file.

    Returns:
        One ``dict`` per data row, keyed by the header names.
    """
    with open(file_path, "r", encoding="utf-8-sig", newline="") as file:
        return list(csv.DictReader(file))
# Preprocess text (username and link placeholders)
def preprocess(text):
    """Replace user mentions and links in ``text`` with fixed placeholders.

    The text is split on single spaces. A token that starts with ``@`` and
    is longer than one character becomes ``@user``; a token that starts
    with ``http`` becomes ``http``; every other token is kept as is. The
    tokens are joined back with single spaces.
    """

    def _placeholder(token):
        if len(token) > 1 and token.startswith('@'):
            return '@user'
        if token.startswith('http'):
            return 'http'
        return token

    return " ".join(_placeholder(token) for token in text.split(" "))


def predict_sentiment(text: str) -> str:
    """Return the label the model assigns to ``text``.

    The text is passed through :func:`preprocess`, tokenised, and fed to the
    module-level model; the index of the largest logit is looked up in
    ``config.id2label``.
    """
    model_input = tokenizer(preprocess(text), return_tensors='pt')
    logits = model(**model_input).logits
    best_index = logits.argmax().item()
    return config.id2label[best_index]
def translate_text(original_text: str) -> tuple[str, str]:
    """Translate ``original_text`` to English.

    Returns:
        A ``(translated_text, original_language)`` pair taken from the
        ``text`` and ``src`` attributes of the translation result. Callers
        unpack both values, so the annotation is a 2-tuple, not ``str``.
    """
    translation = translator.translate(original_text, dest='en')
    return translation.text, translation.src
def get_tweets(tweets):
    """Fetch the first page of search results, or the page after ``tweets``.

    With ``tweets`` set to ``None`` a new search for ``QUERY`` is started
    through the module-level ``client``. Otherwise the function sleeps for a
    random 5-10 seconds and then asks ``tweets`` for its next page.

    Returns whatever the search / ``next()`` call returned.
    """
    if tweets is not None:
        # Random pause between consecutive page requests.
        pause = randint(5, 10)
        print(f'{datetime.now()} - Getting next tweets after {pause} seconds ...')
        time.sleep(pause)
        return tweets.next()

    #* get tweets
    print(f'{datetime.now()} - Getting tweets...')
    return client.search_tweet(QUERY, product='Top')
Waiting until {rate_limit_reset}') wait_time = rate_limit_reset - datetime.now() time.sleep(wait_time.total_seconds()) continue if not tweets: print(f'{datetime.now()} - No more tweets found') break for tweet in tweets: tweet_count += 1 tweet_data = [tweet_count, tweet.user.name, tweet.text, tweet.created_at, tweet.retweet_count, tweet.favorite_count] with open('tweets.csv', 'a', newline='') as file: writer = csv.writer(file) writer.writerow(tweet_data) print(f'{datetime.now()} - Got {tweet_count} tweets') print(f'{datetime.now()} - Done! Got {tweet_count} tweets found') ================================================ FILE: 2024/Twikit/readme.md ================================================ # Scraping `X.com` with Twikit This code uses [Twikit](https://github.com/d60/twikit) to scrape Tweet data. To run the code, use `pip install "twikit==1.7.6"` to install the _twikit_ package. The latest update of _twikit_ deprecated the synchronous method which is used in the code. Check out the supplementary YouTube tutorial: https://youtu.be/6D6fVyFQD5A ================================================ FILE: 2024/Twikit/tweets.csv ================================================ Tweet_count,Username,Text,Created At,Retweets,Likes ================================================ FILE: README.md ================================================ # AI_Spectrum