[
  {
    "path": ".gitattributes",
    "content": "# Auto detect text files and perform LF normalization\n* text=auto\n"
  },
  {
    "path": ".gitignore",
    "content": "# Mac folder\n*DS_Store"
  },
  {
    "path": "2021/Twitter_API/config.ini",
    "content": "[twitter]\n\napi_key = \napi_key_secret = \n\naccess_token = \naccess_token_secret = "
  },
  {
    "path": "2021/Twitter_API/twitter_api.py",
    "content": "import tweepy\nimport configparser\nimport pandas as pd\n\n# read configs\nconfig = configparser.ConfigParser()\nconfig.read('config.ini')\n\napi_key = config['twitter']['api_key']\napi_key_secret = config['twitter']['api_key_secret']\n\naccess_token = config['twitter']['access_token']\naccess_token_secret = config['twitter']['access_token_secret']\n\n# authentication\nauth = tweepy.OAuthHandler(api_key, api_key_secret)\nauth.set_access_token(access_token, access_token_secret)\n\napi = tweepy.API(auth)\n\npublic_tweets = api.home_timeline()\n\n# create dataframe\ncolumns = ['Time', 'User', 'Tweet']\ndata = []\nfor tweet in public_tweets:\n    data.append([tweet.created_at, tweet.user.screen_name, tweet.text])\n\ndf = pd.DataFrame(data, columns=columns)\n\ndf.to_csv('tweets.csv')"
  },
  {
    "path": "2022/Math_plotter/math_plotter.py",
    "content": "from turtle import width\n\nfrom matplotlib.pyplot import axis\nfrom mathreader.api import *\nfrom mathreader.config import Configuration\nfrom mathreader.helpers.exceptions import *\nimport base64\nimport numpy as np\nimport cv2\nimport sys\nfrom PIL import ImageGrab\nfrom time import sleep\nfrom mathplotter.readEquations import find_equations, frame_change\nfrom mathplotter.click_and_crop import image_crop, add_text\nimport matplotlib.pyplot as plt\nfrom mathplotter.latexPlotter import plot_eq\nimport mathplotter.utils as utils\nimport imutils\n\n\n# import colors for plot\ncolors = utils.plotColors()\ncv2_color = colors[0]\nplt_color = colors[1]\n\n\ndef hmp(cam=0, width=500, new_back=True):\n\n    # plots inits\n    # plt.ion()\n    fig = plt.figure(figsize=(8, 5), tight_layout=True)\n    ax = fig.gca()\n    plt.pause(0.0001)\n\n    configs = Configuration()\n    hme_recognizer = HME_Recognizer()\n\n    cap = cv2.VideoCapture(cam)\n\n    if not cap.isOpened():\n        raise IOError(\"Cannot open webcam\")\n\n    # figs preparations\n    if new_back:\n        back, crop_box = image_crop(cam=cam, wind_name=\"background\", width=width)\n        cv2.imwrite(\"frame_background.jpg\", back)\n        with open(\"crop_box.txt\", \"w\") as f:\n            for line in crop_box:\n                f.write(str(line))\n                f.write(\"\\n\")\n\n    else:\n        back = cv2.imread(\"frame_background.jpg\")\n        crop_box = []\n        with open(\"crop_box.txt\", \"r\") as f:\n            lines = f.readlines()\n\n        for line in lines:\n            crop_box.append(int(line))\n\n    x0, x1, y0, y1 = crop_box\n\n\n    frame_old = back.copy()\n\n    # inits\n    eq_old = None\n\n    utils.initializeTrackbars()\n    ocrVal = False\n    # main loop\n    while True:\n        equations = []\n        # equations_parser = []\n        gotNewEquation = False\n        \n        if utils.valTrackbars()[-1] == 0: \n            ocrVal = False \n        \n        _, frame = cap.read()\n        frame = imutils.resize(frame, width=width)\n        frame = frame[x0:x1, y0:y1]\n\n        frameBW, equations_imgs, bboxes = find_equations(frame, back)\n\n        frameBW_BGR = cv2.cvtColor(\n            frameBW,\n            cv2.COLOR_GRAY2BGR,\n        )\n\n        try:\n            if bboxes:\n                for idx, bbox in enumerate(bboxes):\n                    x, y, w, h = bbox\n                    cv2.rectangle(\n                        frameBW_BGR, (x, y), (x + w, y + h), cv2_color[idx], 4\n                    )\n                    cv2.imshow(\"pic\", frameBW_BGR)\n            else:\n                cv2.imshow(\"pic\", frameBW_BGR)\n        except:\n            cv2.imshow(\"pic\", frameBW_BGR)\n\n        if cv2.waitKey(1) & 0xFF == 27:\n            break  # esc to quit\n\n        if ( ocrVal == False and\n            utils.valTrackbars()[-1] == 1\n        ):  # if OCR == 1\n\n            ocrVal = True\n            \n            for idx, eq in enumerate(equations_imgs):\n\n                cv2.imwrite(\"eq.png\", eq)\n                hme_recognizer.load_image(\"eq.png\", data_type=\"path\")\n\n                try:\n                    \n                    proc_img = frameBW_BGR.copy()\n                    add_text(proc_img, \"Detecting\")\n                    cv2.imshow(\"pic\", proc_img)\n                    print(\"Detecting\")\n                    expression, img = hme_recognizer.recognize()\n                    # expression_parsed = hme_recognizer.expression_after_parser\n                    print(\"Latex: \", expression)\n                    if \"=\" in expression:\n\n                        equations.append(expression)\n                        # equations_parser.append(expression_parsed)\n                        eq_old = frameBW.copy()\n                        gotNewEquation = True\n                    \n                except:\n                    pass\n\n            if gotNewEquation:\n\n                try:\n                    print(equations)\n                    ax, fig = plot_eq(equations, ax, fig)\n                    plt.pause(0.0001)\n                except Exception as e:\n                    print(e)\n            # if new_eq is None:\n\n        frame_old = frame.copy()\n        sleep(0.1)\n        \n    cap.release()\n    cv2.destroyAllWindows()\n\n\n\nif __name__ == \"__main__\":\n    hmp(cam=1, width=None, new_back=False)\n\n    "
  },
  {
    "path": "2022/Math_plotter/mathplotter/__init__.py",
    "content": ""
  },
  {
    "path": "2022/Math_plotter/mathplotter/click_and_crop.py",
    "content": "# import the necessary packages\nimport argparse\nimport cv2\nimport imutils\n\n# initialize the list of reference points and boolean indicating\n# whether cropping is being performed or not\nrefPt = []\ncropping = False\nsel_rect_endpoint = []\nimage = []\nLclick = False\nRclick = False\n\n\ndef add_text(img, text):\n    font = cv2.FONT_HERSHEY_TRIPLEX\n    font_size = 0.7\n    font_color = (130, 3, 3)\n    font_thickness = 1\n    x, y = 15, 105\n\n    return cv2.putText(\n        img,\n        text,\n        (x, y),\n        font,\n        font_size,\n        font_color,\n        font_thickness,\n        cv2.LINE_AA,\n    )\n\n\ndef click_and_crop(event, x, y, flags, param):\n    # grab references to the global variables\n    global refPt, cropping, sel_rect_endpoint, image, Lclick, Rclick\n    wind_name = param\n    # image = param\n    # if the left mouse button was clicked, record the starting\n    # (x, y) coordinates and indicate that cropping is being\n    # performed\n    if event == cv2.EVENT_LBUTTONDOWN:\n        refPt = [[x, y]]\n        Lclick = True\n        # refPt = (min(ix,x), min(iy,y), abs(ix-x), abs(iy-y)) #set bounding box by mouse move\n        cropping = True\n    elif event == cv2.EVENT_MOUSEMOVE and cropping:\n        sel_rect_endpoint = [[x, y]]\n    # check to see if the left mouse button was released\n    elif event == cv2.EVENT_LBUTTONUP:\n        # record the ending (x, y) coordinates and indicate that\n        # the cropping operation is finished\n        refPt.append([x, y])\n        cropping = False\n        Rclick = True\n        # draw a rectangle around the region of interest\n        cv2.rectangle(image, refPt[0], refPt[1], (0, 255, 0), 2)\n        cv2.imshow(wind_name, image)\n\n\ndef image_crop(cam=0, wind_name=\"image\", width=500):\n    captured = False\n    cap = cv2.VideoCapture(cam)\n    global refPt, cropping, sel_rect_endpoint, image\n\n    while True:\n        _, image = cap.read()\n        image = imutils.resize(image, width=width)\n        text = '\"c\": Capture, \"Esc\": Quit'\n        image_text = image.copy()\n        image_text = add_text(image_text, text)\n        cv2.imshow(wind_name, image_text)\n        key = cv2.waitKey(1) & 0xFF\n\n        if key == ord(\"c\"):\n            captured = True\n            break\n\n        elif key == 27:\n            break  # esc to quit\n\n    clone = image.copy()\n    cv2.namedWindow(wind_name)\n    cv2.setMouseCallback(wind_name, click_and_crop, (wind_name))\n\n    # keep looping until the 'q' key is pressed\n    while captured:\n        # display the image and wait for a keypress\n        # cv2.imshow(wind_name, image)\n\n        if not cropping and not Rclick and not Lclick:\n\n            text = 'Draw a Box with Mouse, or \"Esc\": Quit'\n            image_text = image.copy()\n            image_text = add_text(image_text, text)\n            cv2.imshow(wind_name, image_text)\n            key = cv2.waitKey(1) & 0xFF\n\n            if key == 27:\n                return None  # esc to quit\n\n        elif cropping and sel_rect_endpoint:\n\n            rect_cpy = image.copy()\n            cv2.rectangle(rect_cpy, refPt[0], sel_rect_endpoint[0], (0, 255, 0), 1)\n            cv2.imshow(wind_name, rect_cpy)\n\n        elif Rclick and Lclick:\n            text = '\"c\": Crop, \"r\": Reset Box, \"Esc\": Quit'\n            image_text = image.copy()\n            image_text = add_text(image_text, text)\n            cv2.imshow(wind_name, image_text)\n\n        key = cv2.waitKey(1) & 0xFF\n        # if the 'r' key is pressed, reset the cropping region\n        if key == ord(\"r\"):\n\n            image = clone.copy()\n            cv2.imshow(wind_name, image)\n            refPt = []\n            cropping = False\n            sel_rect_endpoint = []\n\n        # if the 'c' key is pressed, break from the loop\n        elif key == ord(\"c\"):\n            break\n\n        elif key == 27:\n            return None\n    # if there are two reference points, then crop the region of interest\n    # crop the image\n\n    if len(refPt) == 2:\n        crop_box = [\n            min(refPt[0][1], refPt[1][1]),\n            max(refPt[0][1], refPt[1][1]),\n            min(refPt[0][0], refPt[1][0]),\n            max(refPt[0][0], refPt[1][0]),\n        ]\n        roi = clone[crop_box[0] : crop_box[1], crop_box[2] : crop_box[3]]\n\n        # close all open windows\n        # cv2.destroyAllWindows()\n        cv2.destroyWindow(wind_name)\n        return roi, crop_box\n\n\nif __name__ == \"__main__\":\n    cam = 0\n    img_cropped, crop_box = image_crop(cam=cam, wind_name=\"background\")\n    cv2.imshow(\"background\", img_cropped)\n    cv2.waitKey(0)\n    # cv2.imwrite(\"background.jpg\", img_cropped)\n    cap = cv2.VideoCapture(cam)\n    _, frame = cap.read()\n    x0, x1, y0, y1 = crop_box\n    frame_cropped = frame[x0:x1, y0:y1]\n    cv2.imshow(\"frame\", frame_cropped)\n    cv2.waitKey(0)\n    # cv2.imwrite(\"frame.jpg\", frame_cropped)\n"
  },
  {
    "path": "2022/Math_plotter/mathplotter/latexPlotter.py",
    "content": "import numpy as np\nfrom mpl_toolkits.mplot3d import Axes3D\nimport matplotlib\n\nimport matplotlib.pyplot as plt\nfrom matplotlib import cm\nimport re\nfrom time import sleep\nfrom sympy import numer\nimport sympy\nfrom sympy.parsing.latex import parse_latex\nfrom sympy.plotting import plot, plot3d\n\n# plt.rcParams.update({\"text.usetex\": True, \"xtick.labelsize\": 16, \"ytick.labelsize\": 16})\nplt_color = [\"#0025b8\", \"#820303\", \"#02630f\", \"#460263\", \"#018c75\"]\n\n\n\n\ndef plot_eq(equations, ax, fig):\n\n    ax.clear()\n\n    ax, fig = check_axis(equations[0], ax, fig)\n\n    for idx, eq in enumerate(equations):\n   \n        eq = eq.replace(\"\\\\cdot\", \"*\")\n        if \"z\" in eq:\n            dim = 3\n            eq = eq.replace(\"z\", \"\")\n            eq = eq.replace(\"=\", \"\")\n        else:\n            dim = 2\n            eq = eq.replace(\"y\", \"\")\n            eq = eq.replace(\"=\", \"\")\n\n        \n        # plot equations\n        sympy_eq = parse_latex(eq)\n\n        \n        if dim == 2:\n            p = plot(sympy_eq,show=False)\n            eq_latex = (\n                r\"$y=\"\n                + sympy.latex(sympy_eq)\n                + \"$\"\n            )\n            x,y =p[0].get_data()\n            ax.plot(\n                x,\n                y,\n                label=eq_latex,\n                color=plt_color[idx],\n                linewidth=2,\n            )\n        else:\n            p = plot3d(sympy_eq,show=False)\n           \n            x,y,z =p[0].get_meshes()\n            eq_latex = (\n                r\"$z=\"\n                + sympy.latex(sympy_eq)\n                + \"$\"\n            )\n            surf = ax.plot_surface(\n                x,\n                y,\n                z,\n                label=eq_latex,\n                cmap=cm.coolwarm,\n                linewidth=0,\n                antialiased=False,\n            )\n            surf._facecolors2d = surf._facecolor3d\n            surf._edgecolors2d = surf._edgecolor3d\n\n    ax.legend(fontsize=12)\n    plt.pause(0.0001)\n    plt.show(block=False)\n    plt.pause(0.0001)\n\n    return ax, fig\n\n\ndef check_axis(eq, ax, fig):\n\n    if \"z\" in eq:\n        if ax.name != \"3d\":\n            ax.remove()\n            plt.pause(0.0001)\n            ax = fig.add_subplot(projection=\"3d\")\n            plt.pause(0.0001)\n    else:\n        if ax.name == \"3d\":\n            ax.remove()\n            plt.pause(0.0001)\n            ax = fig.add_subplot()\n            plt.pause(0.0001)\n\n    return ax, fig\n\n\nif __name__ == \"__main__\":\n    plt.ion()\n    fig = plt.figure(figsize=(8, 5), tight_layout=True)\n    ax = fig.gca()\n    plt.pause(0.0001)\n\n    equations = [\"y=\\\\sqrt{\\\\sqrt{x^2}}+\\\\sqrt{1-x^2}\", \"y=\\\\sqrt{\\\\sqrt{x^2}}-\\\\sqrt{1-x^2}\"]\n    ax, fig = plot_eq(equations, ax, fig)\n    sleep(2)\n    equations = [r\"z=x^2-y^2\"]\n    ax, fig = plot_eq(equations, ax, fig)\n    sleep(2)\n\n    # equations = [\"z=x+y\"]\n    # ax, fig = plot_eq(equations, ax, fig)\n    # sleep(2)\n\n    # equations = [\"y=x^2\", \"y=2*x\"]\n    # ax, fig = plot_eq(equations, ax, fig)\n    # sleep(2)\n"
  },
  {
    "path": "2022/Math_plotter/mathplotter/readEquations.py",
    "content": "import cv2\nimport numpy as np\nfrom PIL import Image\nimport mathplotter.utils as utils\n\n\ncolors = utils.plotColors()\ncv2_color = colors[0]\nplt_color = colors[1]\n\n\ndef to_bw(img):\n    img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)\n    # img_blur = cv2.GaussianBlur(img, (21, 21), 0)\n    (thresh, img_bw) = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)\n\n    setThersh = utils.valTrackbars()[3]\n\n    if thresh < setThersh:\n        _, img_bw = cv2.threshold(img, 100, 255, cv2.THRESH_BINARY)\n    kernel_size = utils.valTrackbars()[0]\n    kernel_dilate = np.ones((kernel_size, kernel_size))\n    img_dilate = cv2.dilate(img_bw, kernel_dilate, iterations=1)\n\n    return img_dilate\n\n\ndef find_equations(img, back_img):\n\n    subtracted_img = cv2.subtract(back_img, img)\n    img_bw = to_bw(subtracted_img)\n    img_clean = cv2.bitwise_not(img_bw)\n\n    ker_morph_x, ker_morph_y = utils.valTrackbars()[1:3]\n    if ker_morph_x == 0:\n        ker_morph_x = 1\n    if ker_morph_y == 0:\n        ker_morph_y = 1\n\n    kernel_morph = cv2.getStructuringElement(cv2.MORPH_RECT, (ker_morph_x, ker_morph_y))\n    img_morph = cv2.morphologyEx(img_bw, cv2.MORPH_DILATE, kernel_morph)\n\n    # ---Finding contours ---\n    contours, hierarchy = cv2.findContours(\n        img_morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE\n    )\n\n    equations = []\n    box_coords = []\n\n    for cnt in contours:\n        x, y, w, h = cv2.boundingRect(cnt)\n\n        box_coords.append([x, y, w, h])\n        img_cropped = img_clean[y : y + h, x : x + w]\n\n        equations.append(img_cropped)\n\n    return img_clean, equations, box_coords\n\n\ndef frame_change(new_frame, old_frame):\n    delta = cv2.subtract(old_frame, new_frame)\n    # check for rgb image\n    if len(delta.shape) == 3:\n        delta = to_bw(delta)\n\n    return np.linalg.norm(delta) != 0\n\n\nif __name__ == \"__main__\":\n    from click_and_crop import image_crop\n    from time import sleep\n\n    utils.initializeTrackbars()\n\n    back = cv2.imread(\"background1.jpg\")\n    frame = cv2.imread(\"frame1.jpg\")\n    while True:\n\n        if cv2.waitKey(1) & 0xFF == 27:\n            break  # esc to quit\n\n        frameBW, equations, bboxes = find_equations(frame, back)\n        frameBW_BGR = cv2.cvtColor(\n            frameBW,\n            cv2.COLOR_GRAY2BGR,\n        )\n\n        try:\n            if bboxes:\n                for idx, bbox in enumerate(bboxes):\n                    x, y, w, h = bbox\n\n                    cv2.rectangle(\n                        frameBW_BGR, (x, y), (x + w, y + h), cv2_color[idx], 4\n                    )\n\n                    cv2.imshow(\"pic\", frameBW_BGR)\n        except:\n            cv2.imshow(\"pic\", frameBW_BGR)\n"
  },
  {
    "path": "2022/Math_plotter/mathplotter/utils.py",
    "content": "# import opencv and numpy\nimport cv2\nimport numpy as np\n\n\ndef plotColors():\n    cv2_color = [\n        (130, 3, 3),\n        (0, 37, 184),\n        (2, 99, 15),\n        (70, 2, 99),\n        (1, 140, 117),\n        (138, 145, 1),\n        (1, 120, 106),\n    ]\n    plt_color = [\n        \"#0025b8\",\n        \"#820303\",\n        \"#02630f\",\n        \"#460263\",\n        \"#018c75\",\n        \"#8a9101\",\n        \"#01786a\",\n    ]\n    return [cv2_color, plt_color]\n\n\n# trackbar callback fucntion does nothing but required for trackbar\ndef nothing(x):\n    pass\n\n\ndef initializeTrackbars(initVals=[1, 150, 50, 2]):\n    # create a seperate window for trackbar\n    cv2.namedWindow(\"trackbars\")\n    # create trackbars\n    cv2.createTrackbar(\"Cut noise\", \"trackbars\", initVals[3], 20, nothing)\n    cv2.createTrackbar(\"Text width\", \"trackbars\", initVals[0], 10, nothing)\n    cv2.createTrackbar(\"Box width\", \"trackbars\", initVals[1], 300, nothing)\n    cv2.createTrackbar(\"Box hight\", \"trackbars\", initVals[2], 300, nothing)\n    cv2.createTrackbar(\"OCR\", \"trackbars\", 0, 1, nothing)\n\n\ndef valTrackbars():\n    textWidth = cv2.getTrackbarPos(\"Text width\", \"trackbars\")\n    boxWidth = cv2.getTrackbarPos(\"Box width\", \"trackbars\")\n    boxHeight = cv2.getTrackbarPos(\"Box hight\", \"trackbars\")\n    cutNoise = cv2.getTrackbarPos(\"Cut noise\", \"trackbars\")\n    ocr = cv2.getTrackbarPos(\"OCR\", \"trackbars\")\n\n    return [textWidth, boxWidth, boxHeight, cutNoise, ocr]\n"
  },
  {
    "path": "2022/Sentiment_Analysis/tw-sentiment.py",
    "content": "from transformers import AutoTokenizer, AutoModelForSequenceClassification\nfrom scipy.special import softmax\n\n# tweet = \"@MehranShakarami today's cold @ home 😒 https://mehranshakarami.com\"\ntweet = 'Great content! subscribed 😉'\n\n# precprcess tweet\ntweet_words = []\n\nfor word in tweet.split(' '):\n    if word.startswith('@') and len(word) > 1:\n        word = '@user'\n    \n    elif word.startswith('http'):\n        word = \"http\"\n    tweet_words.append(word)\n\ntweet_proc = \" \".join(tweet_words)\n\n# load model and tokenizer\nroberta = \"cardiffnlp/twitter-roberta-base-sentiment\"\n\nmodel = AutoModelForSequenceClassification.from_pretrained(roberta)\ntokenizer = AutoTokenizer.from_pretrained(roberta)\n\nlabels = ['Negative', 'Neutral', 'Positive']\n\n# sentiment analysis\nencoded_tweet = tokenizer(tweet_proc, return_tensors='pt')\n# output = model(encoded_tweet['input_ids'], encoded_tweet['attention_mask'])\noutput = model(**encoded_tweet)\n\nscores = output[0][0].detach().numpy()\nscores = softmax(scores)\n\nfor i in range(len(scores)):\n    \n    l = labels[i]\n    s = scores[i]\n    print(l,s)\n\n"
  },
  {
    "path": "2022/Twitter_API/config.ini",
    "content": "[twitter]\n\napi_key = \napi_key_secret = \n\naccess_token = \naccess_token_secret = \n\n"
  },
  {
    "path": "2022/Twitter_API/twitter_data_search.py",
    "content": "import tweepy\nimport configparser\nimport pandas as pd\n\n# read configs\nconfig = configparser.ConfigParser()\nconfig.read('config.ini')\n\napi_key = config['twitter']['api_key']\napi_key_secret = config['twitter']['api_key_secret']\n\naccess_token = config['twitter']['access_token']\naccess_token_secret = config['twitter']['access_token_secret']\n\n# authentication\nauth = tweepy.OAuthHandler(api_key, api_key_secret)\nauth.set_access_token(access_token, access_token_secret)\n\napi = tweepy.API(auth)\n\n# user tweets\n# user = 'veritasium'\n# limit=300\n\n# tweets = tweepy.Cursor(api.user_timeline, screen_name=user, count=200, tweet_mode='extended').items(limit)\n\n# search tweets\nkeywords = '@veritasium'\nlimit=300\n\ntweets = tweepy.Cursor(api.search_tweets, q=keywords, count=100, tweet_mode='extended').items(limit)\n\n# tweets = api.user_timeline(screen_name=user, count=limit, tweet_mode='extended')\n\n# create DataFrame\ncolumns = ['User', 'Tweet']\ndata = []\n\nfor tweet in tweets:\n    data.append([tweet.user.screen_name, tweet.full_text])\n\ndf = pd.DataFrame(data, columns=columns)\n\nprint(df)\n\n"
  },
  {
    "path": "2022/Twitter_API/twitter_data_stream.py",
    "content": "import tweepy\nimport configparser\nimport pandas as pd\n\n\n# read configs\nconfig = configparser.ConfigParser()\nconfig.read('config.ini')\n\napi_key = config['twitter']['api_key']\napi_key_secret = config['twitter']['api_key_secret']\n\naccess_token = config['twitter']['access_token']\naccess_token_secret = config['twitter']['access_token_secret']\n\n# authentication\nauth = tweepy.OAuthHandler(api_key, api_key_secret)\nauth.set_access_token(access_token, access_token_secret)\n\napi = tweepy.API(auth)\n\n\nclass Linstener(tweepy.Stream):\n\n    tweets = []\n    limit = 1\n\n    def on_status(self, status):\n        self.tweets.append(status)\n        # print(status.user.screen_name + \": \" + status.text)\n\n        if len(self.tweets) == self.limit:\n            self.disconnect()\n\n\n\n\n\nstream_tweet = Linstener(api_key, api_key_secret, access_token, access_token_secret)\n\n# stream by keywords\n# keywords = ['2022', '#python']\n\n# stream_tweet.filter(track=keywords)\n\n# stream by users\nusers = ['MehranShakarami', 'veritasium']\nuser_ids = []\n\nfor user in users:\n    user_ids.append(api.get_user(screen_name=user).id)\n\nstream_tweet.filter(follow=user_ids)\n\n# create DataFrame\n\ncolumns = ['User', 'Tweet']\ndata = []\n\nfor tweet in stream_tweet.tweets:\n    if not tweet.truncated:\n        data.append([tweet.user.screen_name, tweet.text])\n    else:\n        data.append([tweet.user.screen_name, tweet.extended_tweet['full_text']])\n\ndf = pd.DataFrame(data, columns=columns)\n\nprint(df)"
  },
  {
    "path": "2022/Twitter_API/twitter_data_users.py",
    "content": "import tweepy\nimport configparser\nimport pandas as pd\n\n# read configs\nconfig = configparser.ConfigParser()\nconfig.read('config.ini')\n\napi_key = config['twitter']['api_key']\napi_key_secret = config['twitter']['api_key_secret']\n\naccess_token = config['twitter']['access_token']\naccess_token_secret = config['twitter']['access_token_secret']\n\n# authentication\nauth = tweepy.OAuthHandler(api_key, api_key_secret)\nauth.set_access_token(access_token, access_token_secret)\n\napi = tweepy.API(auth)\n\n# user tweets\nuser = 'veritasium'\nlimit=300\n\ntweets = tweepy.Cursor(api.user_timeline, screen_name=user, count=200, tweet_mode='extended').items(limit)\n\n# tweets = api.user_timeline(screen_name=user, count=limit, tweet_mode='extended')\n\n# create DataFrame\ncolumns = ['User', 'Tweet']\ndata = []\n\nfor tweet in tweets:\n    data.append([tweet.user.screen_name, tweet.full_text])\n\ndf = pd.DataFrame(data, columns=columns)\n\nprint(df)\n\n"
  },
  {
    "path": "2022/Web_Scraping/bs-amazon.py",
    "content": "from bs4 import BeautifulSoup\nimport requests\nimport csv\n\n\n# get html\nurl = \"https://www.amazon.com/Best-Sellers-Books/zgbs/books\"\n\n# change the user-agent value based on your web browser\nheaders = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.82 Safari/537.36'}\n\npage = requests.get(url, headers=headers)\n\nsoup = BeautifulSoup(page.content, 'html.parser')\n\n# get all books\nbooks = soup.find_all(id=\"gridItemRoot\")\n\ncsv_headers = ['Rank', 'Title', 'Author', 'Price']\nwith open('amazon_books.csv', 'w', encoding='utf-8', newline='') as f:\n    writer = csv.writer(f)\n    writer.writerow(csv_headers)\n\n\nfor book in books:\n\n    rank = book.find('span', class_='zg-bdg-text').text[1:]\n\n    children = book.find('div', class_='zg-grid-general-faceout').div\n\n    title = children.contents[1].text\n    author = children.contents[2].text\n    price = children.contents[-1].text\n   \n    with open('amazon_books.csv', 'a', encoding='utf-8', newline='') as f:\n        writer = csv.writer(f)\n        writer.writerow([rank, title, author, price])\n"
  },
  {
    "path": "2022/snscrape/tweets.py",
    "content": "import snscrape.modules.twitter as sntwitter\nimport pandas as pd\n\nquery = \"(from:elonmusk) until:2020-01-01 since:2010-01-01\"\ntweets = []\nlimit = 5000\n\n\nfor tweet in sntwitter.TwitterSearchScraper(query).get_items():\n    \n    # print(vars(tweet))\n    # break\n    if len(tweets) == limit:\n        break\n    else:\n        tweets.append([tweet.date, tweet.username, tweet.content])\n        \ndf = pd.DataFrame(tweets, columns=['Date', 'User', 'Tweet'])\nprint(df)\n\n# to save to csv\n# df.to_csv('tweets.csv')"
  },
  {
    "path": "2024/Multi-lingual sentiment analysis/main.py",
    "content": "from sentiment import predict_sentiment, ROBERTA_SUPPORTED_LANGUAGES\nfrom translate import translate_text\nimport csv\n\ndef read_tweets(file_path: str) -> list[dict[str, str]]:\n    with open(file_path, \"r\") as file:\n        reader = csv.DictReader(file)\n        list_of_tweets = list(reader)\n    return list_of_tweets\n\ntweets = read_tweets(\"./tweets.csv\")\n\nsentiment_by_id = {}\n\nfor tweet in tweets:\n    tweet_text, language = tweet[\"text\"], tweet[\"language\"]\n\n    if not (language and language in ROBERTA_SUPPORTED_LANGUAGES):\n        translated_text, language = translate_text(tweet_text)\n\n    if language in ROBERTA_SUPPORTED_LANGUAGES:\n        sentiment = predict_sentiment(tweet_text)\n    else:\n        sentiment = predict_sentiment(translated_text)\n\n    sentiment_by_id[tweet[\"id\"]] = sentiment\n\n\n#/ check the accuracy\ntest_labels = read_tweets(\"./test_labels.csv\")\ncorrect_predictions = 0\n\nfor test in test_labels:\n    if sentiment_by_id[test[\"id\"]] == test[\"label\"]:\n        correct_predictions += 1\n\naccuracy = correct_predictions / len(test_labels)\nprint(f\"Accuracy: {accuracy:.2f}\")"
  },
  {
    "path": "2024/Multi-lingual sentiment analysis/readme.md",
    "content": "# Multi-lingual tweet sentiment analysis\n\nThis code is based on [twitter-XLM-roBERTa-base](https://huggingface.co/cardiffnlp/twitter-xlm-roberta-base-sentiment)\nand Google Translate to perform sentiment analysis on multi-lingual tweets.\n\nCheck out the supplementary YouTube tutorial: https://youtu.be/t_A_35m9OzU"
  },
  {
    "path": "2024/Multi-lingual sentiment analysis/requirements.txt",
    "content": "googletrans==3.1.0a0\ntransformers==4.44.2\ntorch==2.4.0\nsentencepiece==0.2.0\nprotobuf==5.28.0"
  },
  {
    "path": "2024/Multi-lingual sentiment analysis/sentiment.py",
    "content": "from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig\n\n\nMODEL = \"cardiffnlp/twitter-xlm-roberta-base-sentiment\"\nROBERTA_SUPPORTED_LANGUAGES = ('ar', 'en', 'fr', 'de', 'hi', 'it', 'es', 'pt')\n\nmodel = AutoModelForSequenceClassification.from_pretrained(MODEL)\ntokenizer = AutoTokenizer.from_pretrained(MODEL)\nconfig = AutoConfig.from_pretrained(MODEL)\n\n#/ save the model locally\nmodel.save_pretrained(MODEL)\ntokenizer.save_pretrained(MODEL)\n\n\n# Preprocess text (username and link placeholders)\ndef preprocess(text):\n    new_text = []\n    for t in text.split(\" \"):\n        t = '@user' if t.startswith('@') and len(t) > 1 else t\n        t = 'http' if t.startswith('http') else t\n        new_text.append(t)\n    return \" \".join(new_text)\n\ndef predict_sentiment(text: str) -> str:\n    processed_text = preprocess(text)\n    encoded_input = tokenizer(processed_text, return_tensors='pt')\n    output = model(**encoded_input)\n    index_of_sentiment = output.logits.argmax().item()\n    sentiment = config.id2label[index_of_sentiment]\n    return sentiment\n\n\n\n# text = \"la pizza da @michele è veramente buona https://www.youtube.com\"\n# text = \"این غذا خیلی شوره!\"\n# text = \"یه جلسه دیگه که میتونست یه ایمیل باشه 🥲\"\n# print(predict_sentiment(text))"
  },
  {
    "path": "2024/Multi-lingual sentiment analysis/test_labels.csv",
    "content": "id,label\n0,positive\n1,negative\n2,positive\n3,neutral\n4,negative\n5,neutral\n6,positive\n7,neutral\n8,positive\n9,negative\n10,neutral\n11,positive\n12,negative\n13,negative"
  },
  {
    "path": "2024/Multi-lingual sentiment analysis/translate.py",
    "content": "from googletrans import Translator\n\ntranslator = Translator()\n\n\n\ndef translate_text(original_text: str) -> str:\n    translation = translator.translate(original_text, dest='en')\n    translated_text, original_language = translation.text, translation.src\n    return translated_text, original_language\n\n# original_text = \"این غذا خیلی شوره!\"\n# print(translate_text(original_text))"
  },
  {
    "path": "2024/Multi-lingual sentiment analysis/tweets.csv",
    "content": "id,text,language\n0,باب الحارة يغدر بيك غدر، يخليك اتفرج علية من الاول بدون ما تفطن😂,\n1,مشاكل البلاك بورد متى تنتهي💔؟,\n2,@FCAugsburg Der neue Styler der Bilder ist echt genial! Gefällt mir!,de\n3,Kein Pardon: UEFA-Ermittlungen gegen Manchester United! #MUFCFCV #MUFC #Manchester https://t.co/PwZFpoqN1p https://t.co/PYfNgyneSu,de\n4,\"Ach man, ich hasse Gegentore. #S04\",de\n5,is listening to Jello Biafra at work.,en\n6,\"@_chloe yes! it`s on youtube  its from may 7th, and it made me feel 100x better. it`s halarious.\",en\n7,اینا اپلیکیشن فرم ندارن براشون اپلای کنیم؟,fa\n8,این ترم هم معدل الف شدم هو هو🤭🤭,fa\n9,خب حس میکنم پیر شدم و تا اطلاع ثانوی همه چی کنسله کنسل,fa\n10,सर्व धर्म मंदिर के अलावा यहां पर एक संग्रहालय और चिड़ियाघर भी है।,hi\n11,\"इस पर ब्राउज़िंग करना, गेम्स खेलना और मूवी देखना काफी अच्छा अनुभव है।\",hi\n12,कम से कम HD स्क्रीन तो देनी चाहिए थी।,hi\n13,Great! Another meeting that could be an email.,en"
  },
  {
    "path": "2024/Twikit/config.ini",
    "content": "[X]\nusername = xxx\npassword = xxxxxx\nemail = xxxx@xxxx.com"
  },
  {
    "path": "2024/Twikit/main.py",
    "content": "from twikit import Client, TooManyRequests\nimport time\nfrom datetime import datetime\nimport csv\nfrom configparser import ConfigParser\nfrom random import randint\n\n\nMINIMUM_TWEETS = 10\nQUERY = '(from:elonmusk) lang:en until:2020-01-01 since:2018-01-01'\n\n\ndef get_tweets(tweets):\n    if tweets is None:\n        #* get tweets\n        print(f'{datetime.now()} - Getting tweets...')\n        tweets = client.search_tweet(QUERY, product='Top')\n    else:\n        wait_time = randint(5, 10)\n        print(f'{datetime.now()} - Getting next tweets after {wait_time} seconds ...')\n        time.sleep(wait_time)\n        tweets = tweets.next()\n\n    return tweets\n\n\n#* login credentials\nconfig = ConfigParser()\nconfig.read('config.ini')\nusername = config['X']['username']\nemail = config['X']['email']\npassword = config['X']['password']\n\n#* create a csv file\nwith open('tweets.csv', 'w', newline='') as file:\n    writer = csv.writer(file)\n    writer.writerow(['Tweet_count', 'Username', 'Text', 'Created At', 'Retweets', 'Likes'])\n\n\n\n#* authenticate to X.com\n#! 1) use the login credentials. 2) use cookies.\nclient = Client(language='en-US')\n# client.login(auth_info_1=username, auth_info_2=email, password=password)\n# client.save_cookies('cookies.json')\n\nclient.load_cookies('cookies.json')\n\ntweet_count = 0\ntweets = None\n\nwhile tweet_count < MINIMUM_TWEETS:\n\n    try:\n        tweets = get_tweets(tweets)\n    except TooManyRequests as e:\n        rate_limit_reset = datetime.fromtimestamp(e.rate_limit_reset)\n        print(f'{datetime.now()} - Rate limit reached. Waiting until {rate_limit_reset}')\n        wait_time = rate_limit_reset - datetime.now()\n        time.sleep(wait_time.total_seconds())\n        continue\n\n    if not tweets:\n        print(f'{datetime.now()} - No more tweets found')\n        break\n\n    for tweet in tweets:\n        tweet_count += 1\n        tweet_data = [tweet_count, tweet.user.name, tweet.text, tweet.created_at, tweet.retweet_count, tweet.favorite_count]\n        \n        with open('tweets.csv', 'a', newline='') as file:\n            writer = csv.writer(file)\n            writer.writerow(tweet_data)\n\n    print(f'{datetime.now()} - Got {tweet_count} tweets')\n\n\nprint(f'{datetime.now()} - Done! Got {tweet_count} tweets found')"
  },
  {
    "path": "2024/Twikit/readme.md",
    "content": "# Scraping `X.com` with Twikit\n\nThis code uses [Twikit](https://github.com/d60/twikit) to scrape Tweet data. To run the code, use \n`pip install \"twikit==1.7.6\"` to install the _twikit_ package. The latest update of _twikit_ \ndeprecated the synchronous method which is used in the code.\n\nCheck out the supplementary YouTube tutorial: https://youtu.be/6D6fVyFQD5A"
  },
  {
    "path": "2024/Twikit/tweets.csv",
    "content": "Tweet_count,Username,Text,Created At,Retweets,Likes\n"
  },
  {
    "path": "README.md",
    "content": "# AI_Spectrum\n \n"
  }
]