[
  {
    "path": ".travis.yml",
    "content": "language: python\npython:\n    - \"2.7\"\n\nbefore_install: sudo apt-get install -qq python-numpy python-scipy\ninstall: pip install -r requirements.txt\n\nscript: echo \"TODO\"\n"
  },
  {
    "path": "README.md",
    "content": "## Deprecated\n\nThis project is no longer maintained."
  },
  {
    "path": "bin/stockeye-corpus",
    "content": "python -m nltk.downloader stopwords\npython -m nltk.downloader punkt"
  },
  {
    "path": "requirements.txt",
    "content": "beautifulsoup4>=4.3.2\nbs4>=0.0.1\nDateTime>=4.1.1\nemail>=4.0.2\nlxml>=3.3.5\nnewspaper>=0.0.9.8\nnltk>=3.2.2\nnumpy>=1.8.2\nrequests>=2.3.0"
  },
  {
    "path": "stockeye/__init__.py",
    "content": "from watch import watch\n"
  },
  {
    "path": "stockeye/symbols/alpha.txt",
    "content": "AfterHoursChangeRealtime\nAnnualizedGain\nAsk\nAskRealtime\nAverageDailyVolume\nBid\nBidRealtime\nBookValue\nChange\nChangeFromFiftydayMovingAverage\nChangeFromTwoHundreddayMovingAverage\nChangeFromYearHigh\nChangeFromYearLow\nChangePercentRealtime\nChangeRealtime\nChangeinPercent\nCommission\nCurrency\nDaysHigh\nDaysLow\nDaysRange\nDaysRangeRealtime\nDaysValueChange\nDaysValueChangeRealtime\nDividendPayDate\nDividendShare\nDividendYield\nEBITDA\nEPSEstimateCurrentYear\nEPSEstimateNextQuarter\nEPSEstimateNextYear\nEarningsShare\nExDividendDate\nFiftydayMovingAverage\nHighLimit\nHoldingsGain\nHoldingsGainPercent\nHoldingsGainPercentRealtime\nHoldingsGainRealtime\nHoldingsValue\nHoldingsValueRealtime\nLastTradeDate\nLastTradePriceOnly\nLastTradeRealtimeWithTime\nLastTradeTime\nLastTradeWithTime\nLowLimit\nMarketCapRealtime\nMarketCapitalization\nMoreInfo\nName\nNotes\nOneyrTargetPrice\nOpen\nOrderBookRealtime\nPEGRatio\nPERatio\nPERatioRealtime\nPercentChangeFromYearHigh\nPercentChange\nPercentChangeFromFiftydayMovingAverage\nPercentChangeFromTwoHundreddayMovingAverage\nPercentChangeFromYearLow\nPreviousClose\nPriceBook\nPriceEPSEstimateCurrentYear\nPriceEPSEstimateNextYear\nPricePaid\nPriceSales\nSharesOwned\nShortRatio\nStockExchange\nSymbol\nTickerTrend\nTradeDate\nTwoHundreddayMovingAverage\nVolume\nYearHigh\nYearLow\nYearRange"
  },
  {
    "path": "stockeye/symbols/clean.txt",
    "content": "After Hours Change Realtime\nAnnualized Gain\nAsk\nAsk Realtime\nAverage Daily Volume\nBid\nBid Realtime\nBook Value\nChange\nChange From Fifty Day Moving Average\nChange From Two Hundred Day Moving Average\nChange From Year High\nChange From Year Low\nChange Percent Realtime\nChange Realtime\nChange in Percent\nCommission\nCurrency\nDays High\nDays Low\nDays Range\nDays Range Realtime\nDays Value Change\nDays Value Change Realtime\nDividend Pay Date\nDividend Share\nDividend Yield\nEBITDA\nEPS Estimate Current Year\nEPS Estimate Next Quarter\nEPS Estimate Next Year\nEarnings Share\nEx Dividend Date\nFifty Day Moving Average\nHigh Limit\nHoldings Gain\nHoldings Gain Percent\nHoldings Gain Percent Realtime\nHoldings Gain Realtime\nHoldings Value\nHoldings Value Realtime\nLast Trade Date\nLast Trade Price Only\nLast Trade Realtime With Time\nLast Trade Time\nLast Trade With Time\nLow Limit\nMarket Cap Realtime\nMarket Capitalization\nMore Info\nName\nNotes\nOneyr Target Price\nOpen\nOrder Book Realtime\nPEG Ratio\nPE Ratio\nPE Ratio Realtime\nPercent Change From Year High\nPercent Change\nPercent Change From Fifty Day Moving Average\nPercent Change From Two Hundred Day Moving Average\nPercent Change From Year Low\nPrevious Close\nPrice Book\nPrice EPS Estimate Current Year\nPrice EPS Estimate Next Year\nPrice Paid\nPrice Sales\nShares Owned\nShort Ratio\nStock Exchange\nSymbol\nTicker Trend\nTrade Date\nTwo Hundred Day Moving Average\nVolume\nYear High\nYear Low\nYear Range"
  },
  {
    "path": "stockeye/watch.py",
    "content": "from requests  import get\nfrom time      import sleep\nfrom random    import randint\nfrom newspaper import Article\nfrom bs4       import BeautifulSoup\nfrom re        import search, sub  \nfrom datetime  import datetime, timedelta\nfrom math      import log10 \n\nfrom smtplib              import SMTP\nfrom email.mime.multipart import MIMEMultipart\nfrom email.mime.text      import MIMEText\nfrom nltk.tokenize        import sent_tokenize, word_tokenize\nfrom nltk.corpus          import stopwords\nstopWords = set(stopwords.words('english'))\n\n# --- Textrank Methods ---------------------------------------------------------\n\nclass vertex:\n    order = 0\n    def __init__(self, sentence_raw, sentence_processed, words):\n        self.order              = vertex.order\n        self.score              = None\n        self.scores             = []\n        self.sentence_raw       = sentence_raw\n        self.sentence_processed = sentence_processed\n        self.words              = words\n        vertex.order += 1\n        \n    def averageScores(self):\n        try: self.score = sum(self.scores)/len(self.scores)\n        except ZeroDivisionError: self.score = 0\n\ndef overlap(w1, w2):\n    s1 = []\n    for w in w1:\n        if w not in stopWords:\n            s1.append(w)\n    s2 = []\n    for w in w2:\n        if w not in stopWords:\n            s2.append(w)\n            \n    try: return len([w for w in s1 if w in s2])/(log10(len(s1))+log10(len(s2)))\n    except ZeroDivisionError: return 0\n    \ndef buildGraph(text):\n    vertices = [] \n    sentences = sent_tokenize(text, language='english')\n    for sentence_raw in sentences:  \n        sentence_processed = sub(\"[^a-zA-Z ]+\", '', sentence_raw).lower()          \n        words = word_tokenize(sentence_processed, language='english')\n        vertices.append(vertex(sentence_raw, sentence_processed, words))\n    \n    for v1 in vertices:\n        for v2 in vertices:\n            if v1.order != v2.order:                \n                v1.scores.append(overlap(v1.words, v2.words))\n        v1.averageScores()\n    return vertices\n\ndef summarize(text, length, firstlast = False):\n    vertices = buildGraph(text)\n    all_ord = sorted(vertices, key=lambda v: v.order)\n    mos_sig = sorted(vertices, key=lambda v: v.score, reverse=True)[0:length]\n    mos_sig_ord = sorted(mos_sig, key=lambda v: v.order)\n        \n    if firstlast:\n        if all_ord[0] not in mos_sig_ord: \n            mos_sig_ord.insert(0, all_ord[0])\n        if all_ord[len(all_ord)-1] not in mos_sig_ord:\n            mos_sig_ord.append(all_ord[len(all_ord)-1])\n    \n    summary = []\n    for v in mos_sig_ord:\n        summary.append(v.sentence_raw)   \n    return summary\n\n# --- Yahoo Methods -----------------------------------------------------------\n\ndef loadSymbols():\n    afile = open('symbols/alpha.txt', 'r')\n    cfile = open('symbols/clean.txt', 'r') \n    alpha, clean = [], [] \n    for a in afile:\n        alpha.append(a.strip('\\n'))\n    for c in cfile:\n        clean.append(c.strip('\\n'))\n    symbols = {}\n    for i in xrange(len(alpha)):\n        symbols[alpha[i]] = clean[i]\n    return symbols\n\ndef yahooURL(ticks):\n    query = ''\n    for i,t in enumerate(ticks):\n        if i == len(ticks)-1: query += '%22'+t+'%22'\n        else: query += '%22'+t+'%22%2C'\n    return \"https://query.yahooapis.com/v1/public/yql?q=select%20*%20from%20yahoo.finance.quotes%20where%20symbol%20in%20(\"+query+\")%0A%09%09&format=json&diagnostics=true&env=http%3A%2F%2Fdatatables.org%2Falltables.env&callback=\"\n\ndef yahooRequest(url, moreProperties = []):\n    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36'}\n    r = get(url, headers=headers)\n    j = r.json()\n    try: \n        quotes = j['query']['results']['quote']      \n    except KeyError:                         \n        print \"No Stocks Found!\"             # If zero stocks found\n        return \n    stocks = {}\n    properties = ['Name'] + moreProperties       \n    if type(quotes) == dict:                 # If one stock found\n        stocks[quotes['Symbol']] = {}\n        for p in properties:\n            try:\n                stocks[quotes['Symbol']][p] = quotes[p]\n            except KeyError:\n                stocks[quotes['Symbol']][p] = \"None\"        \n    else:\n        for q in quotes:                     # If multiple stocks found\n            stocks[q['Symbol']] = {}\n            for p in properties:\n                try:\n                    stocks[q['Symbol']][p] = q[p]\n                except KeyError:\n                    stocks[q['Symbol']][p] = \"None\"\n    return stocks\n\n# --- Email Methods ------------------------------------------------------------\n\ndef stats_HTML(symbol, statistics, properties):\n    symbols = loadSymbols()\n    stats = '<center><b>'+symbol+'</b><br><br><table>'\n    for p in properties:\n        try:\n            stats += '<tr><td style=\"padding-right:30px\">'+symbols[p]+'</td>'\n        except:\n            stats += '<tr><td>'+p+'</td>'\n        stats += '<td>'+str(statistics[symbol][p])+'</td></tr>'\n    return stats+'</table><br><hr><br></center>'\n\ndef outline_HTML(i, title, link, time, summary):\n    title_HTML = '<br>'+str(i+1)+'. <b><a href=\"'+link+'\">'+title+'</a></b><br>' \n    time_HTML = 'Posted '+time+'<br>'\n    summary_HTML = ''    \n    for sentence in summary:\n        summary_HTML += '<br><i>'+sentence+'<br></i>'\n    return title_HTML+time_HTML+summary_HTML\n\ndef subject_HTML(symbol):\n    subject = 'Recent News Activity for '+symbol\n    return subject\n\ndef body_HTML(symbol, statistics, properties, articles):\n    body = ''\n    body += stats_HTML(symbol, statistics, properties)\n    for i, a in enumerate(articles):\n        body += outline_HTML(i, a.title, a.link, a.time, a.summary)\n    return body    \n\ndef sendEmail(subject, body, credentials):    \n    self = credentials[0]\n    password = credentials[1]    \n    fromAddr = credentials[2]\n    toAddr = credentials[3]   \n    msg = MIMEMultipart()\n    msg['From'] = fromAddr\n    msg['To'] = toAddr\n    msg['Subject'] = subject   \n    msgText = MIMEText(body, 'html', 'UTF-8')\n    msg.attach(msgText)\n    server = SMTP('smtp.gmail.com', 587)\n    server.starttls()\n    server.login(self, password)\n    text = msg.as_string()\n    server.sendmail(fromAddr, toAddr, text)\n    server.quit()\n\n# --- Scraping Methods ---------------------------------------------------------\n\nclass article:    \n    def __init__(self, title, link, time):   \n        self.title   = title\n        self.link    = link\n        self.time    = time\n        self.order   = None\n        self.body    = []\n        self.summary = []\n        \n    def printTitle(self):\n        print self.title\n        \n    def printBody(self):\n        for s in self.body:\n            print '  ',\n            print s\n            print\n        \n    def printSummary(self):\n        for s in self.summary:\n            print s\n            print\n\ndef similarity(s1, s2):\n    if len(s1) == 0: return len(s2)\n    elif len(s2) == 0: return len(s1)\n    v0 = [None]*(len(s2) + 1)\n    v1 = [None]*(len(s2) + 1)\n    for i in range(len(v0)):\n        v0[i] = i\n    for i in range(len(s1)):\n        v1[0] = i + 1\n        for j in range(len(s2)):\n            cost = 0 if s1[i] == s2[j] else 1\n            v1[j + 1] = min(v1[j] + 1, v0[j + 1] + 1, v0[j] + cost)\n        for j in range(len(v0)):\n            v0[j] = v1[j]\n    return 100-((float(v1[len(s2)])/(len(s1)+len(s2)))*100)\n\ndef unique(title, articles):\n    for article in articles:\n        if similarity(title, article.title) >= 95:\n            return False\n    return True\n\ndef createURLs(query, pages):\n    pages = (10 * x for x in xrange(0, pages))\n    lower = query.lower().replace(' ', '+')\n    urls = ['https://www.google.com/search?q=\"%s\"&tbm=nws&tbs=qdr:y#q=\"%s\"&safe=active&tbs=qdr:y,sbd:1&tbm=nws&start=%s' % (lower, lower, x) for i, x in enumerate(pages)]\n    return urls\n  \ndef grabArticles(query, pages, rest = 0):\n    urls = createURLs(query, pages)\n    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36'}\n    articles = []\n    for url in urls:\n        response = get(url, headers=headers)\n        soup = BeautifulSoup(response.text, \"html.parser\") \n        objects_HId = soup.find_all(\"a\", class_=\"l._HId\")\n        objects_sQb = soup.find_all(\"a\", class_=\"_sQb\")\n        \n        for a in objects_HId:\n            title = a.get_text()\n            link = a['href']\n            try:\n                time = a.parent.find(\"span\", class_=\"_uQb\").text\n            except AttributeError: \n                time = a.parent.parent.find(\"span\", class_=\"_uQb\").text\n            if unique(title, articles):\n                articles.append(article(title, link, time))\n\n        for a in objects_sQb:\n            title =  a.get_text()\n            link = a['href']\n            try:\n                time = a.parent.find(\"span\", class_=\"_uQb\").text\n            except AttributeError:\n                time = a.parent.parent.find(\"span\", class_=\"_uQb\").text\n            if unique(title, articles):\n                articles.append(article(title, link, time))\n               \n        sleep(randint(float(rest)/2, rest))\n    return articles   \n\n# ----- Analytical Methods -----------------------------------------------------\n\ndef summarizeArticles(articles, length, firstlast = False):\n    summedArticles = []\n    for a in articles:\n        try: \n            A = Article(a.link)\n            A.download()\n            A.parse()\n            text = \"\"\n            paragraphs = A.text.split('\\n')\n            for p in paragraphs:\n                if len(p) > 100:\n                    a.body.append(p)\n                    text += p + ' ' \n            sentences = summarize(text, length, firstlast)\n            for s in sentences:\n                a.summary.append(s) \n            summedArticles.append(a)    \n        except: pass\n    return summedArticles\n\ndef sortArticles(articles):\n    for a in articles:    \n        time = a.time  \n        if search(\"second\", time):\n            seconds = int(time.split(' ')[0])\n            order = datetime.now()-timedelta(seconds=seconds)        \n        elif search(\"minute\", time):\n            minutes = int(time.split(' ')[0])\n            order = datetime.now()-timedelta(minutes=minutes)           \n        elif search(\"hour\", time):\n            hours = int(time.split(' ')[0])\n            order = datetime.now()-timedelta(hours=hours)        \n        else:\n            order = datetime.strptime(time, '%b %d, %Y') \n        a.order = order\n    return sorted(articles, key=lambda a: a.order, reverse=True)     \n\n# ----- The Mastermind ---------------------------------------------------------\n\ndef watch(credentials, ticks, properties = [], threshold = 5, hourspast = 18, sentences = 3, firstlast = False):\n    if threshold <= 0:\n        print \"Please choose a threshold greater than 0.\"\n        return         \n    if hourspast < 0:\n        print \"This program is not capable of scraping news from the future.\"\n        return  \n    if len(ticks) > 100:\n        print \"API calls are limited to 100 individual stocks.\"\n        return   \n    \n    estimate = len(ticks)*15*2\n    if estimate < 60: print \"This run will take approximately %s seconds\" % (str(estimate))\n    else: print \"This run will take approximately %s minutes\" % (str(estimate/60))    \n    \n    url = yahooURL(ticks)\n    stats = yahooRequest(url, properties)\n    remove = ['class', 'common', 'stock']\n    for symbol in stats:\n        name = stats[symbol]['Name']\n        if name:\n            print \"Finding news for %s\" % (symbol)\n            query = (' '.join([w for w in name.split() if w.lower() not in remove]))+' '+symbol\n            articles = grabArticles(query, 2, 20)\n            articles = summarizeArticles(articles, sentences, firstlast)\n            articles = sortArticles(articles) \n\n            recentArticles = []\n            for a in articles:\n                hoursago = float((datetime.now()-a.order).total_seconds())/3600\n                if hoursago <= hourspast:\n                    recentArticles.append(a)    \n            if len(recentArticles) >= threshold:\n                subject = subject_HTML(symbol)\n                body = body_HTML(symbol, stats, properties, recentArticles)\n                sendEmail(subject, body, credentials)\n        else:\n            print \"Coudn't find any company for %s\" % (symbol)"
  }
]