Category: Python
To run it you need to do two things (while in the folder containing filename.py): 1) Start the worker by running Celery with the "worker" argument: $ celery -A tasks worker --loglevel=info 2) Call the task, i.e. run: $ python filename.py ####################### # grabhtml.py import requests from html import unescape class GrabHTML(object): def __init__(self): pass…
Python Script to insert CSV File into SQL Server Database import pandas as pd import csv import pyodbc import sys, os USERNAME = ‘sa’ PASSWORD = ‘password’ SERVER = ‘server’ DATABASE = ‘DATA’ DRIVERNAME = ‘ODBC Driver 13 for SQL Server’ cnxn = pyodbc.connect(‘Driver={‘+DRIVERNAME+’};Server=’+SERVER+’;Database=’+DATABASE+’;uid=’+USERNAME+’;pwd=’+PASSWORD) cur = cnxn.cursor() ##### using pandas grab first row for column…
I constantly use DocFetcher for indexing documents (PDFs, Excel, Word, HTML, etc. — DocFetcher website) and xplorer2 for viewing/storing results in its scrap container (xplorer2 website). One thing that has been bugging me for a while is that when I copy a list of documents from DocFetcher, I have had to paste it into Excel first before I could paste it into…
# While this is by no means perfect, it got the job done.  To reuse it,
# change the website URL, the calendar element ids, and start_date.
#
# Originally tagged "#! py27"; print statements are rewritten as print()
# so the script also parses on Python 3.  The pasted original had lost all
# newlines/indentation; structure below is the restored reading of it.
import os
import time
from datetime import datetime, date, timedelta

from selenium import webdriver
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Configure Firefox to auto-save Excel downloads without a save dialog.
fp = webdriver.FirefoxProfile()
fp.set_preference('browser.download.folderList', 2)
fp.set_preference('browser.download.manager.showWhenStarting', False)
fp.set_preference('browser.download.dir', os.getcwd())
fp.set_preference('browser.helperApps.neverAsk.saveToDisk',
                  'application/vnd.ms-excel')
fp.set_preference('browser.download.dir', r'c:\tmp')  # NOTE: overrides the getcwd() setting above

driver = webdriver.Firefox(firefox_profile=fp)
driver.get('https://www.zacks.com/earnings/earnings-reports')


def click_calendar():
    """Open the earnings-release date picker on the page."""
    try:
        element_xpath = '//*[@id="earnings_release"]/div[1]/p/a'
        WebDriverWait(driver, 10).until(
            lambda driver: driver.find_element_by_xpath(element_xpath).click())
    finally:
        # The original printed unconditionally, even when the wait timed out.
        print('clicked calendar')


def click_prev_day(x):
    """Click calendar day *x* (element id 'datespan_<x>').

    Returns True when the click succeeded, False otherwise.
    """
    element_id = 'datespan_%d' % (x,)
    try:
        WebDriverWait(driver, 10).until(
            lambda driver: driver.find_element_by_id(element_id).click())
    except Exception:  # was a bare except: in the original
        return False
    return True


def click_export():
    """Click the export-to-Excel link; return True on success."""
    try:
        WebDriverWait(driver, 10).until(
            lambda driver: driver.find_element_by_id('export_excel').click())
    except Exception:  # was a bare except: in the original
        return False
    return True


def click_prev_month():
    """Flip the calendar back one month, then land on its last valid day.

    Months end on day 31, 30, 29, or 28, so walk down from 31 until a day
    element is found.  NOTE: the original wrapped click_prev_day() in
    try/except, but click_prev_day never raises -- it returns a bool -- so
    that loop could only ever run once; the boolean is now checked instead.
    """
    try:
        driver.find_element_by_id('prevCal').click()
    except Exception:  # original swallowed failures here as well
        pass
    for day in range(31, 27, -1):
        if click_prev_day(day):
            return False  # kept: the original returned False after clicking
        print('could not find %s in prev month' % (day,))


def subtract_day(n):
    """Return n - 1 (the en-dash in the pasted original restored to '-')."""
    return n - 1


def start_date():
    """Earliest date the scraper should walk back to."""
    return datetime(2016, 2, 29)


def click_to_start_date():
    """Page the calendar back from today to the hard-coded start date.

    NOTE(review): months_back ignores the year component, so this only
    works when the start date falls within the current calendar year --
    confirm before reuse.
    """
    target = datetime(2016, 2, 28)
    today = date.today()
    months_back = today.month - target.month
    if months_back > 0:
        click_calendar()
    while months_back > 0:
        click_prev_month()
        months_back -= 1
    # Original tried day 31 and "fell back" to 30 via a dead try/except;
    # the fallback is expressed with the boolean result instead.
    if not click_prev_day(31):
        click_prev_day(30)


def main():
    """Walk back twelve months, exporting each day's earnings table.

    NOTE(review): indentation was lost in the paste; this assumes the
    month flip happens once per month, after the day loop -- confirm.
    """
    # click_to_start_date()
    # sdate = start_date()
    months = 12
    while months > 0:
        months -= 1
        for day in range(31, 0, -1):
            click_calendar()
            click_prev_day(day)
            click_export()
        click_calendar()
        click_prev_month()


if __name__ == '__main__':
    main()

# Few areas that still need improvement: click_prev_month() -- had a little
# difficulty…  (original notes truncated)
# Pull roughly the last three months of daily GOOG quotes from Yahoo!
# Finance and plot the adjusted close.  Meant to run in an IPython/Jupyter
# notebook, where the following magic enables inline plots:
# %matplotlib inline
#
# NOTE(review): pandas.io.data was deprecated and later removed from
# pandas; current installs need the separate pandas-datareader package
# instead -- confirm against the pandas version in use.
from pandas.io.data import DataReader
from datetime import date
from dateutil.relativedelta import relativedelta

goog = DataReader('GOOG', 'yahoo', date.today() + relativedelta(months=-3))
goog.tail()
goog.plot(y='Adj Close')
#! py35
# Read an input text file, strip everything up to and including the first
# space on each line, and append the remainder to a CSV output file.
# The Windows paths lost their backslashes in the paste and are restored
# below as raw strings -- verify against the original environment.

IN_PATH = r'C:\projects\txt.txt'
OUT_PATH = r'C:\projects\text.csv'


def strip_first_field(line):
    """Return *line* with its first space-delimited token and the space removed.

    Assumes the line contains at least one space; a line without one raises
    IndexError, matching the original `split(' ', 1)[1]` behaviour.
    """
    return line.split(' ', 1)[1]


def convert(in_path=IN_PATH, out_path=OUT_PATH):
    """Transform every line of *in_path*, appending results to *out_path*.

    Fixes vs. the original: the output file is opened once (the original
    re-opened it in append mode for every single line) and the input is
    streamed instead of slurped with readlines().
    """
    with open(in_path, 'r', encoding='utf-8') as fin, open(out_path, 'a') as fout:
        for line in fin:
            fout.write(strip_first_field(line))


if __name__ == '__main__':
    convert()
# John Gruber's "liberal, accurate regex pattern for matching web URLs",
# restored from the paste: every backslash escape (\b, \s, \[, \( ...) was
# stripped by the blog's formatting, the re.sub() call appeared *before*
# the pattern it uses was defined, and the Python-2 ur'' prefix is
# rewritten as r'' (str literals are already unicode on Python 3).
import re

# Generic + country-code TLDs accepted with or without an explicit scheme.
# (The stray "Ja" entry appears in the pasted list and is kept as-is to
# preserve behaviour -- TODO confirm against Gruber's published pattern.)
_TLDS = (
    'com|net|org|edu|gov|mil|aero|asia|biz|cat|coop|info|int|jobs|mobi|'
    'museum|name|post|pro|tel|travel|xxx|'
    'ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|'
    'bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|'
    'cm|cn|co|cr|cs|cu|cv|cx|cy|cz|dd|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|'
    'et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|'
    'gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|'
    'jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|'
    'ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|'
    'nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|'
    'pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|Ja|sk|sl|sm|sn|'
    'so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|'
    'tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|yu|za|zm|zw'
)

p = re.compile(
    r'(?i)\b('
    r'(?:'
    r'https?:(?:/{1,3}|[a-z0-9%])'            # scheme (or scheme-ish prefix)
    r'|'
    r'[a-z0-9.\-]+[.](?:' + _TLDS + r')/'     # bare domain.tld/ without scheme
    r')'
    r'(?:'                                    # one or more path/query chunks:
    r'[^\s()<>{}\[\]]+'
    r'|\([^\s()]*?\([^\s()]+\)[^\s()]*?\)'    # balanced (parens(nested))
    r'|\([^\s]+?\)'
    r')+'
    r'(?:'                                    # URL must end with:
    r'\([^\s()]*?\([^\s()]+\)[^\s()]*?\)'
    r'|\([^\s]+?\)'
    r'|[^\s`!()\[\]{};:\'".,<>?\u00ab\u00bb\u201c\u201d\u2018\u2019]'
    r')'
    r'|'
    r'(?:(?<!@)[a-z0-9]+(?:[.\-][a-z0-9]+)*[.]'  # schemeless: not an email
    r'(?:' + _TLDS + r')\b/?(?!@))'
    r')'
)

test_str = u''
subst = u''

# Replace every URL found in test_str with subst.
result = p.sub(subst, test_str)

# Even longer Regex for finding URL
# LinkedIn's Industry Codes: load the industry-code CSV into a SQLite
# table and print it back.
#
# Fixes vs. the pasted original: statements are re-ordered so the table
# exists before it is queried (the paste SELECTed from linkedin_industries
# before CREATE TABLE ran); "groups" is quoted because it is a reserved
# word in newer SQLite; the builtin name `file` is no longer shadowed;
# the connection is closed; the Windows path lost its backslashes in the
# paste and is restored as a best guess.
import csv
import sqlite3

DB_PATH = 'industry_classifications'
CSV_PATH = r'C:\projects\Linkedin\Industry Codes.csv'  # TODO confirm path


def load_linkedin_industries(db_path=DB_PATH, csv_path=CSV_PATH):
    """Create the linkedin_industries table, bulk-load *csv_path* into it,
    commit, and return every stored row.

    Args:
        db_path: SQLite database file (or ':memory:').
        csv_path: comma-delimited file of (code, groups, description) rows.

    Returns:
        List of (code, groups, description) tuples as stored by SQLite.
    """
    conn = sqlite3.connect(db_path)
    try:
        curs = conn.cursor()
        curs.execute(
            'create table if not exists linkedin_industries '
            '(code int(3), "groups" char(60), description char(60))')
        with open(csv_path, 'r', encoding='utf-8') as f:
            for row in csv.reader(f, delimiter=','):
                # Parameterized insert, as in the original.
                curs.execute(
                    'INSERT INTO linkedin_industries (code, "groups", description) '
                    'VALUES (?, ?, ?)', row)
        conn.commit()
        curs.execute('select * from linkedin_industries')
        return curs.fetchall()
    finally:
        conn.close()


if __name__ == '__main__':
    for rec in load_linkedin_industries():
        print(rec)