A few examples of working with Apache Arrow and Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# examples of apache arrow

# Create a local ./data folder to download stock data into.
import os

cur_dir = os.getcwd()
data_dir = os.path.join(cur_dir, 'data')
# Bug fix: the original called os.makedirs('data'), which resolves relative
# to the current working directory and ignores the computed data_dir path.
# exist_ok=True also removes the racy exists()-then-create pattern.
os.makedirs(data_dir, exist_ok=True)
# pip install pandas | |
import pandas as pd | |
# conda install arrow-cpp=0.9.* -c conda-forge | |
# conda install pyarrow=0.9.* -c conda-forge | |
# or | |
# https://www.lfd.uci.edu/~gohlke/pythonlibs/#pyarrow | |
# download | |
# pip install pyarrow-0.4.1-cp36-cp36m-win_amd64.whl
# pip install -U pyarrow | |
import pyarrow as pa | |
import pyarrow.parquet as pq | |
# Parameters describing the end-of-day (EOD) dataset being fetched.
ticker = 'aapl'
eod_freq = 'daily'
eod_start_date = '20120517'
eod_end_date = '20180517'
eod_url = r'https://storage.googleapis.com/shared-data-29319031/aapl_eod_data.csv'

# Download the EOD price data into a DataFrame.
df_eod = pd.read_csv(eod_url)

# Save a local copy named after the ticker / date range / frequency.
eod_filepath = os.path.join(data_dir, f'eod_{ticker}_{eod_start_date}_{eod_end_date}_{eod_freq}.csv')
# Bug fix: index=False — the default writes the RangeIndex as an unnamed
# column, which shows up as 'Unnamed: 0' when the CSV is read back.
df_eod.to_csv(eod_filepath, index=False)
# Convert the DataFrame into an Apache Arrow Table.
eod_table = pa.Table.from_pandas(df_eod)

# Write the same table with several Parquet compression codecs so the
# resulting file sizes can be compared. 'SNAPPY' is pyarrow's default
# codec, so the .snappy.pq file matches a write with no compression arg.
codec_by_suffix = {
    '.pq': 'NONE',          # uncompressed
    '.snappy.pq': 'SNAPPY', # Snappy compression
    '.gzip.pq': 'GZIP',     # GZIP compression
    '.brotli.pq': 'BROTLI', # Brotli compression
}
for suffix, codec in codec_by_suffix.items():
    pq.write_table(eod_table, eod_filepath.replace(".csv", suffix), compression=codec)

# Read one of the Parquet files back into an Arrow Table...
new_eod_table = pq.read_table(eod_filepath.replace(".csv", ".brotli.pq"))
# ...and convert it back to a pandas DataFrame.
df_new_eod = new_eod_table.to_pandas()
# Feather: a fast on-disk format for DataFrames.
# Bug fix: write_feather/read_feather are NOT attributes of the top-level
# pyarrow namespace — they live in the pyarrow.feather submodule, so the
# original pa.write_feather / pa.read_feather calls raise AttributeError.
from pyarrow import feather

feather_filepath = eod_filepath.replace(".csv", ".f")
feather.write_feather(df_eod, feather_filepath)
fdf = feather.read_feather(feather_filepath)
# multicore read — use_threads replaces the long-removed nthreads parameter
fdf = feather.read_feather(feather_filepath, use_threads=True)

# from pandas: build an Arrow Table and inspect its schema
new_fdf = pa.Table.from_pandas(df_eod)
new_fdf.schema