{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import tensorflow as tf\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "sns.set()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
DateOpenHighLowCloseAdj CloseVolume
02016-11-02778.200012781.650024763.450012768.700012768.7000121872400
12016-11-03767.250000769.950012759.030029762.130005762.1300051943200
22016-11-04750.659973770.359985750.560974762.020020762.0200202134800
32016-11-07774.500000785.190002772.549988782.520020782.5200201585100
42016-11-08783.400024795.632996780.190002790.510010790.5100101350800
\n", "
" ], "text/plain": [ " Date Open High Low Close Adj Close \\\n", "0 2016-11-02 778.200012 781.650024 763.450012 768.700012 768.700012 \n", "1 2016-11-03 767.250000 769.950012 759.030029 762.130005 762.130005 \n", "2 2016-11-04 750.659973 770.359985 750.560974 762.020020 762.020020 \n", "3 2016-11-07 774.500000 785.190002 772.549988 782.520020 782.520020 \n", "4 2016-11-08 783.400024 795.632996 780.190002 790.510010 790.510010 \n", "\n", " Volume \n", "0 1872400 \n", "1 1943200 \n", "2 2134800 \n", "3 1585100 \n", "4 1350800 " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_csv('../dataset/GOOG-year.csv')\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "from collections import deque\n", "import random\n", "\n", "class Model:\n", " def __init__(self, input_size, output_size, layer_size, learning_rate):\n", " self.X = tf.placeholder(tf.float32, (None, input_size))\n", " self.Y = tf.placeholder(tf.float32, (None, output_size))\n", " feed = tf.layers.dense(self.X, layer_size, activation = tf.nn.relu)\n", " tensor_action, tensor_validation = tf.split(feed,2,1)\n", " feed_action = tf.layers.dense(tensor_action, output_size)\n", " feed_validation = tf.layers.dense(tensor_validation, 1)\n", " self.logits = feed_validation + tf.subtract(feed_action,tf.reduce_mean(feed_action,axis=1,keep_dims=True))\n", " self.cost = tf.reduce_sum(tf.square(self.Y - self.logits))\n", " self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)\n", " \n", "class Agent:\n", "\n", " LEARNING_RATE = 0.003\n", " BATCH_SIZE = 32\n", " LAYER_SIZE = 500\n", " OUTPUT_SIZE = 3\n", " EPSILON = 0.5\n", " DECAY_RATE = 0.005\n", " MIN_EPSILON = 0.1\n", " GAMMA = 0.99\n", " MEMORIES = deque()\n", " COPY = 1000\n", " T_COPY = 0\n", " MEMORY_SIZE = 300\n", " \n", " def __init__(self, state_size, window_size, trend, skip):\n", " self.state_size = state_size\n", " self.window_size = window_size\n", " self.half_window = window_size // 2\n", " self.trend = trend\n", " self.skip = skip\n", " tf.reset_default_graph()\n", " self.model = Model(self.state_size, self.OUTPUT_SIZE, self.LAYER_SIZE, self.LEARNING_RATE)\n", " self.model_negative = Model(self.state_size, self.OUTPUT_SIZE, self.LAYER_SIZE, self.LEARNING_RATE)\n", " self.sess = tf.InteractiveSession()\n", " self.sess.run(tf.global_variables_initializer())\n", " self.trainable = tf.trainable_variables()\n", " \n", " def _assign(self):\n", " for i in range(len(self.trainable)//2):\n", " assign_op = self.trainable[i+len(self.trainable)//2].assign(self.trainable[i])\n", " self.sess.run(assign_op)\n", "\n", " def _memorize(self, state, action, reward, new_state, done):\n", " self.MEMORIES.append((state, action, reward, new_state, done))\n", " if len(self.MEMORIES) > self.MEMORY_SIZE:\n", " self.MEMORIES.popleft()\n", "\n", " def _select_action(self, state):\n", " if np.random.rand() < self.EPSILON:\n", " action = np.random.randint(self.OUTPUT_SIZE)\n", " else:\n", " action = self.get_predicted_action([state])\n", " return action\n", "\n", " def _construct_memories(self, replay):\n", " states = np.array([a[0] for a in replay])\n", " new_states = np.array([a[3] for a in replay])\n", " Q = self.predict(states)\n", " Q_new = self.predict(new_states)\n", " Q_new_negative = self.sess.run(self.model_negative.logits, feed_dict={self.model_negative.X:new_states})\n", " replay_size = len(replay)\n", " X = np.empty((replay_size, self.state_size))\n", " Y = np.empty((replay_size, self.OUTPUT_SIZE))\n", " for i in range(replay_size):\n", " state_r, action_r, reward_r, new_state_r, done_r = replay[i]\n", " target = Q[i]\n", " target[action_r] = reward_r\n", " if not done_r:\n", " target[action_r] += self.GAMMA * Q_new_negative[i, np.argmax(Q_new[i])]\n", " X[i] = state_r\n", " Y[i] = target\n", " return X, Y\n", "\n", " def predict(self, inputs):\n", " return self.sess.run(self.model.logits, feed_dict={self.model.X:inputs})\n", " \n", " def get_predicted_action(self, sequence):\n", " prediction = self.predict(np.array(sequence))[0]\n", " return np.argmax(prediction)\n", " \n", " def get_state(self, t):\n", " window_size = self.window_size + 1\n", " d = t - window_size + 1\n", " block = self.trend[d : t + 1] if d >= 0 else -d * [self.trend[0]] + self.trend[0 : t + 1]\n", " res = []\n", " for i in range(window_size - 1):\n", " res.append(block[i + 1] - block[i])\n", " return np.array(res)\n", " \n", " def buy(self, initial_money):\n", " starting_money = initial_money\n", " states_sell = []\n", " states_buy = []\n", " inventory = []\n", " state = self.get_state(0)\n", " for t in range(0, len(self.trend) - 1, self.skip):\n", " action = self._select_action(state)\n", " next_state = self.get_state(t + 1)\n", " \n", " if action == 1 and initial_money >= self.trend[t]:\n", " inventory.append(self.trend[t])\n", " initial_money -= self.trend[t]\n", " states_buy.append(t)\n", " print('day %d: buy 1 unit at price %f, total balance %f'% (t, self.trend[t], initial_money))\n", " \n", " elif action == 2 and len(inventory):\n", " bought_price = inventory.pop(0)\n", " initial_money += self.trend[t]\n", " states_sell.append(t)\n", " try:\n", " invest = ((close[t] - bought_price) / bought_price) * 100\n", " except:\n", " invest = 0\n", " print(\n", " 'day %d, sell 1 unit at price %f, investment %f %%, total balance %f,'\n", " % (t, close[t], invest, initial_money)\n", " )\n", " \n", " state = next_state\n", " invest = ((initial_money - starting_money) / starting_money) * 100\n", " total_gains = initial_money - starting_money\n", " return states_buy, states_sell, total_gains, invest\n", " \n", " \n", " def train(self, iterations, checkpoint, initial_money):\n", " for i in range(iterations):\n", " total_profit = 0\n", " inventory = []\n", " state = self.get_state(0)\n", " starting_money = initial_money\n", " for t in range(0, len(self.trend) - 1, self.skip):\n", " if (self.T_COPY + 1) % self.COPY == 0:\n", " self._assign()\n", " \n", " action = self._select_action(state)\n", " next_state = self.get_state(t + 1)\n", " \n", " if action == 1 and starting_money >= self.trend[t]:\n", " inventory.append(self.trend[t])\n", " starting_money -= self.trend[t]\n", " \n", " elif action == 2 and len(inventory) > 0:\n", " bought_price = inventory.pop(0)\n", " total_profit += self.trend[t] - bought_price\n", " starting_money += self.trend[t]\n", " \n", " invest = ((starting_money - initial_money) / initial_money)\n", " \n", " self._memorize(state, action, invest, next_state, starting_money < initial_money)\n", " batch_size = min(len(self.MEMORIES), self.BATCH_SIZE)\n", " state = next_state\n", " replay = random.sample(self.MEMORIES, batch_size)\n", " X, Y = self._construct_memories(replay)\n", " \n", " cost, _ = self.sess.run([self.model.cost, self.model.optimizer], \n", " feed_dict={self.model.X: X, self.model.Y:Y})\n", " self.T_COPY += 1\n", " self.EPSILON = self.MIN_EPSILON + (1.0 - self.MIN_EPSILON) * np.exp(-self.DECAY_RATE * i)\n", " if (i+1) % checkpoint == 0:\n", " print('epoch: %d, total rewards: %f.3, cost: %f, total money: %f'%(i + 1, total_profit, cost,\n", " starting_money))" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "WARNING:tensorflow:From :12: calling reduce_mean (from tensorflow.python.ops.math_ops) with keep_dims is deprecated and will be removed in a future version.\n", "Instructions for updating:\n", "keep_dims is deprecated, use keepdims instead\n", "epoch: 10, total rewards: 1486.684997.3, cost: 0.694152, total money: 10514.124999\n", "epoch: 20, total rewards: 313.279660.3, cost: 0.878157, total money: 8354.909665\n", "epoch: 30, total rewards: 752.595089.3, cost: 0.320037, total money: 10752.595089\n", "epoch: 40, total rewards: 1159.299987.3, cost: 0.318166, total money: 10186.739989\n", "epoch: 50, total rewards: 993.220279.3, cost: 0.391151, total money: 4149.310245\n", "epoch: 60, total rewards: 1616.499880.3, cost: 0.307440, total money: 9630.939883\n", "epoch: 70, total rewards: 941.484560.3, cost: 0.332979, total money: 6969.054506\n", "epoch: 80, total rewards: 904.899903.3, cost: 0.718111, total money: 1132.559876\n", "epoch: 90, total rewards: 346.619873.3, cost: 0.482044, total money: 542.599852\n", "epoch: 100, total rewards: 141.554626.3, cost: 0.238426, total money: 6115.974608\n", "epoch: 110, total rewards: -159.529845.3, cost: 0.202412, total money: 8852.270143\n", "epoch: 120, total rewards: -37.579779.3, cost: 0.433529, total money: 8945.780206\n", "epoch: 130, total rewards: 1049.544800.3, cost: 0.408910, total money: 8099.664795\n", "epoch: 140, total rewards: 59.114809.3, cost: 0.028664, total money: 7098.904848\n", "epoch: 150, total rewards: 96.424866.3, cost: 0.070552, total money: 9079.784851\n", "epoch: 160, total rewards: 74.179754.3, cost: 0.044092, total money: 10074.179754\n", "epoch: 170, total rewards: 80.999883.3, cost: 0.018813, total money: 8047.249883\n", "epoch: 180, total rewards: 62.700011.3, cost: 0.083292, total money: 10062.700011\n", "epoch: 190, total rewards: 70.424991.3, cost: 0.013884, total money: 9053.315006\n", "epoch: 200, total rewards: 10.620115.3, cost: 0.030838, total money: 10010.620115\n" ] } ], "source": [ "close = df.Close.values.tolist()\n", "initial_money = 10000\n", "window_size = 30\n", "skip = 1\n", "batch_size = 32\n", "agent = Agent(state_size = window_size, \n", " window_size = window_size, \n", " trend = close, \n", " skip = skip)\n", "agent.train(iterations = 200, checkpoint = 10, initial_money = initial_money)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "day 1: buy 1 unit at price 762.130005, total balance 9237.869995\n", "day 2, sell 1 unit at price 762.020020, investment -0.014431 %, total balance 9999.890015,\n", "day 11: buy 1 unit at price 771.229980, total balance 9228.660035\n", "day 12: buy 1 unit at price 760.539978, total balance 8468.120057\n", "day 13, sell 1 unit at price 769.200012, investment -0.263212 %, total balance 9237.320069,\n", "day 15, sell 1 unit at price 760.989990, investment 0.059170 %, total balance 9998.310059,\n", "day 34: buy 1 unit at price 794.559998, total balance 9203.750061\n", "day 35, sell 1 unit at price 791.260010, investment -0.415323 %, total balance 9995.010071,\n", "day 36: buy 1 unit at price 789.909973, total balance 9205.100098\n", "day 37, sell 1 unit at price 791.549988, investment 0.207620 %, total balance 9996.650086,\n", "day 38: buy 1 unit at price 785.049988, total balance 9211.600098\n", "day 40, sell 1 unit at price 771.820007, investment -1.685241 %, total balance 9983.420105,\n", "day 54: buy 1 unit at price 819.309998, total balance 9164.110107\n", "day 55, sell 1 unit at price 823.869995, investment 0.556566 %, total balance 9987.980102,\n", "day 62: buy 1 unit at price 798.530029, total balance 9189.450073\n", "day 64, sell 1 unit at price 801.340027, investment 0.351896 %, total balance 9990.790100,\n", "day 68: buy 1 unit at price 813.669983, total balance 9177.120117\n", "day 69, sell 1 unit at price 819.239990, investment 0.684554 %, total balance 9996.360107,\n", "day 72: buy 1 unit at price 824.159973, total balance 9172.200134\n", "day 73, sell 1 unit at price 828.070007, investment 0.474427 %, total balance 10000.270141,\n", "day 74: buy 1 unit at price 831.659973, total balance 9168.610168\n", "day 75, sell 1 unit at price 830.760010, investment -0.108213 %, total balance 9999.370178,\n", "day 79: buy 1 unit at price 823.210022, total balance 9176.160156\n", "day 80, sell 1 unit at price 835.239990, investment 1.461349 %, total balance 10011.400146,\n", "day 90: buy 1 unit at price 847.200012, total balance 9164.200134\n", "day 91, sell 1 unit at price 848.780029, investment 0.186499 %, total balance 10012.980163,\n", "day 93: buy 1 unit at price 848.400024, total balance 9164.580139\n", "day 94: buy 1 unit at price 830.460022, total balance 8334.120117\n", "day 95, sell 1 unit at price 829.590027, investment -2.217114 %, total balance 9163.710144,\n", "day 96, sell 1 unit at price 817.580017, investment -1.550948 %, total balance 9981.290161,\n", "day 100: buy 1 unit at price 831.409973, total balance 9149.880188\n", "day 101, sell 1 unit at price 831.500000, investment 0.010828 %, total balance 9981.380188,\n", "day 104: buy 1 unit at price 834.570007, total balance 9146.810181\n", "day 106: buy 1 unit at price 827.880005, total balance 8318.930176\n", "day 107, sell 1 unit at price 824.669983, investment -1.186242 %, total balance 9143.600159,\n", "day 108, sell 1 unit at price 824.729980, investment -0.380493 %, total balance 9968.330139,\n", "day 110: buy 1 unit at price 824.320007, total balance 9144.010132\n", "day 111, sell 1 unit at price 823.559998, investment -0.092198 %, total balance 9967.570130,\n", "day 115: buy 1 unit at price 841.650024, total balance 9125.920106\n", "day 116, sell 1 unit at price 843.190002, investment 0.182971 %, total balance 9969.110108,\n", "day 125: buy 1 unit at price 931.659973, total balance 9037.450135\n", "day 126, sell 1 unit at price 927.130005, investment -0.486225 %, total balance 9964.580140,\n", "day 127: buy 1 unit at price 934.299988, total balance 9030.280152\n", "day 128, sell 1 unit at price 932.169983, investment -0.227979 %, total balance 9962.450135,\n", "day 141: buy 1 unit at price 971.469971, total balance 8990.980164\n", "day 142, sell 1 unit at price 975.880005, investment 0.453955 %, total balance 9966.860169,\n", "day 152: buy 1 unit at price 953.400024, total balance 9013.460145\n", "day 153, sell 1 unit at price 950.760010, investment -0.276905 %, total balance 9964.220155,\n", "day 156: buy 1 unit at price 957.369995, total balance 9006.850160\n", "day 157, sell 1 unit at price 950.630005, investment -0.704011 %, total balance 9957.480165,\n", "day 166: buy 1 unit at price 898.700012, total balance 9058.780153\n", "day 167, sell 1 unit at price 911.710022, investment 1.447648 %, total balance 9970.490175,\n", "day 172: buy 1 unit at price 943.830017, total balance 9026.660158\n", "day 173, sell 1 unit at price 947.159973, investment 0.352813 %, total balance 9973.820131,\n", "day 185: buy 1 unit at price 930.500000, total balance 9043.320131\n", "day 186: buy 1 unit at price 930.830017, total balance 8112.490114\n", "day 187, sell 1 unit at price 930.390015, investment -0.011820 %, total balance 9042.880129,\n", "day 188, sell 1 unit at price 923.650024, investment -0.771354 %, total balance 9966.530153,\n", "day 193: buy 1 unit at price 907.239990, total balance 9059.290163\n", "day 194, sell 1 unit at price 914.390015, investment 0.788107 %, total balance 9973.680178,\n", "day 197: buy 1 unit at price 926.960022, total balance 9046.720156\n", "day 199, sell 1 unit at price 910.669983, investment -1.757362 %, total balance 9957.390139,\n", "day 211: buy 1 unit at price 927.809998, total balance 9029.580141\n", "day 212, sell 1 unit at price 935.950012, investment 0.877336 %, total balance 9965.530153,\n", "day 213: buy 1 unit at price 926.500000, total balance 9039.030153\n", "day 214, sell 1 unit at price 929.080017, investment 0.278469 %, total balance 9968.110170,\n" ] } ], "source": [ "states_buy, states_sell, total_gains, invest = agent.buy(initial_money = initial_money)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "fig = plt.figure(figsize = (15,5))\n", "plt.plot(close, color='r', lw=2.)\n", "plt.plot(close, '^', markersize=10, color='m', label = 'buying signal', markevery = states_buy)\n", "plt.plot(close, 'v', markersize=10, color='k', label = 'selling signal', markevery = states_sell)\n", "plt.title('total gains %f, total investment %f%%'%(total_gains, invest))\n", "plt.legend()\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.8" } }, "nbformat": 4, "nbformat_minor": 2 }