{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import tensorflow as tf\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "sns.set()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
DateOpenHighLowCloseAdj CloseVolume
02016-11-02778.200012781.650024763.450012768.700012768.7000121872400
12016-11-03767.250000769.950012759.030029762.130005762.1300051943200
22016-11-04750.659973770.359985750.560974762.020020762.0200202134800
32016-11-07774.500000785.190002772.549988782.520020782.5200201585100
42016-11-08783.400024795.632996780.190002790.510010790.5100101350800
\n", "
" ], "text/plain": [ " Date Open High Low Close Adj Close \\\n", "0 2016-11-02 778.200012 781.650024 763.450012 768.700012 768.700012 \n", "1 2016-11-03 767.250000 769.950012 759.030029 762.130005 762.130005 \n", "2 2016-11-04 750.659973 770.359985 750.560974 762.020020 762.020020 \n", "3 2016-11-07 774.500000 785.190002 772.549988 782.520020 782.520020 \n", "4 2016-11-08 783.400024 795.632996 780.190002 790.510010 790.510010 \n", "\n", " Volume \n", "0 1872400 \n", "1 1943200 \n", "2 2134800 \n", "3 1585100 \n", "4 1350800 " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_csv('../dataset/GOOG-year.csv')\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "from collections import deque\n", "import random\n", "\n", "class Agent:\n", "\n", " LEARNING_RATE = 0.003\n", " BATCH_SIZE = 32\n", " LAYER_SIZE = 128\n", " OUTPUT_SIZE = 3\n", " EPSILON = 0.5\n", " DECAY_RATE = 0.005\n", " MIN_EPSILON = 0.1\n", " GAMMA = 0.99\n", " MEMORIES = deque()\n", " COPY = 1000\n", " T_COPY = 0\n", " MEMORY_SIZE = 300\n", " \n", " def __init__(self, state_size, window_size, trend, skip):\n", " self.state_size = state_size\n", " self.window_size = window_size\n", " self.half_window = window_size // 2\n", " self.trend = trend\n", " self.skip = skip\n", " tf.reset_default_graph()\n", " self.INITIAL_FEATURES = np.zeros((4, self.state_size))\n", " self.X = tf.placeholder(tf.float32, (None, None, self.state_size))\n", " self.Y = tf.placeholder(tf.float32, (None, None, self.state_size))\n", " self.hidden_layer = tf.placeholder(tf.float32, (None, 2 * self.LAYER_SIZE))\n", " self.ACTION = tf.placeholder(tf.float32, (None))\n", " self.REWARD = tf.placeholder(tf.float32, (None))\n", " self.batch_size = tf.shape(self.ACTION)[0]\n", " self.seq_len = tf.shape(self.X)[1]\n", " \n", " with tf.variable_scope('curiosity_model'):\n", " action = tf.reshape(self.ACTION, (-1,1,1))\n", " repeat_action = tf.tile(action, [1,self.seq_len,1])\n", " state_action = tf.concat([self.X, repeat_action], axis=-1)\n", " save_state = tf.identity(self.Y)\n", " cell = tf.nn.rnn_cell.LSTMCell(self.LAYER_SIZE, state_is_tuple = False)\n", " self.rnn,last_state = tf.nn.dynamic_rnn(inputs=state_action,cell=cell,\n", " dtype=tf.float32,\n", " initial_state=self.hidden_layer)\n", " self.curiosity_logits = tf.layers.dense(self.rnn[:,-1], self.state_size)\n", " self.curiosity_cost = tf.reduce_sum(tf.square(save_state[:,-1] - self.curiosity_logits), axis=1)\n", " \n", " self.curiosity_optimizer = tf.train.RMSPropOptimizer(self.LEARNING_RATE)\\\n", " .minimize(tf.reduce_mean(self.curiosity_cost))\n", " \n", " total_reward = tf.add(self.curiosity_cost, self.REWARD)\n", " \n", " with tf.variable_scope(\"q_model\"):\n", " with tf.variable_scope(\"eval_net\"):\n", " cell = tf.nn.rnn_cell.LSTMCell(self.LAYER_SIZE, state_is_tuple = False)\n", " rnn,self.last_state = tf.nn.dynamic_rnn(inputs=self.X,cell=cell,\n", " dtype=tf.float32,\n", " initial_state=self.hidden_layer)\n", " self.logits = tf.layers.dense(rnn[:,-1], self.OUTPUT_SIZE)\n", " \n", " with tf.variable_scope(\"target_net\"):\n", " cell = tf.nn.rnn_cell.LSTMCell(self.LAYER_SIZE, state_is_tuple = False)\n", " rnn,last_state = tf.nn.dynamic_rnn(inputs=self.Y,cell=cell,\n", " dtype=tf.float32,\n", " initial_state=self.hidden_layer)\n", " y_q = tf.layers.dense(rnn[:,-1], self.OUTPUT_SIZE)\n", " \n", " q_target = total_reward + self.GAMMA * tf.reduce_max(y_q, axis=1)\n", " action = tf.cast(self.ACTION, tf.int32)\n", " action_indices = tf.stack([tf.range(self.batch_size, dtype=tf.int32), action], axis=1)\n", " q = tf.gather_nd(params=self.logits, indices=action_indices)\n", " self.cost = tf.losses.mean_squared_error(labels=q_target, predictions=q)\n", " self.optimizer = tf.train.RMSPropOptimizer(self.LEARNING_RATE).minimize(\n", " self.cost, var_list=tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, \"q_model/eval_net\"))\n", " \n", " t_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='q_model/target_net')\n", " e_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='q_model/eval_net')\n", " self.target_replace_op = [tf.assign(t, e) for t, e in zip(t_params, e_params)]\n", " \n", " self.sess = tf.InteractiveSession()\n", " self.sess.run(tf.global_variables_initializer())\n", " \n", " def _memorize(self, state, action, reward, new_state, done, rnn_state):\n", " self.MEMORIES.append((state, action, reward, new_state, done, rnn_state))\n", " if len(self.MEMORIES) > self.MEMORY_SIZE:\n", " self.MEMORIES.popleft()\n", " \n", " def get_state(self, t):\n", " window_size = self.window_size + 1\n", " d = t - window_size + 1\n", " block = self.trend[d : t + 1] if d >= 0 else -d * [self.trend[0]] + self.trend[0 : t + 1]\n", " res = []\n", " for i in range(window_size - 1):\n", " res.append(block[i + 1] - block[i])\n", " return np.array(res)\n", " \n", " def _construct_memories(self, replay):\n", " states = np.array([a[0] for a in replay])\n", " actions = np.array([a[1] for a in replay])\n", " rewards = np.array([a[2] for a in replay])\n", " new_states = np.array([a[3] for a in replay])\n", " init_values = np.array([a[-1] for a in replay])\n", " if (self.T_COPY + 1) % self.COPY == 0:\n", " self.sess.run(self.target_replace_op)\n", " \n", " cost, _ = self.sess.run([self.cost, self.optimizer], feed_dict = {\n", " self.X: states, self.Y: new_states, self.ACTION: actions, self.REWARD: rewards,\n", " self.hidden_layer: init_values\n", " })\n", " \n", " if (self.T_COPY + 1) % self.COPY == 0:\n", " self.sess.run(self.curiosity_optimizer, feed_dict = {\n", " self.X: states, self.Y: new_states, self.ACTION: actions, self.REWARD: rewards,\n", " self.hidden_layer: init_values\n", " })\n", " return cost\n", " \n", " def buy(self, initial_money):\n", " starting_money = initial_money\n", " states_sell = []\n", " states_buy = []\n", " inventory = []\n", " state = self.get_state(0)\n", " init_value = np.zeros((1, 2 * self.LAYER_SIZE))\n", " for k in range(self.INITIAL_FEATURES.shape[0]):\n", " self.INITIAL_FEATURES[k,:] = state\n", " for t in range(0, len(self.trend) - 1, self.skip):\n", " \n", " if np.random.rand() < self.EPSILON:\n", " action = np.random.randint(self.OUTPUT_SIZE)\n", " else:\n", " action, last_state = self.sess.run([self.logits,\n", " self.last_state],\n", " feed_dict={self.X:[self.INITIAL_FEATURES],\n", " self.hidden_layer:init_value})\n", " action, init_value = np.argmax(action[0]), last_state\n", " \n", " next_state = self.get_state(t + 1)\n", " \n", " if action == 1 and initial_money >= self.trend[t]:\n", " inventory.append(self.trend[t])\n", " initial_money -= self.trend[t]\n", " states_buy.append(t)\n", " print('day %d: buy 1 unit at price %f, total balance %f'% (t, self.trend[t], initial_money))\n", " \n", " elif action == 2 and len(inventory):\n", " bought_price = inventory.pop(0)\n", " initial_money += self.trend[t]\n", " states_sell.append(t)\n", " try:\n", " invest = ((close[t] - bought_price) / bought_price) * 100\n", " except:\n", " invest = 0\n", " print(\n", " 'day %d, sell 1 unit at price %f, investment %f %%, total balance %f,'\n", " % (t, close[t], invest, initial_money)\n", " )\n", " \n", " new_state = np.append([self.get_state(t + 1)], self.INITIAL_FEATURES[:3, :], axis = 0)\n", " self.INITIAL_FEATURES = new_state\n", " invest = ((initial_money - starting_money) / starting_money) * 100\n", " total_gains = initial_money - starting_money\n", " return states_buy, states_sell, total_gains, invest\n", " \n", " def train(self, iterations, checkpoint, initial_money):\n", " for i in range(iterations):\n", " total_profit = 0\n", " inventory = []\n", " state = self.get_state(0)\n", " starting_money = initial_money\n", " init_value = np.zeros((1, 2 * self.LAYER_SIZE))\n", " for k in range(self.INITIAL_FEATURES.shape[0]):\n", " self.INITIAL_FEATURES[k,:] = state\n", " for t in range(0, len(self.trend) - 1, self.skip):\n", " if np.random.rand() < self.EPSILON:\n", " action = np.random.randint(self.OUTPUT_SIZE)\n", " else:\n", " action, last_state = self.sess.run([self.logits,\n", " self.last_state],\n", " feed_dict={self.X:[self.INITIAL_FEATURES],\n", " self.hidden_layer:init_value})\n", " action, init_value = np.argmax(action[0]), last_state\n", " \n", " next_state = self.get_state(t + 1)\n", " \n", " if action == 1 and starting_money >= self.trend[t]:\n", " inventory.append(self.trend[t])\n", " starting_money -= self.trend[t]\n", " \n", " elif action == 2 and len(inventory) > 0:\n", " bought_price = inventory.pop(0)\n", " total_profit += self.trend[t] - bought_price\n", " starting_money += self.trend[t]\n", " \n", " invest = ((starting_money - initial_money) / initial_money)\n", " new_state = np.append([self.get_state(t + 1)], self.INITIAL_FEATURES[:3, :], axis = 0)\n", " self._memorize(self.INITIAL_FEATURES, action, invest, new_state, \n", " starting_money < initial_money, init_value[0])\n", " self.INITIAL_FEATURES = new_state\n", " batch_size = min(len(self.MEMORIES), self.BATCH_SIZE)\n", " replay = random.sample(self.MEMORIES, batch_size)\n", " cost = self._construct_memories(replay)\n", " self.T_COPY += 1\n", " self.EPSILON = self.MIN_EPSILON + (1.0 - self.MIN_EPSILON) * np.exp(-self.DECAY_RATE * i)\n", " if (i+1) % checkpoint == 0:\n", " print('epoch: %d, total rewards: %f.3, cost: %f, total money: %f'%(i + 1, total_profit, cost,\n", " starting_money))" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "WARNING:tensorflow:: Using a concatenated state is slower and will soon be deprecated. Use state_is_tuple=True.\n", "WARNING:tensorflow:: Using a concatenated state is slower and will soon be deprecated. Use state_is_tuple=True.\n", "WARNING:tensorflow:: Using a concatenated state is slower and will soon be deprecated. Use state_is_tuple=True.\n", "epoch: 10, total rewards: 685.860168.3, cost: 4139534.500000, total money: 977.580137\n", "epoch: 20, total rewards: 1724.255003.3, cost: 5132677.500000, total money: 5851.904966\n", "epoch: 30, total rewards: 493.970035.3, cost: 3979546.750000, total money: 8528.600039\n", "epoch: 40, total rewards: 1580.255128.3, cost: 5099559.000000, total money: 4018.855103\n", "epoch: 50, total rewards: 1467.990231.3, cost: 4410721.500000, total money: 8490.720211\n", "epoch: 60, total rewards: 1285.420161.3, cost: 3993190.000000, total money: 2688.440118\n", "epoch: 70, total rewards: 391.130068.3, cost: 3420379.000000, total money: 6491.710085\n", "epoch: 80, total rewards: 1276.110108.3, cost: 3443612.750000, total money: 3698.110047\n", "epoch: 90, total rewards: 672.475340.3, cost: 2882908.000000, total money: 208.605285\n", "epoch: 100, total rewards: 706.604982.3, cost: 3108476.500000, total money: 1169.724916\n", "epoch: 110, total rewards: 979.940367.3, cost: 2024909.750000, total money: 3200.720335\n", "epoch: 120, total rewards: 853.199893.3, cost: 4572564.500000, total money: 6070.309879\n", "epoch: 130, total rewards: 1339.975223.3, cost: 3904469.500000, total money: 7475.465274\n", "epoch: 140, total rewards: 1136.924864.3, cost: 4352429.000000, total money: 4448.164854\n", "epoch: 150, total rewards: 1499.745116.3, cost: 2398584.500000, total money: 3999.355042\n", "epoch: 160, total rewards: 481.755190.3, cost: 3168836.250000, total money: 7573.215212\n", "epoch: 170, total rewards: 1733.610290.3, cost: 1907320.875000, total money: 6940.950254\n", "epoch: 180, total rewards: 390.074828.3, cost: 2862924.000000, total money: 5516.364805\n", "epoch: 190, total rewards: 714.815121.3, cost: 2666878.750000, total money: 9726.615109\n", "epoch: 200, total rewards: 1474.129822.3, cost: 3016419.000000, total money: 1901.589906\n" ] } ], "source": [ "close = df.Close.values.tolist()\n", "initial_money = 10000\n", "window_size = 30\n", "skip = 1\n", "batch_size = 32\n", "agent = Agent(state_size = window_size, \n", " window_size = window_size, \n", " trend = close, \n", " skip = skip)\n", "agent.train(iterations = 200, checkpoint = 10, initial_money = initial_money)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "day 0: buy 1 unit at price 768.700012, total balance 9231.299988\n", "day 1, sell 1 unit at price 762.130005, investment -0.854691 %, total balance 9993.429993,\n", "day 4: buy 1 unit at price 790.510010, total balance 9202.919983\n", "day 5: buy 1 unit at price 785.309998, total balance 8417.609985\n", "day 8: buy 1 unit at price 736.080017, total balance 7681.529968\n", "day 9: buy 1 unit at price 758.489990, total balance 6923.039978\n", "day 11, sell 1 unit at price 771.229980, investment -2.438936 %, total balance 7694.269958,\n", "day 13: buy 1 unit at price 769.200012, total balance 6925.069946\n", "day 17: buy 1 unit at price 768.239990, total balance 6156.829956\n", "day 19, sell 1 unit at price 758.039978, investment -3.472517 %, total balance 6914.869934,\n", "day 25, sell 1 unit at price 776.419983, investment 5.480378 %, total balance 7691.289917,\n", "day 26: buy 1 unit at price 789.289978, total balance 6901.999939\n", "day 28: buy 1 unit at price 796.099976, total balance 6105.899963\n", "day 31: buy 1 unit at price 790.799988, total balance 5315.099975\n", "day 40, sell 1 unit at price 771.820007, investment 1.757441 %, total balance 6086.919982,\n", "day 46, sell 1 unit at price 804.789978, investment 4.626881 %, total balance 6891.709960,\n", "day 47, sell 1 unit at price 807.909973, investment 5.163749 %, total balance 7699.619933,\n", "day 50: buy 1 unit at price 804.609985, total balance 6895.009948\n", "day 57: buy 1 unit at price 832.150024, total balance 6062.859924\n", "day 58, sell 1 unit at price 823.309998, investment 4.310205 %, total balance 6886.169922,\n", "day 61: buy 1 unit at price 795.695007, total balance 6090.474915\n", "day 62: buy 1 unit at price 798.530029, total balance 5291.944886\n", "day 70: buy 1 unit at price 820.450012, total balance 4471.494874\n", "day 73: buy 1 unit at price 828.070007, total balance 3643.424867\n", "day 76: buy 1 unit at price 831.330017, total balance 2812.094850\n", "day 85: buy 1 unit at price 835.369995, total balance 1976.724855\n", "day 89, sell 1 unit at price 845.619995, investment 6.220327 %, total balance 2822.344850,\n", "day 91: buy 1 unit at price 848.780029, total balance 1973.564821\n", "day 98: buy 1 unit at price 819.510010, total balance 1154.054811\n", "day 100: buy 1 unit at price 831.409973, total balance 322.644838\n", "day 102, sell 1 unit at price 829.559998, investment 4.901367 %, total balance 1152.204836,\n", "day 111, sell 1 unit at price 823.559998, investment 2.355180 %, total balance 1975.764834,\n", "day 113: buy 1 unit at price 836.820007, total balance 1138.944827\n", "day 114, sell 1 unit at price 838.210022, investment 0.728234 %, total balance 1977.154849,\n", "day 117: buy 1 unit at price 862.760010, total balance 1114.394839\n", "day 118: buy 1 unit at price 872.299988, total balance 242.094851\n", "day 132, sell 1 unit at price 937.080017, investment 17.768744 %, total balance 1179.174868,\n", "day 138: buy 1 unit at price 948.820007, total balance 230.354861\n", "day 139, sell 1 unit at price 954.960022, investment 19.589745 %, total balance 1185.314883,\n", "day 140: buy 1 unit at price 969.539978, total balance 215.774905\n", "day 154, sell 1 unit at price 942.309998, investment 14.852823 %, total balance 1158.084903,\n", "day 158, sell 1 unit at price 959.450012, investment 15.865809 %, total balance 2117.534915,\n", "day 160: buy 1 unit at price 965.590027, total balance 1151.944888\n", "day 168: buy 1 unit at price 906.690002, total balance 245.254886\n", "day 169, sell 1 unit at price 918.590027, investment 10.496434 %, total balance 1163.844913,\n", "day 176: buy 1 unit at price 965.400024, total balance 198.444889\n", "day 189, sell 1 unit at price 927.960022, investment 11.083715 %, total balance 1126.404911,\n", "day 191: buy 1 unit at price 926.789978, total balance 199.614933\n", "day 195, sell 1 unit at price 922.669983, investment 8.705430 %, total balance 1122.284916,\n", "day 200, sell 1 unit at price 906.659973, investment 10.634399 %, total balance 2028.944889,\n", "day 201: buy 1 unit at price 924.690002, total balance 1104.254887\n", "day 202, sell 1 unit at price 927.000000, investment 11.497339 %, total balance 2031.254887,\n", "day 206: buy 1 unit at price 921.289978, total balance 1109.964909\n", "day 211: buy 1 unit at price 927.809998, total balance 182.154911\n", "day 220, sell 1 unit at price 921.809998, investment 10.156305 %, total balance 1103.964909,\n", "day 226, sell 1 unit at price 944.489990, investment 9.473084 %, total balance 2048.454899,\n", "day 228, sell 1 unit at price 959.109985, investment 9.951851 %, total balance 3007.564884,\n", "day 230: buy 1 unit at price 957.789978, total balance 2049.774906\n", "day 231, sell 1 unit at price 951.679993, investment 0.301426 %, total balance 3001.454899,\n", "day 234: buy 1 unit at price 977.000000, total balance 2024.454899\n", "day 237: buy 1 unit at price 987.830017, total balance 1036.624882\n", "day 238, sell 1 unit at price 989.679993, investment 2.077275 %, total balance 2026.304875,\n", "day 240: buy 1 unit at price 992.179993, total balance 1034.124882\n", "day 241: buy 1 unit at price 992.809998, total balance 41.314884\n", "day 242, sell 1 unit at price 984.450012, investment 1.953208 %, total balance 1025.764896,\n", "day 248, sell 1 unit at price 1019.270020, investment 12.416594 %, total balance 2045.034916,\n" ] } ], "source": [ "states_buy, states_sell, total_gains, invest = agent.buy(initial_money = initial_money)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "fig = plt.figure(figsize = (15,5))\n", "plt.plot(close, color='r', lw=2.)\n", "plt.plot(close, '^', markersize=10, color='m', label = 'buying signal', markevery = states_buy)\n", "plt.plot(close, 'v', markersize=10, color='k', label = 'selling signal', markevery = states_sell)\n", "plt.title('total gains %f, total investment %f%%'%(total_gains, invest))\n", "plt.legend()\n", "plt.show()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.8" } }, "nbformat": 4, "nbformat_minor": 2 }