mirror of
https://github.com/13hannes11/UU_ai_course.git
synced 2024-09-03 21:10:58 +02:00
add ipynb for assignments in python
This commit is contained in:
1040
assignment_4.ipynb
Normal file
1040
assignment_4.ipynb
Normal file
File diff suppressed because one or more lines are too long
711
assignment_5.ipynb
Normal file
711
assignment_5.ipynb
Normal file
@@ -0,0 +1,711 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {
|
||||||
|
"_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
|
||||||
|
"_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
|
||||||
|
"execution": {
|
||||||
|
"iopub.execute_input": "2020-10-23T07:01:24.034522Z",
|
||||||
|
"iopub.status.busy": "2020-10-23T07:01:24.033481Z",
|
||||||
|
"iopub.status.idle": "2020-10-23T07:01:24.040184Z",
|
||||||
|
"shell.execute_reply": "2020-10-23T07:01:24.039289Z"
|
||||||
|
},
|
||||||
|
"papermill": {
|
||||||
|
"duration": 0.026939,
|
||||||
|
"end_time": "2020-10-23T07:01:24.040385",
|
||||||
|
"exception": false,
|
||||||
|
"start_time": "2020-10-23T07:01:24.013446",
|
||||||
|
"status": "completed"
|
||||||
|
},
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"/kaggle/input/rl-assignment/samplesubmission.csv\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# This Python 3 environment comes with many helpful analytics libraries installed\n",
|
||||||
|
"# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n",
|
||||||
|
"# For example, here's several helpful packages to load\n",
|
||||||
|
"\n",
|
||||||
|
"import numpy as np # linear algebra\n",
|
||||||
|
"import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
|
||||||
|
"\n",
|
||||||
|
"# Input data files are available in the read-only \"../input/\" directory\n",
|
||||||
|
"# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n",
|
||||||
|
"\n",
|
||||||
|
"import os\n",
|
||||||
|
"for dirname, _, filenames in os.walk('/kaggle/input'):\n",
|
||||||
|
" for filename in filenames:\n",
|
||||||
|
" print(os.path.join(dirname, filename))\n",
|
||||||
|
"\n",
|
||||||
|
"# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n",
|
||||||
|
"# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {
|
||||||
|
"_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0",
|
||||||
|
"_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a",
|
||||||
|
"execution": {
|
||||||
|
"iopub.execute_input": "2020-10-23T07:01:24.069878Z",
|
||||||
|
"iopub.status.busy": "2020-10-23T07:01:24.068972Z",
|
||||||
|
"iopub.status.idle": "2020-10-23T07:01:25.495962Z",
|
||||||
|
"shell.execute_reply": "2020-10-23T07:01:25.494876Z"
|
||||||
|
},
|
||||||
|
"papermill": {
|
||||||
|
"duration": 1.44331,
|
||||||
|
"end_time": "2020-10-23T07:01:25.496099",
|
||||||
|
"exception": false,
|
||||||
|
"start_time": "2020-10-23T07:01:24.052789",
|
||||||
|
"status": "completed"
|
||||||
|
},
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"+---------+\n",
|
||||||
|
"|\u001b[34;1mR\u001b[0m: |\u001b[43m \u001b[0m: :\u001b[35mG\u001b[0m|\n",
|
||||||
|
"| : | : : |\n",
|
||||||
|
"| : : : : |\n",
|
||||||
|
"| | : | : |\n",
|
||||||
|
"|Y| : |B: |\n",
|
||||||
|
"+---------+\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"import gym\n",
|
||||||
|
"\n",
|
||||||
|
"env = gym.make(\"Taxi-v3\").env\n",
|
||||||
|
"\n",
|
||||||
|
"env.reset() # reset environment to a new, random state\n",
|
||||||
|
"env.render()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"metadata": {
|
||||||
|
"execution": {
|
||||||
|
"iopub.execute_input": "2020-10-23T07:01:25.526480Z",
|
||||||
|
"iopub.status.busy": "2020-10-23T07:01:25.525487Z",
|
||||||
|
"iopub.status.idle": "2020-10-23T07:01:25.530094Z",
|
||||||
|
"shell.execute_reply": "2020-10-23T07:01:25.529110Z"
|
||||||
|
},
|
||||||
|
"papermill": {
|
||||||
|
"duration": 0.022214,
|
||||||
|
"end_time": "2020-10-23T07:01:25.530278",
|
||||||
|
"exception": false,
|
||||||
|
"start_time": "2020-10-23T07:01:25.508064",
|
||||||
|
"status": "completed"
|
||||||
|
},
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Action Space Discrete(6)\n",
|
||||||
|
"State Space Discrete(500)\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# action space\n",
|
||||||
|
"print(\"Action Space {}\".format(env.action_space))\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"State Space {}\".format(env.observation_space))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"metadata": {
|
||||||
|
"execution": {
|
||||||
|
"iopub.execute_input": "2020-10-23T07:01:25.561002Z",
|
||||||
|
"iopub.status.busy": "2020-10-23T07:01:25.560068Z",
|
||||||
|
"iopub.status.idle": "2020-10-23T07:01:25.563851Z",
|
||||||
|
"shell.execute_reply": "2020-10-23T07:01:25.564479Z"
|
||||||
|
},
|
||||||
|
"papermill": {
|
||||||
|
"duration": 0.022719,
|
||||||
|
"end_time": "2020-10-23T07:01:25.564666",
|
||||||
|
"exception": false,
|
||||||
|
"start_time": "2020-10-23T07:01:25.541947",
|
||||||
|
"status": "completed"
|
||||||
|
},
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"State: 328\n",
|
||||||
|
"+---------+\n",
|
||||||
|
"|\u001b[35mR\u001b[0m: | : :G|\n",
|
||||||
|
"| : | : : |\n",
|
||||||
|
"| : : : : |\n",
|
||||||
|
"| |\u001b[43m \u001b[0m: | : |\n",
|
||||||
|
"|\u001b[34;1mY\u001b[0m| : |B: |\n",
|
||||||
|
"+---------+\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# state encoding\n",
|
||||||
|
"\n",
|
||||||
|
"state = env.encode(3, 1, 2, 0) # (taxi row, taxi column, passenger index, destination index)\n",
|
||||||
|
"print(\"State:\", state)\n",
|
||||||
|
"\n",
|
||||||
|
"env.s = state\n",
|
||||||
|
"env.render()\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 5,
|
||||||
|
"metadata": {
|
||||||
|
"execution": {
|
||||||
|
"iopub.execute_input": "2020-10-23T07:01:25.599957Z",
|
||||||
|
"iopub.status.busy": "2020-10-23T07:01:25.599051Z",
|
||||||
|
"iopub.status.idle": "2020-10-23T07:01:25.604442Z",
|
||||||
|
"shell.execute_reply": "2020-10-23T07:01:25.603711Z"
|
||||||
|
},
|
||||||
|
"papermill": {
|
||||||
|
"duration": 0.027519,
|
||||||
|
"end_time": "2020-10-23T07:01:25.604618",
|
||||||
|
"exception": false,
|
||||||
|
"start_time": "2020-10-23T07:01:25.577099",
|
||||||
|
"status": "completed"
|
||||||
|
},
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"{0: [(1.0, 428, -1, False)],\n",
|
||||||
|
" 1: [(1.0, 228, -1, False)],\n",
|
||||||
|
" 2: [(1.0, 348, -1, False)],\n",
|
||||||
|
" 3: [(1.0, 328, -1, False)],\n",
|
||||||
|
" 4: [(1.0, 328, -10, False)],\n",
|
||||||
|
" 5: [(1.0, 328, -10, False)]}"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 5,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# rewards table\n",
|
||||||
|
"env.P[328]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 6,
|
||||||
|
"metadata": {
|
||||||
|
"execution": {
|
||||||
|
"iopub.execute_input": "2020-10-23T07:01:25.639997Z",
|
||||||
|
"iopub.status.busy": "2020-10-23T07:01:25.638924Z",
|
||||||
|
"iopub.status.idle": "2020-10-23T07:01:25.643756Z",
|
||||||
|
"shell.execute_reply": "2020-10-23T07:01:25.642988Z"
|
||||||
|
},
|
||||||
|
"papermill": {
|
||||||
|
"duration": 0.025446,
|
||||||
|
"end_time": "2020-10-23T07:01:25.643881",
|
||||||
|
"exception": false,
|
||||||
|
"start_time": "2020-10-23T07:01:25.618435",
|
||||||
|
"status": "completed"
|
||||||
|
},
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"array([[0., 0., 0., 0., 0., 0.],\n",
|
||||||
|
" [0., 0., 0., 0., 0., 0.],\n",
|
||||||
|
" [0., 0., 0., 0., 0., 0.],\n",
|
||||||
|
" ...,\n",
|
||||||
|
" [0., 0., 0., 0., 0., 0.],\n",
|
||||||
|
" [0., 0., 0., 0., 0., 0.],\n",
|
||||||
|
" [0., 0., 0., 0., 0., 0.]])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 6,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# Defining q table\n",
|
||||||
|
"\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"q_table = np.zeros([env.observation_space.n, env.action_space.n])\n",
|
||||||
|
"q_table"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 7,
|
||||||
|
"metadata": {
|
||||||
|
"execution": {
|
||||||
|
"iopub.execute_input": "2020-10-23T07:01:25.677831Z",
|
||||||
|
"iopub.status.busy": "2020-10-23T07:01:25.676776Z",
|
||||||
|
"iopub.status.idle": "2020-10-23T07:01:25.680343Z",
|
||||||
|
"shell.execute_reply": "2020-10-23T07:01:25.679723Z"
|
||||||
|
},
|
||||||
|
"papermill": {
|
||||||
|
"duration": 0.022979,
|
||||||
|
"end_time": "2020-10-23T07:01:25.680493",
|
||||||
|
"exception": false,
|
||||||
|
"start_time": "2020-10-23T07:01:25.657514",
|
||||||
|
"status": "completed"
|
||||||
|
},
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# hyperparameters\n",
|
||||||
|
"alpha = 0.1\n",
|
||||||
|
"gamma = 0.6\n",
|
||||||
|
"epsiolon = 0.1"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 8,
|
||||||
|
"metadata": {
|
||||||
|
"execution": {
|
||||||
|
"iopub.execute_input": "2020-10-23T07:01:25.765603Z",
|
||||||
|
"iopub.status.busy": "2020-10-23T07:01:25.726090Z",
|
||||||
|
"iopub.status.idle": "2020-10-23T07:10:37.138058Z",
|
||||||
|
"shell.execute_reply": "2020-10-23T07:10:37.138886Z"
|
||||||
|
},
|
||||||
|
"papermill": {
|
||||||
|
"duration": 551.443876,
|
||||||
|
"end_time": "2020-10-23T07:10:37.139105",
|
||||||
|
"exception": false,
|
||||||
|
"start_time": "2020-10-23T07:01:25.695229",
|
||||||
|
"status": "completed"
|
||||||
|
},
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Epoch: 999999 Iterations: 14810058\r\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"import random\n",
|
||||||
|
"\n",
|
||||||
|
"max_epochs = 1000000 # stops after x epochs or max_iterations\n",
|
||||||
|
"max_iterations = -1 # 1000000 # -1 to not stop based on iterations\n",
|
||||||
|
"\n",
|
||||||
|
"iteration = 0\n",
|
||||||
|
"for epoch in range(max_epochs):\n",
|
||||||
|
" done = False\n",
|
||||||
|
" new_state = env.reset()\n",
|
||||||
|
" \n",
|
||||||
|
" while not done and (iteration < max_iterations or max_iterations == -1):\n",
|
||||||
|
" iteration += 1\n",
|
||||||
|
" old_state = new_state\n",
|
||||||
|
" if random.uniform(0,1) < epsiolon:\n",
|
||||||
|
" action = env.action_space.sample()\n",
|
||||||
|
" else:\n",
|
||||||
|
" action = np.argmax(q_table[old_state])\n",
|
||||||
|
" \n",
|
||||||
|
" new_state, reward, done, info = env.step(action)\n",
|
||||||
|
" env.s = new_state # update state of the environment (not sure if )\n",
|
||||||
|
" \n",
|
||||||
|
" q_table[old_state, action] = q_table[old_state][action] + alpha * (reward + gamma * np.max(q_table[new_state]) - q_table[old_state][action])\n",
|
||||||
|
" \n",
|
||||||
|
" if iteration >= max_iterations and max_iterations != -1:\n",
|
||||||
|
" break\n",
|
||||||
|
"print(\"Epoch: {} Iterations: {}\\r\".format(epoch, iteration))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 9,
|
||||||
|
"metadata": {
|
||||||
|
"execution": {
|
||||||
|
"iopub.execute_input": "2020-10-23T07:10:37.176308Z",
|
||||||
|
"iopub.status.busy": "2020-10-23T07:10:37.175458Z",
|
||||||
|
"iopub.status.idle": "2020-10-23T07:10:37.179942Z",
|
||||||
|
"shell.execute_reply": "2020-10-23T07:10:37.180484Z"
|
||||||
|
},
|
||||||
|
"papermill": {
|
||||||
|
"duration": 0.026804,
|
||||||
|
"end_time": "2020-10-23T07:10:37.180659",
|
||||||
|
"exception": false,
|
||||||
|
"start_time": "2020-10-23T07:10:37.153855",
|
||||||
|
"status": "completed"
|
||||||
|
},
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"array([[ 0. , 0. , 0. , 0. ,\n",
|
||||||
|
" 0. , 0. ],\n",
|
||||||
|
" [ -2.41837066, -2.3639511 , -2.41837066, -2.3639511 ,\n",
|
||||||
|
" -2.27325184, -11.3639511 ],\n",
|
||||||
|
" [ -1.870144 , -1.45024 , -1.870144 , -1.45024 ,\n",
|
||||||
|
" -0.7504 , -10.45024 ],\n",
|
||||||
|
" ...,\n",
|
||||||
|
" [ -0.75955193, 0.416 , -0.75945955, -1.44283176,\n",
|
||||||
|
" -9.1295566 , -9.26328213],\n",
|
||||||
|
" [ -2.26879216, -2.1220864 , -2.24661946, -2.1220864 ,\n",
|
||||||
|
" -10.7816137 , -10.43290413],\n",
|
||||||
|
" [ 5.6 , 2.36 , 5.6 , 11. ,\n",
|
||||||
|
" -3.4 , -3.4 ]])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 9,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"q_table"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 10,
|
||||||
|
"metadata": {
|
||||||
|
"execution": {
|
||||||
|
"iopub.execute_input": "2020-10-23T07:10:37.220686Z",
|
||||||
|
"iopub.status.busy": "2020-10-23T07:10:37.219859Z",
|
||||||
|
"iopub.status.idle": "2020-10-23T07:10:37.228862Z",
|
||||||
|
"shell.execute_reply": "2020-10-23T07:10:37.229575Z"
|
||||||
|
},
|
||||||
|
"papermill": {
|
||||||
|
"duration": 0.033495,
|
||||||
|
"end_time": "2020-10-23T07:10:37.229780",
|
||||||
|
"exception": false,
|
||||||
|
"start_time": "2020-10-23T07:10:37.196285",
|
||||||
|
"status": "completed"
|
||||||
|
},
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"+---------+\n",
|
||||||
|
"|R: | : :\u001b[35mG\u001b[0m|\n",
|
||||||
|
"| : | : : |\n",
|
||||||
|
"| :\u001b[43m \u001b[0m: : : |\n",
|
||||||
|
"| | : | : |\n",
|
||||||
|
"|Y| : |\u001b[34;1mB\u001b[0m: |\n",
|
||||||
|
"+---------+\n",
|
||||||
|
" (East)\n",
|
||||||
|
"+---------+\n",
|
||||||
|
"|R: | : :\u001b[35mG\u001b[0m|\n",
|
||||||
|
"| : | : : |\n",
|
||||||
|
"| : :\u001b[43m \u001b[0m: : |\n",
|
||||||
|
"| | : | : |\n",
|
||||||
|
"|Y| : |\u001b[34;1mB\u001b[0m: |\n",
|
||||||
|
"+---------+\n",
|
||||||
|
" (East)\n",
|
||||||
|
"+---------+\n",
|
||||||
|
"|R: | : :\u001b[35mG\u001b[0m|\n",
|
||||||
|
"| : | : : |\n",
|
||||||
|
"| : : :\u001b[43m \u001b[0m: |\n",
|
||||||
|
"| | : | : |\n",
|
||||||
|
"|Y| : |\u001b[34;1mB\u001b[0m: |\n",
|
||||||
|
"+---------+\n",
|
||||||
|
" (East)\n",
|
||||||
|
"+---------+\n",
|
||||||
|
"|R: | : :\u001b[35mG\u001b[0m|\n",
|
||||||
|
"| : | : : |\n",
|
||||||
|
"| : : : : |\n",
|
||||||
|
"| | : |\u001b[43m \u001b[0m: |\n",
|
||||||
|
"|Y| : |\u001b[34;1mB\u001b[0m: |\n",
|
||||||
|
"+---------+\n",
|
||||||
|
" (South)\n",
|
||||||
|
"+---------+\n",
|
||||||
|
"|R: | : :\u001b[35mG\u001b[0m|\n",
|
||||||
|
"| : | : : |\n",
|
||||||
|
"| : : : : |\n",
|
||||||
|
"| | : | : |\n",
|
||||||
|
"|Y| : |\u001b[34;1m\u001b[43mB\u001b[0m\u001b[0m: |\n",
|
||||||
|
"+---------+\n",
|
||||||
|
" (South)\n",
|
||||||
|
"+---------+\n",
|
||||||
|
"|R: | : :\u001b[35mG\u001b[0m|\n",
|
||||||
|
"| : | : : |\n",
|
||||||
|
"| : : : : |\n",
|
||||||
|
"| | : | : |\n",
|
||||||
|
"|Y| : |\u001b[42mB\u001b[0m: |\n",
|
||||||
|
"+---------+\n",
|
||||||
|
" (Pickup)\n",
|
||||||
|
"+---------+\n",
|
||||||
|
"|R: | : :\u001b[35mG\u001b[0m|\n",
|
||||||
|
"| : | : : |\n",
|
||||||
|
"| : : : : |\n",
|
||||||
|
"| | : |\u001b[42m_\u001b[0m: |\n",
|
||||||
|
"|Y| : |B: |\n",
|
||||||
|
"+---------+\n",
|
||||||
|
" (North)\n",
|
||||||
|
"+---------+\n",
|
||||||
|
"|R: | : :\u001b[35mG\u001b[0m|\n",
|
||||||
|
"| : | : : |\n",
|
||||||
|
"| : : :\u001b[42m_\u001b[0m: |\n",
|
||||||
|
"| | : | : |\n",
|
||||||
|
"|Y| : |B: |\n",
|
||||||
|
"+---------+\n",
|
||||||
|
" (North)\n",
|
||||||
|
"+---------+\n",
|
||||||
|
"|R: | : :\u001b[35mG\u001b[0m|\n",
|
||||||
|
"| : | :\u001b[42m_\u001b[0m: |\n",
|
||||||
|
"| : : : : |\n",
|
||||||
|
"| | : | : |\n",
|
||||||
|
"|Y| : |B: |\n",
|
||||||
|
"+---------+\n",
|
||||||
|
" (North)\n",
|
||||||
|
"+---------+\n",
|
||||||
|
"|R: | :\u001b[42m_\u001b[0m:\u001b[35mG\u001b[0m|\n",
|
||||||
|
"| : | : : |\n",
|
||||||
|
"| : : : : |\n",
|
||||||
|
"| | : | : |\n",
|
||||||
|
"|Y| : |B: |\n",
|
||||||
|
"+---------+\n",
|
||||||
|
" (North)\n",
|
||||||
|
"+---------+\n",
|
||||||
|
"|R: | : :\u001b[35m\u001b[42mG\u001b[0m\u001b[0m|\n",
|
||||||
|
"| : | : : |\n",
|
||||||
|
"| : : : : |\n",
|
||||||
|
"| | : | : |\n",
|
||||||
|
"|Y| : |B: |\n",
|
||||||
|
"+---------+\n",
|
||||||
|
" (East)\n",
|
||||||
|
"+---------+\n",
|
||||||
|
"|R: | : :\u001b[35m\u001b[34;1m\u001b[43mG\u001b[0m\u001b[0m\u001b[0m|\n",
|
||||||
|
"| : | : : |\n",
|
||||||
|
"| : : : : |\n",
|
||||||
|
"| | : | : |\n",
|
||||||
|
"|Y| : |B: |\n",
|
||||||
|
"+---------+\n",
|
||||||
|
" (Dropoff)\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# example game\n",
|
||||||
|
"\n",
|
||||||
|
"new_state = env.reset()\n",
|
||||||
|
"\n",
|
||||||
|
"done = False\n",
|
||||||
|
"max_iterations = 100\n",
|
||||||
|
"iteration = 0\n",
|
||||||
|
"while not done and (iteration <= max_iterations or max_iterations == -1):\n",
|
||||||
|
" old_state = new_state\n",
|
||||||
|
" action = np.argmax(q_table[old_state])\n",
|
||||||
|
" new_state, reward, done, info = env.step(action)\n",
|
||||||
|
" env.s = new_state\n",
|
||||||
|
" env.render()\n",
|
||||||
|
" iteration += 1\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 11,
|
||||||
|
"metadata": {
|
||||||
|
"execution": {
|
||||||
|
"iopub.execute_input": "2020-10-23T07:10:37.269410Z",
|
||||||
|
"iopub.status.busy": "2020-10-23T07:10:37.268454Z",
|
||||||
|
"iopub.status.idle": "2020-10-23T07:10:37.659363Z",
|
||||||
|
"shell.execute_reply": "2020-10-23T07:10:37.658561Z"
|
||||||
|
},
|
||||||
|
"papermill": {
|
||||||
|
"duration": 0.413907,
|
||||||
|
"end_time": "2020-10-23T07:10:37.659523",
|
||||||
|
"exception": false,
|
||||||
|
"start_time": "2020-10-23T07:10:37.245616",
|
||||||
|
"status": "completed"
|
||||||
|
},
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>Id</th>\n",
|
||||||
|
" <th>Value</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>0</th>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>0.00</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1</th>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>0.00</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2</th>\n",
|
||||||
|
" <td>3</td>\n",
|
||||||
|
" <td>0.00</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>3</th>\n",
|
||||||
|
" <td>4</td>\n",
|
||||||
|
" <td>0.00</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>4</th>\n",
|
||||||
|
" <td>5</td>\n",
|
||||||
|
" <td>0.00</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>...</th>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2995</th>\n",
|
||||||
|
" <td>2996</td>\n",
|
||||||
|
" <td>2.36</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2996</th>\n",
|
||||||
|
" <td>2997</td>\n",
|
||||||
|
" <td>5.60</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2997</th>\n",
|
||||||
|
" <td>2998</td>\n",
|
||||||
|
" <td>11.00</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2998</th>\n",
|
||||||
|
" <td>2999</td>\n",
|
||||||
|
" <td>-3.40</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2999</th>\n",
|
||||||
|
" <td>3000</td>\n",
|
||||||
|
" <td>-3.40</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"<p>3000 rows × 2 columns</p>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" Id Value\n",
|
||||||
|
"0 1 0.00\n",
|
||||||
|
"1 2 0.00\n",
|
||||||
|
"2 3 0.00\n",
|
||||||
|
"3 4 0.00\n",
|
||||||
|
"4 5 0.00\n",
|
||||||
|
"... ... ...\n",
|
||||||
|
"2995 2996 2.36\n",
|
||||||
|
"2996 2997 5.60\n",
|
||||||
|
"2997 2998 11.00\n",
|
||||||
|
"2998 2999 -3.40\n",
|
||||||
|
"2999 3000 -3.40\n",
|
||||||
|
"\n",
|
||||||
|
"[3000 rows x 2 columns]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 11,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"#generate output file\n",
|
||||||
|
"df = pd.DataFrame(q_table.ravel())\n",
|
||||||
|
"\n",
|
||||||
|
"df.index += 1\n",
|
||||||
|
"df.to_csv(\"q_table.csv\", index_label=\"Id\", header=[\"Value\"])\n",
|
||||||
|
"df\n",
|
||||||
|
"\n",
|
||||||
|
"# load data again to test correct file format\n",
|
||||||
|
"pd.read_csv(\"q_table.csv\")"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.7.6"
|
||||||
|
},
|
||||||
|
"papermill": {
|
||||||
|
"duration": 559.137382,
|
||||||
|
"end_time": "2020-10-23T07:10:37.784395",
|
||||||
|
"environment_variables": {},
|
||||||
|
"exception": null,
|
||||||
|
"input_path": "__notebook__.ipynb",
|
||||||
|
"output_path": "__notebook__.ipynb",
|
||||||
|
"parameters": {},
|
||||||
|
"start_time": "2020-10-23T07:01:18.647013",
|
||||||
|
"version": "2.1.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user