add ipynb for assignments in python

This commit is contained in:
2020-10-23 16:28:25 +02:00
parent ee5457b4a0
commit bd35d67ee0
2 changed files with 1751 additions and 0 deletions

1040
assignment_4.ipynb Normal file

File diff suppressed because one or more lines are too long

711
assignment_5.ipynb Normal file
View File

@@ -0,0 +1,711 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
"_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
"execution": {
"iopub.execute_input": "2020-10-23T07:01:24.034522Z",
"iopub.status.busy": "2020-10-23T07:01:24.033481Z",
"iopub.status.idle": "2020-10-23T07:01:24.040184Z",
"shell.execute_reply": "2020-10-23T07:01:24.039289Z"
},
"papermill": {
"duration": 0.026939,
"end_time": "2020-10-23T07:01:24.040385",
"exception": false,
"start_time": "2020-10-23T07:01:24.013446",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/kaggle/input/rl-assignment/samplesubmission.csv\n"
]
}
],
"source": [
"# This Python 3 environment comes with many helpful analytics libraries installed\n",
"# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n",
"# For example, here's several helpful packages to load\n",
"\n",
"import numpy as np # linear algebra\n",
"import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
"\n",
"# Input data files are available in the read-only \"../input/\" directory\n",
"# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n",
"\n",
"import os\n",
"for dirname, _, filenames in os.walk('/kaggle/input'):\n",
" for filename in filenames:\n",
" print(os.path.join(dirname, filename))\n",
"\n",
"# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n",
"# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0",
"_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a",
"execution": {
"iopub.execute_input": "2020-10-23T07:01:24.069878Z",
"iopub.status.busy": "2020-10-23T07:01:24.068972Z",
"iopub.status.idle": "2020-10-23T07:01:25.495962Z",
"shell.execute_reply": "2020-10-23T07:01:25.494876Z"
},
"papermill": {
"duration": 1.44331,
"end_time": "2020-10-23T07:01:25.496099",
"exception": false,
"start_time": "2020-10-23T07:01:24.052789",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+---------+\n",
"|\u001b[34;1mR\u001b[0m: |\u001b[43m \u001b[0m: :\u001b[35mG\u001b[0m|\n",
"| : | : : |\n",
"| : : : : |\n",
"| | : | : |\n",
"|Y| : |B: |\n",
"+---------+\n",
"\n"
]
}
],
"source": [
"import gym\n",
"\n",
"env = gym.make(\"Taxi-v3\").env\n",
"\n",
"env.reset() # reset environment to a new, random state\n",
"env.render()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"execution": {
"iopub.execute_input": "2020-10-23T07:01:25.526480Z",
"iopub.status.busy": "2020-10-23T07:01:25.525487Z",
"iopub.status.idle": "2020-10-23T07:01:25.530094Z",
"shell.execute_reply": "2020-10-23T07:01:25.529110Z"
},
"papermill": {
"duration": 0.022214,
"end_time": "2020-10-23T07:01:25.530278",
"exception": false,
"start_time": "2020-10-23T07:01:25.508064",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Action Space Discrete(6)\n",
"State Space Discrete(500)\n"
]
}
],
"source": [
"# action space\n",
"print(\"Action Space {}\".format(env.action_space))\n",
"\n",
"print(\"State Space {}\".format(env.observation_space))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"execution": {
"iopub.execute_input": "2020-10-23T07:01:25.561002Z",
"iopub.status.busy": "2020-10-23T07:01:25.560068Z",
"iopub.status.idle": "2020-10-23T07:01:25.563851Z",
"shell.execute_reply": "2020-10-23T07:01:25.564479Z"
},
"papermill": {
"duration": 0.022719,
"end_time": "2020-10-23T07:01:25.564666",
"exception": false,
"start_time": "2020-10-23T07:01:25.541947",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"State: 328\n",
"+---------+\n",
"|\u001b[35mR\u001b[0m: | : :G|\n",
"| : | : : |\n",
"| : : : : |\n",
"| |\u001b[43m \u001b[0m: | : |\n",
"|\u001b[34;1mY\u001b[0m| : |B: |\n",
"+---------+\n",
"\n"
]
}
],
"source": [
"# state encoding\n",
"\n",
"state = env.encode(3, 1, 2, 0) # (taxi row, taxi column, passenger index, destination index)\n",
"print(\"State:\", state)\n",
"\n",
"env.s = state\n",
"env.render()\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"execution": {
"iopub.execute_input": "2020-10-23T07:01:25.599957Z",
"iopub.status.busy": "2020-10-23T07:01:25.599051Z",
"iopub.status.idle": "2020-10-23T07:01:25.604442Z",
"shell.execute_reply": "2020-10-23T07:01:25.603711Z"
},
"papermill": {
"duration": 0.027519,
"end_time": "2020-10-23T07:01:25.604618",
"exception": false,
"start_time": "2020-10-23T07:01:25.577099",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"{0: [(1.0, 428, -1, False)],\n",
" 1: [(1.0, 228, -1, False)],\n",
" 2: [(1.0, 348, -1, False)],\n",
" 3: [(1.0, 328, -1, False)],\n",
" 4: [(1.0, 328, -10, False)],\n",
" 5: [(1.0, 328, -10, False)]}"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# rewards table\n",
"env.P[328]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"execution": {
"iopub.execute_input": "2020-10-23T07:01:25.639997Z",
"iopub.status.busy": "2020-10-23T07:01:25.638924Z",
"iopub.status.idle": "2020-10-23T07:01:25.643756Z",
"shell.execute_reply": "2020-10-23T07:01:25.642988Z"
},
"papermill": {
"duration": 0.025446,
"end_time": "2020-10-23T07:01:25.643881",
"exception": false,
"start_time": "2020-10-23T07:01:25.618435",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"array([[0., 0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0., 0.],\n",
" ...,\n",
" [0., 0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0., 0.]])"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Defining q table\n",
"\n",
"import numpy as np\n",
"q_table = np.zeros([env.observation_space.n, env.action_space.n])\n",
"q_table"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"execution": {
"iopub.execute_input": "2020-10-23T07:01:25.677831Z",
"iopub.status.busy": "2020-10-23T07:01:25.676776Z",
"iopub.status.idle": "2020-10-23T07:01:25.680343Z",
"shell.execute_reply": "2020-10-23T07:01:25.679723Z"
},
"papermill": {
"duration": 0.022979,
"end_time": "2020-10-23T07:01:25.680493",
"exception": false,
"start_time": "2020-10-23T07:01:25.657514",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"# hyperparameters\n",
"alpha = 0.1\n",
"gamma = 0.6\n",
"epsiolon = 0.1"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"execution": {
"iopub.execute_input": "2020-10-23T07:01:25.765603Z",
"iopub.status.busy": "2020-10-23T07:01:25.726090Z",
"iopub.status.idle": "2020-10-23T07:10:37.138058Z",
"shell.execute_reply": "2020-10-23T07:10:37.138886Z"
},
"papermill": {
"duration": 551.443876,
"end_time": "2020-10-23T07:10:37.139105",
"exception": false,
"start_time": "2020-10-23T07:01:25.695229",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch: 999999 Iterations: 14810058\r\n"
]
}
],
"source": [
"import random\n",
"\n",
"max_epochs = 1000000 # stops after x epochs or max_iterations\n",
"max_iterations = -1 # 1000000 # -1 to not stop based on iterations\n",
"\n",
"iteration = 0\n",
"for epoch in range(max_epochs):\n",
" done = False\n",
" new_state = env.reset()\n",
" \n",
" while not done and (iteration < max_iterations or max_iterations == -1):\n",
" iteration += 1\n",
" old_state = new_state\n",
" if random.uniform(0,1) < epsiolon:\n",
" action = env.action_space.sample()\n",
" else:\n",
" action = np.argmax(q_table[old_state])\n",
" \n",
" new_state, reward, done, info = env.step(action)\n",
" env.s = new_state # update state of the environment (not sure if )\n",
" \n",
" q_table[old_state, action] = q_table[old_state][action] + alpha * (reward + gamma * np.max(q_table[new_state]) - q_table[old_state][action])\n",
" \n",
" if iteration >= max_iterations and max_iterations != -1:\n",
" break\n",
"print(\"Epoch: {} Iterations: {}\\r\".format(epoch, iteration))"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"execution": {
"iopub.execute_input": "2020-10-23T07:10:37.176308Z",
"iopub.status.busy": "2020-10-23T07:10:37.175458Z",
"iopub.status.idle": "2020-10-23T07:10:37.179942Z",
"shell.execute_reply": "2020-10-23T07:10:37.180484Z"
},
"papermill": {
"duration": 0.026804,
"end_time": "2020-10-23T07:10:37.180659",
"exception": false,
"start_time": "2020-10-23T07:10:37.153855",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 0. , 0. , 0. , 0. ,\n",
" 0. , 0. ],\n",
" [ -2.41837066, -2.3639511 , -2.41837066, -2.3639511 ,\n",
" -2.27325184, -11.3639511 ],\n",
" [ -1.870144 , -1.45024 , -1.870144 , -1.45024 ,\n",
" -0.7504 , -10.45024 ],\n",
" ...,\n",
" [ -0.75955193, 0.416 , -0.75945955, -1.44283176,\n",
" -9.1295566 , -9.26328213],\n",
" [ -2.26879216, -2.1220864 , -2.24661946, -2.1220864 ,\n",
" -10.7816137 , -10.43290413],\n",
" [ 5.6 , 2.36 , 5.6 , 11. ,\n",
" -3.4 , -3.4 ]])"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"q_table"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"execution": {
"iopub.execute_input": "2020-10-23T07:10:37.220686Z",
"iopub.status.busy": "2020-10-23T07:10:37.219859Z",
"iopub.status.idle": "2020-10-23T07:10:37.228862Z",
"shell.execute_reply": "2020-10-23T07:10:37.229575Z"
},
"papermill": {
"duration": 0.033495,
"end_time": "2020-10-23T07:10:37.229780",
"exception": false,
"start_time": "2020-10-23T07:10:37.196285",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+---------+\n",
"|R: | : :\u001b[35mG\u001b[0m|\n",
"| : | : : |\n",
"| :\u001b[43m \u001b[0m: : : |\n",
"| | : | : |\n",
"|Y| : |\u001b[34;1mB\u001b[0m: |\n",
"+---------+\n",
" (East)\n",
"+---------+\n",
"|R: | : :\u001b[35mG\u001b[0m|\n",
"| : | : : |\n",
"| : :\u001b[43m \u001b[0m: : |\n",
"| | : | : |\n",
"|Y| : |\u001b[34;1mB\u001b[0m: |\n",
"+---------+\n",
" (East)\n",
"+---------+\n",
"|R: | : :\u001b[35mG\u001b[0m|\n",
"| : | : : |\n",
"| : : :\u001b[43m \u001b[0m: |\n",
"| | : | : |\n",
"|Y| : |\u001b[34;1mB\u001b[0m: |\n",
"+---------+\n",
" (East)\n",
"+---------+\n",
"|R: | : :\u001b[35mG\u001b[0m|\n",
"| : | : : |\n",
"| : : : : |\n",
"| | : |\u001b[43m \u001b[0m: |\n",
"|Y| : |\u001b[34;1mB\u001b[0m: |\n",
"+---------+\n",
" (South)\n",
"+---------+\n",
"|R: | : :\u001b[35mG\u001b[0m|\n",
"| : | : : |\n",
"| : : : : |\n",
"| | : | : |\n",
"|Y| : |\u001b[34;1m\u001b[43mB\u001b[0m\u001b[0m: |\n",
"+---------+\n",
" (South)\n",
"+---------+\n",
"|R: | : :\u001b[35mG\u001b[0m|\n",
"| : | : : |\n",
"| : : : : |\n",
"| | : | : |\n",
"|Y| : |\u001b[42mB\u001b[0m: |\n",
"+---------+\n",
" (Pickup)\n",
"+---------+\n",
"|R: | : :\u001b[35mG\u001b[0m|\n",
"| : | : : |\n",
"| : : : : |\n",
"| | : |\u001b[42m_\u001b[0m: |\n",
"|Y| : |B: |\n",
"+---------+\n",
" (North)\n",
"+---------+\n",
"|R: | : :\u001b[35mG\u001b[0m|\n",
"| : | : : |\n",
"| : : :\u001b[42m_\u001b[0m: |\n",
"| | : | : |\n",
"|Y| : |B: |\n",
"+---------+\n",
" (North)\n",
"+---------+\n",
"|R: | : :\u001b[35mG\u001b[0m|\n",
"| : | :\u001b[42m_\u001b[0m: |\n",
"| : : : : |\n",
"| | : | : |\n",
"|Y| : |B: |\n",
"+---------+\n",
" (North)\n",
"+---------+\n",
"|R: | :\u001b[42m_\u001b[0m:\u001b[35mG\u001b[0m|\n",
"| : | : : |\n",
"| : : : : |\n",
"| | : | : |\n",
"|Y| : |B: |\n",
"+---------+\n",
" (North)\n",
"+---------+\n",
"|R: | : :\u001b[35m\u001b[42mG\u001b[0m\u001b[0m|\n",
"| : | : : |\n",
"| : : : : |\n",
"| | : | : |\n",
"|Y| : |B: |\n",
"+---------+\n",
" (East)\n",
"+---------+\n",
"|R: | : :\u001b[35m\u001b[34;1m\u001b[43mG\u001b[0m\u001b[0m\u001b[0m|\n",
"| : | : : |\n",
"| : : : : |\n",
"| | : | : |\n",
"|Y| : |B: |\n",
"+---------+\n",
" (Dropoff)\n"
]
}
],
"source": [
"# example game\n",
"\n",
"new_state = env.reset()\n",
"\n",
"done = False\n",
"max_iterations = 100\n",
"iteration = 0\n",
"while not done and (iteration <= max_iterations or max_iterations == -1):\n",
" old_state = new_state\n",
" action = np.argmax(q_table[old_state])\n",
" new_state, reward, done, info = env.step(action)\n",
" env.s = new_state\n",
" env.render()\n",
" iteration += 1\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"execution": {
"iopub.execute_input": "2020-10-23T07:10:37.269410Z",
"iopub.status.busy": "2020-10-23T07:10:37.268454Z",
"iopub.status.idle": "2020-10-23T07:10:37.659363Z",
"shell.execute_reply": "2020-10-23T07:10:37.658561Z"
},
"papermill": {
"duration": 0.413907,
"end_time": "2020-10-23T07:10:37.659523",
"exception": false,
"start_time": "2020-10-23T07:10:37.245616",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Id</th>\n",
" <th>Value</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>0.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>0.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>0.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>0.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2995</th>\n",
" <td>2996</td>\n",
" <td>2.36</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2996</th>\n",
" <td>2997</td>\n",
" <td>5.60</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2997</th>\n",
" <td>2998</td>\n",
" <td>11.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2998</th>\n",
" <td>2999</td>\n",
" <td>-3.40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2999</th>\n",
" <td>3000</td>\n",
" <td>-3.40</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3000 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" Id Value\n",
"0 1 0.00\n",
"1 2 0.00\n",
"2 3 0.00\n",
"3 4 0.00\n",
"4 5 0.00\n",
"... ... ...\n",
"2995 2996 2.36\n",
"2996 2997 5.60\n",
"2997 2998 11.00\n",
"2998 2999 -3.40\n",
"2999 3000 -3.40\n",
"\n",
"[3000 rows x 2 columns]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#generate output file\n",
"df = pd.DataFrame(q_table.ravel())\n",
"\n",
"df.index += 1\n",
"df.to_csv(\"q_table.csv\", index_label=\"Id\", header=[\"Value\"])\n",
"df\n",
"\n",
"# load data again to test correct file format\n",
"pd.read_csv(\"q_table.csv\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
},
"papermill": {
"duration": 559.137382,
"end_time": "2020-10-23T07:10:37.784395",
"environment_variables": {},
"exception": null,
"input_path": "__notebook__.ipynb",
"output_path": "__notebook__.ipynb",
"parameters": {},
"start_time": "2020-10-23T07:01:18.647013",
"version": "2.1.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}