mirror of
https://github.com/13hannes11/UU_NCML_Project.git
synced 2024-09-03 20:50:59 +02:00
Merge branch 'main' of https://github.com/13hannes11/UU_NCML_Project into main
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,4 +1,5 @@
|
|||||||
de/csv
|
de/csv
|
||||||
|
chromedriver
|
||||||
|
|
||||||
# Byte-compiled / optimized / DLL files
|
# Byte-compiled / optimized / DLL files
|
||||||
__pycache__/
|
__pycache__/
|
||||||
|
|||||||
@@ -1 +1,7 @@
|
|||||||
# UU_NCML_Project
|
# UU_NCML_Project
|
||||||
|
|
||||||
|
## Selenium
|
||||||
|
|
||||||
|
|
||||||
|
Install Google Chrome, download the WebDriver (ChromeDriver) that matches your Chrome version, and place it in this folder (filename: `chromedriver`):
|
||||||
|
https://www.selenium.dev/documentation/en/webdriver/driver_requirements/
|
||||||
@@ -1,15 +1,29 @@
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
import os
|
import os
|
||||||
|
|
||||||
# Convert data to csv
|
|
||||||
base_dir = "./de/"
|
base_dir = "./de/"
|
||||||
|
out_dir = "csv/"
|
||||||
|
in_dir = "input/"
|
||||||
|
title_file = "filename_to_titles.csv"
|
||||||
|
|
||||||
out_dir = "csv"
|
|
||||||
if not os.path.exists(base_dir + out_dir):
|
|
||||||
os.makedirs(base_dir + out_dir)
|
|
||||||
|
|
||||||
for dirname, _, filenames in os.walk(base_dir + 'input'):
|
if not os.path.exists(os.path.join(base_dir, out_dir)):
|
||||||
|
os.makedirs(os.path.join(base_dir, out_dir))
|
||||||
|
|
||||||
|
# Copy titles file and replace file endings
|
||||||
|
print(f'Copying Title File')
|
||||||
|
with open(os.path.join(base_dir, in_dir, title_file), 'r') as file:
|
||||||
|
file_content = file.read()
|
||||||
|
file_content = file_content.replace('.xlsx', '.csv')
|
||||||
|
with open(os.path.join(base_dir, out_dir, title_file), 'w') as file:
|
||||||
|
file.write(file_content)
|
||||||
|
|
||||||
|
# Convert xlsx files to csv
|
||||||
|
for dirname, _, filenames in os.walk(os.path.join(base_dir, in_dir)):
|
||||||
for filename in filenames:
|
for filename in filenames:
|
||||||
read_file = pd.read_excel (os.path.join(dirname, filename))
|
if filename != title_file:
|
||||||
read_file.to_csv (os.path.join(base_dir + out_dir, filename.split(".", 1)[0] + ".csv"), index = None, header=True)
|
print(f'Reading {filename}')
|
||||||
|
read_file = pd.read_excel (os.path.join(dirname, filename))
|
||||||
|
print(f'Saving {filename}')
|
||||||
|
read_file.to_csv (os.path.join(base_dir, out_dir, filename.split(".", 1)[0] + ".csv"), index = None, header=True)
|
||||||
|
|
||||||
|
|||||||
BIN
de/input/20190926_1_xls-data.xlsx
Normal file
BIN
de/input/20190926_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20190926_3_xls-data.xlsx
Normal file
BIN
de/input/20190926_3_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20190926_4_xls-data.xlsx
Normal file
BIN
de/input/20190926_4_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20190926_5_xls-data.xlsx
Normal file
BIN
de/input/20190926_5_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20190926_6_xls-data.xlsx
Normal file
BIN
de/input/20190926_6_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20190926_7_xls-data.xlsx
Normal file
BIN
de/input/20190926_7_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20191017_1_xls-data.xlsx
Normal file
BIN
de/input/20191017_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20191017_2_xls-data.xlsx
Normal file
BIN
de/input/20191017_2_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20191018_1_xls-data.xlsx
Normal file
BIN
de/input/20191018_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20191018_2_xls-data.xlsx
Normal file
BIN
de/input/20191018_2_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20191024_1_xls-data.xlsx
Normal file
BIN
de/input/20191024_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20191024_2_xls-data.xlsx
Normal file
BIN
de/input/20191024_2_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20191024_3_xls-data.xlsx
Normal file
BIN
de/input/20191024_3_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20191107_1_xls-data.xlsx
Normal file
BIN
de/input/20191107_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20191107_2_xls-data.xlsx
Normal file
BIN
de/input/20191107_2_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20191113_1_xls-data.xlsx
Normal file
BIN
de/input/20191113_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20191113_2_xls-data.xlsx
Normal file
BIN
de/input/20191113_2_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20191114_1_xls-data.xlsx
Normal file
BIN
de/input/20191114_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20191114_2_xls-data.xlsx
Normal file
BIN
de/input/20191114_2_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20191114_3_xls-data.xlsx
Normal file
BIN
de/input/20191114_3_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20191114_4_xls-data.xlsx
Normal file
BIN
de/input/20191114_4_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20191114_5_xls-data.xlsx
Normal file
BIN
de/input/20191114_5_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20191114_6_xls-data.xlsx
Normal file
BIN
de/input/20191114_6_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20191114_7_xls-data.xlsx
Normal file
BIN
de/input/20191114_7_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20191115_1_xls-data.xlsx
Normal file
BIN
de/input/20191115_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20191126_1_xls-data.xlsx
Normal file
BIN
de/input/20191126_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20191127_1_xls-data.xlsx
Normal file
BIN
de/input/20191127_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20191129_1_xls-data.xlsx
Normal file
BIN
de/input/20191129_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20191129_2_xls-data.xlsx
Normal file
BIN
de/input/20191129_2_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20191129_3_xls-data.xlsx
Normal file
BIN
de/input/20191129_3_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20191219_1_xls-data.xlsx
Normal file
BIN
de/input/20191219_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20191219_2_xls-data.xlsx
Normal file
BIN
de/input/20191219_2_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20191219_3_xls-data.xlsx
Normal file
BIN
de/input/20191219_3_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20191219_4_xls-data.xlsx
Normal file
BIN
de/input/20191219_4_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20191219_5_xls-data.xlsx
Normal file
BIN
de/input/20191219_5_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20191219_6_xls-data.xlsx
Normal file
BIN
de/input/20191219_6_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20191220_1_xls-data.xlsx
Normal file
BIN
de/input/20191220_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20191220_2_xls-data.xlsx
Normal file
BIN
de/input/20191220_2_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20200115_xls-data.xlsx
Normal file
BIN
de/input/20200115_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20200116_1_xls-data.xlsx
Normal file
BIN
de/input/20200116_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20200116_2_xls-data.xlsx
Normal file
BIN
de/input/20200116_2_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20200116_3_xls-data.xlsx
Normal file
BIN
de/input/20200116_3_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20200117_1_xls-data.xlsx
Normal file
BIN
de/input/20200117_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20200130_1_xls-data.xlsx
Normal file
BIN
de/input/20200130_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20200130_2_xls-data.xlsx
Normal file
BIN
de/input/20200130_2_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20200130_3_xls-data.xlsx
Normal file
BIN
de/input/20200130_3_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20200304_1_xls-data.xlsx
Normal file
BIN
de/input/20200304_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20200313_1_xls-data.xlsx
Normal file
BIN
de/input/20200313_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20200313_2_xls-data.xlsx
Normal file
BIN
de/input/20200313_2_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20200325_1_xls-data.xlsx
Normal file
BIN
de/input/20200325_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20200507_1_xls-data.xlsx
Normal file
BIN
de/input/20200507_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20200514_1_xls-data.xlsx
Normal file
BIN
de/input/20200514_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20200514_2_xls-data.xlsx
Normal file
BIN
de/input/20200514_2_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20200514_3_xls-data.xlsx
Normal file
BIN
de/input/20200514_3_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20200514_4_xls-data.xlsx
Normal file
BIN
de/input/20200514_4_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20200527_1_xls-data.xlsx
Normal file
BIN
de/input/20200527_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20200529_1_xls-data.xlsx
Normal file
BIN
de/input/20200529_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20200529_2_xls-data.xlsx
Normal file
BIN
de/input/20200529_2_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20200617_1_xls-data.xlsx
Normal file
BIN
de/input/20200617_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20200617_2_xls-data.xlsx
Normal file
BIN
de/input/20200617_2_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20200618_2_xls-data.xlsx
Normal file
BIN
de/input/20200618_2_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20200619_1_xls-data.xlsx
Normal file
BIN
de/input/20200619_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20200702_1_xls-data.xlsx
Normal file
BIN
de/input/20200702_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20200702_2_xls-data.xlsx
Normal file
BIN
de/input/20200702_2_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20200703_1_xls-data.xlsx
Normal file
BIN
de/input/20200703_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20200703_2_xls-data.xlsx
Normal file
BIN
de/input/20200703_2_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20200910_2_xls-data.xlsx
Normal file
BIN
de/input/20200910_2_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20200916_1_xls-data.xlsx
Normal file
BIN
de/input/20200916_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20200917_1_xls-data.xlsx
Normal file
BIN
de/input/20200917_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20200918_1_xls-data.xlsx
Normal file
BIN
de/input/20200918_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20200918_2_xls-data.xlsx
Normal file
BIN
de/input/20200918_2_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20201008_1_xls-data.xlsx
Normal file
BIN
de/input/20201008_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20201009_1_xls-data.xlsx
Normal file
BIN
de/input/20201009_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20201009_2_xls-data.xlsx
Normal file
BIN
de/input/20201009_2_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20201009_3_xls-data.xlsx
Normal file
BIN
de/input/20201009_3_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20201029_2_xls-data.xlsx
Normal file
BIN
de/input/20201029_2_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20201030_1_xls-data.xlsx
Normal file
BIN
de/input/20201030_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20201030_2_xls-data.xlsx
Normal file
BIN
de/input/20201030_2_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20201118_1_xls-data.xlsx
Normal file
BIN
de/input/20201118_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20201118_2_xls-data.xlsx
Normal file
BIN
de/input/20201118_2_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20201118_3_xls-data.xlsx
Normal file
BIN
de/input/20201118_3_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20201118_4_xls-data.xlsx
Normal file
BIN
de/input/20201118_4_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20201119_1_xls-data.xlsx
Normal file
BIN
de/input/20201119_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20201126_2_xls-data.xlsx
Normal file
BIN
de/input/20201126_2_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20201126_3_xls-data.xlsx
Normal file
BIN
de/input/20201126_3_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20201208_1_xls-data.xlsx
Normal file
BIN
de/input/20201208_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20201208_2_xls-data.xlsx
Normal file
BIN
de/input/20201208_2_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20201209_1_xls-data.xlsx
Normal file
BIN
de/input/20201209_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20201211_1_xls-data.xlsx
Normal file
BIN
de/input/20201211_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20201216_1_xls-data.xlsx
Normal file
BIN
de/input/20201216_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20201217_1_xls-data.xlsx
Normal file
BIN
de/input/20201217_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20210114_1_xls-data.xlsx
Normal file
BIN
de/input/20210114_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20210128_1_xls-data.xlsx
Normal file
BIN
de/input/20210128_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20210129_1_xls-data.xlsx
Normal file
BIN
de/input/20210129_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20210225_2_xls-data.xlsx
Normal file
BIN
de/input/20210225_2_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20210303_1_xls-data.xlsx
Normal file
BIN
de/input/20210303_1_xls-data.xlsx
Normal file
Binary file not shown.
BIN
de/input/20210303_2_xls-data.xlsx
Normal file
BIN
de/input/20210303_2_xls-data.xlsx
Normal file
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user