mirror of
https://github.com/13hannes11/UU_NCML_Project.git
synced 2024-09-03 20:50:59 +02:00
improve convert to csv and copy title_file
This commit is contained in:
@@ -1,15 +1,29 @@
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
import os
|
import os
|
||||||
|
|
||||||
# Convert data to csv
|
|
||||||
base_dir = "./de/"
|
base_dir = "./de/"
|
||||||
|
out_dir = "csv/"
|
||||||
|
in_dir = "input/"
|
||||||
|
title_file = "filename_to_titles.csv"
|
||||||
|
|
||||||
out_dir = "csv"
|
|
||||||
if not os.path.exists(base_dir + out_dir):
|
|
||||||
os.makedirs(base_dir + out_dir)
|
|
||||||
|
|
||||||
for dirname, _, filenames in os.walk(base_dir + 'input'):
|
if not os.path.exists(os.path.join(base_dir, out_dir)):
|
||||||
|
os.makedirs(os.path.join(base_dir, out_dir))
|
||||||
|
|
||||||
|
# Copy titles file and replace file endings
|
||||||
|
print(f'Copying Title File')
|
||||||
|
with open(os.path.join(base_dir, in_dir, title_file), 'r') as file:
|
||||||
|
file_content = file.read()
|
||||||
|
file_content = file_content.replace('.xlsx', '.csv')
|
||||||
|
with open(os.path.join(base_dir, out_dir, title_file), 'w') as file:
|
||||||
|
file.write(file_content)
|
||||||
|
|
||||||
|
# Convert xlsx files to csv
|
||||||
|
for dirname, _, filenames in os.walk(os.path.join(base_dir, in_dir)):
|
||||||
for filename in filenames:
|
for filename in filenames:
|
||||||
read_file = pd.read_excel (os.path.join(dirname, filename))
|
if filename != title_file:
|
||||||
read_file.to_csv (os.path.join(base_dir + out_dir, filename.split(".", 1)[0] + ".csv"), index = None, header=True)
|
print(f'Reading {filename}')
|
||||||
|
read_file = pd.read_excel (os.path.join(dirname, filename))
|
||||||
|
print(f'Saving {filename}')
|
||||||
|
read_file.to_csv (os.path.join(base_dir, out_dir, filename.split(".", 1)[0] + ".csv"), index = None, header=True)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user