mirror of
https://github.com/13hannes11/UU_NCML_Project.git
synced 2024-09-03 20:50:59 +02:00
improve convert to csv and copy title_file
This commit is contained in:
@@ -1,15 +1,29 @@
|
||||
import pandas as pd
|
||||
import os
|
||||
|
||||
# Convert the Excel workbooks under <base_dir>/<in_dir> to CSV files under
# <base_dir>/<out_dir>, and copy the filename->title mapping alongside them
# with its ".xlsx" references rewritten to ".csv".
base_dir = "./de/"
out_dir = "csv/"
in_dir = "input/"
title_file = "filename_to_titles.csv"


def csv_name(filename):
    """Return *filename* with its final extension replaced by ``.csv``.

    Only the last extension is swapped, so ``"b.v2.xlsx"`` becomes
    ``"b.v2.csv"``. This keeps the generated file names in step with the
    ``.xlsx`` -> ``.csv`` text replacement applied to the title file.
    """
    return os.path.splitext(filename)[0] + ".csv"


def copy_title_file(src_dir, dst_dir, name=title_file):
    """Copy the title file from *src_dir* to *dst_dir*.

    Every occurrence of ``.xlsx`` in the file content is replaced by
    ``.csv`` so the copied mapping refers to the converted files.

    Args:
        src_dir: Directory containing the original title file.
        dst_dir: Existing directory that receives the rewritten copy.
        name: File name of the title file (same name in both directories).

    Raises:
        OSError: If the source cannot be read or the target cannot be written.
    """
    print('Copying Title File')
    with open(os.path.join(src_dir, name), 'r') as src:
        content = src.read()
    with open(os.path.join(dst_dir, name), 'w') as dst:
        dst.write(content.replace('.xlsx', '.csv'))


def convert_workbooks(src_dir, dst_dir, skip=title_file):
    """Convert every file found under *src_dir* to a CSV file in *dst_dir*.

    The tree below *src_dir* is walked recursively; all results are written
    flat into *dst_dir*. The file named *skip* is left out because it is the
    title mapping (already a CSV), not a workbook.

    Args:
        src_dir: Root directory that is searched for workbooks.
        dst_dir: Existing directory that receives the CSV files.
        skip: File name to ignore while walking.
    """
    for dirname, _, filenames in os.walk(src_dir):
        for filename in filenames:
            if filename == skip:
                continue
            print(f'Reading {filename}')
            frame = pd.read_excel(os.path.join(dirname, filename))
            print(f'Saving {filename}')
            frame.to_csv(
                os.path.join(dst_dir, csv_name(filename)),
                index=False,
                header=True,
            )


def main():
    """Create the output directory, copy the title file, convert workbooks."""
    src_dir = os.path.join(base_dir, in_dir)
    dst_dir = os.path.join(base_dir, out_dir)
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(dst_dir, exist_ok=True)
    copy_title_file(src_dir, dst_dir)
    convert_workbooks(src_dir, dst_dir)


if __name__ == "__main__":
    main()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user