import magic
import sys
import os
import multiprocessing
from pathlib import Path

# Directory whose entries are listed and classified by MIME type.
# It is a relative path, so it is resolved against the current working directory.
data_dir = 'local-media'

def check_file_type(f):
    """Detect the MIME type of a single file.

    Args:
        f: Path of the file to inspect (anything ``Path`` accepts).

    Returns:
        A ``(name, mime)`` tuple holding the final path component of ``f``
        and the value returned by ``magic.from_file(..., mime=True)``.
    """
    target = Path(f)
    detected = magic.from_file(str(target), mime=True)
    return (target.name, detected)


def main():
    """Classify every entry of ``data_dir`` by MIME type and write two reports.

    Each entry listed in ``data_dir`` is passed to ``check_file_type`` in a
    process pool. The resulting ``(fileid, mime)`` pairs are written as
    tab-separated lines to one of two files in the current directory:

    * ``local-media-mimetypes-octet-stream.csv`` for entries whose MIME type
      is exactly ``application/octet-stream``;
    * ``local-media-mimetypes-valid.csv`` for everything else.
    """
    # Pool tuning, kept together so both knobs are visible in one place.
    pool_size = 100
    chunk_size = 100

    files = [os.path.join(data_dir, name) for name in os.listdir(data_dir)]

    print('Starting...')

    with multiprocessing.Pool(pool_size) as pool:
        mime_types = pool.map(check_file_type, files, chunk_size)

    out_path = 'local-media-mimetypes-valid.csv'
    out_path_random = 'local-media-mimetypes-octet-stream.csv'

    print(f'Writing output - {len(mime_types)} mappings')

    # Context managers guarantee both files are flushed and closed even if a
    # write fails part-way through. The encoding is explicit so non-ASCII file
    # names are written the same way regardless of the machine's locale.
    with open(out_path, "w", encoding="utf-8") as out_file, \
            open(out_path_random, "w", encoding="utf-8") as out_file_random:
        for fileid, mime in mime_types:
            if mime == 'application/octet-stream':
                destination = out_file_random
            else:
                destination = out_file
            destination.write(f'{fileid}\t{mime}\n')


# The guard is required for multiprocessing: with the "spawn" start method
# each worker re-imports this module, and unguarded top-level code would try
# to create a new pool inside every worker.
if __name__ == '__main__':
    main()
