import magic
import sys
import os
from pathlib import Path

# Directory whose immediate entries are classified by MIME type.
data_dir = 'local-media'

# Tab-separated reports (file name, detected MIME type). Files detected as
# 'application/octet-stream' go to the second report; everything else goes to
# the first. The 'octect' spelling in the file name is kept as-is so existing
# consumers of that path keep working.
out_path = 'local-media-mimetypes-valid.csv'
out_path_random = 'local-media-mimetypes-octect-stream.csv'

counter = 0
# The context managers guarantee both reports are flushed and closed even if
# MIME detection raises part-way through the directory.
with open(out_path, "w", encoding="utf-8") as out_file, \
        open(out_path_random, "w", encoding="utf-8") as out_file_random, \
        os.scandir(data_dir) as entries:
    for entry in entries:
        # Sub-directories and other non-regular entries cannot be sniffed by
        # libmagic's file API and would abort the whole run; skip them.
        if not entry.is_file():
            continue

        fileid = entry.name
        mime = magic.from_file(entry.path, mime=True)
        if mime == 'application/octet-stream':
            out_file_random.write(f'{fileid}\t{mime}\n')
        else:
            out_file.write(f'{fileid}\t{mime}\n')

        counter += 1

        # Periodic progress report; flushing here keeps partial results on
        # disk while a long run is still in progress.
        if counter % 1000 == 0:
            print(f'Processed {counter} files')
            out_file.flush()
            out_file_random.flush()
