diff options
Diffstat (limited to 'parser.py')
-rw-r--r-- | parser.py | 85 |
1 files changed, 0 insertions, 85 deletions
diff --git a/parser.py b/parser.py deleted file mode 100644 index f9e3e81..0000000 --- a/parser.py +++ /dev/null @@ -1,85 +0,0 @@ -import insinuations -import database -import datetime -import time -import json -import csv -import sys -import os - -def parse_csv(db, csv_path): - insinuations.get_sics(db) - - with open(csv_path, "r") as f: - num_lines = len(f.readlines()) - i = 0 - - with open(csv_path, "r") as f: - reader = csv.reader(f) - headers = next(reader) - - for name, id_, address, postcode, company_id, sic_codes, diff_mean_hourly_percent, diff_median_hourly_percent, \ - diff_mean_bonus_percent, diff_median_bonus_percent, male_bonus_percent, female_bonus_percent, male_lower_quartile, \ - female_lower_quartile, male_lower_middle_quartile, female_lower_middle_quartile, male_upper_middle_quartile, \ - female_upper_middle_quartile, male_top_quartile, female_top_quartile, policy_link, responsible_person, size, \ - current_name, submitted_after_deadline, duedate, submitted_date in reader: - - if company_id.strip() != "": - try: - sic_codes = {int(i.strip()) for i in sic_codes.split(",")} - except ValueError: - sic_codes = set() - - while True: - try: - company = insinuations.lookup_company(company_id) - except ConnectionError as e: - print("Couldn't connect... Error: '%s'... Waiting 20 seconds..." % str(e)) - time.sleep(20) - else: - break - - company["sics"] = company["sics"].union(sic_codes) - company["company_number"] = company_id - company["name"] = name - company["address"] = address - company["postcode"] = postcode - company["policy_link"] = policy_link - company["responsible_person"] = responsible_person - company["size"] = size - company["current_name"] = current_name - - print("%.2f%%" % ((i / num_lines) * 100), company) - print( - company_id, os.path.basename(csv_path), datetime.datetime.strptime(submitted_date, "%Y/%m/%d %H:%M:%S"), - diff_mean_hourly_percent, diff_median_hourly_percent, diff_mean_bonus_percent, diff_median_bonus_percent, - male_bonus_percent, female_bonus_percent, male_lower_quartile, female_lower_quartile, - male_lower_middle_quartile, female_lower_middle_quartile, male_upper_middle_quartile, - female_upper_middle_quartile, male_top_quartile, female_top_quartile - ) - db.append_employer(**company) - db.append_pay_info( - company_id, os.path.basename(csv_path), datetime.datetime.strptime(submitted_date, "%Y/%m/%d %H:%M:%S"), - diff_mean_hourly_percent, diff_median_hourly_percent, diff_mean_bonus_percent, diff_median_bonus_percent, - male_bonus_percent, female_bonus_percent, male_lower_quartile, female_lower_quartile, - male_lower_middle_quartile, female_lower_middle_quartile, male_upper_middle_quartile, - female_upper_middle_quartile, male_top_quartile, female_top_quartile - ) - i += 1 - - # break - -if __name__ == "__main__": - if not os.path.exists(".docker"): - import dotenv - dotenv.load_dotenv(dotenv_path = "db.env") - host = "srv.home" - else: - host = "db" - - with database.PayGapDatabase(host = host) as db: - p = sys.argv[1] - if os.path.basename(p) == "National_Statistics_Postcode_Lookup_UK.csv": - db.append_counties(p) - else: - parse_csv(db, p)
\ No newline at end of file |