1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
|
import insinuations
import database
import datetime
import time
import json
import csv
import sys
import os
def parse_csv(db, csv_path):
insinuations.get_sics(db)
with open(csv_path, "r") as f:
num_lines = len(f.readlines())
i = 0
with open(csv_path, "r") as f:
reader = csv.reader(f)
headers = next(reader)
for name, id_, address, postcode, company_id, sic_codes, diff_mean_hourly_percent, diff_median_hourly_percent, \
diff_mean_bonus_percent, diff_median_bonus_percent, male_bonus_percent, female_bonus_percent, male_lower_quartile, \
female_lower_quartile, male_lower_middle_quartile, female_lower_middle_quartile, male_upper_middle_quartile, \
female_upper_middle_quartile, male_top_quartile, female_top_quartile, policy_link, responsible_person, size, \
current_name, submitted_after_deadline, duedate, submitted_date in reader:
if company_id.strip() != "":
try:
sic_codes = {int(i.strip()) for i in sic_codes.split(",")}
except ValueError:
sic_codes = set()
while True:
try:
company = insinuations.lookup_company(company_id)
except ConnectionError as e:
print("Couldn't connect... Error: '%s'... Waiting 20 seconds..." % str(e))
time.sleep(20)
else:
break
company["sics"] = company["sics"].union(sic_codes)
company["company_number"] = company_id
company["name"] = name
company["address"] = address
company["postcode"] = postcode
company["policy_link"] = policy_link
company["responsible_person"] = responsible_person
company["size"] = size
company["current_name"] = current_name
print("%.2f%%" % ((i / num_lines) * 100), company)
print(
company_id, os.path.basename(csv_path), datetime.datetime.strptime(submitted_date, "%Y/%m/%d %H:%M:%S"),
diff_mean_hourly_percent, diff_median_hourly_percent, diff_mean_bonus_percent, diff_median_bonus_percent,
male_bonus_percent, female_bonus_percent, male_lower_quartile, female_lower_quartile,
male_lower_middle_quartile, female_lower_middle_quartile, male_upper_middle_quartile,
female_upper_middle_quartile, male_top_quartile, female_top_quartile
)
db.append_employer(**company)
db.append_pay_info(
company_id, os.path.basename(csv_path), datetime.datetime.strptime(submitted_date, "%Y/%m/%d %H:%M:%S"),
diff_mean_hourly_percent, diff_median_hourly_percent, diff_mean_bonus_percent, diff_median_bonus_percent,
male_bonus_percent, female_bonus_percent, male_lower_quartile, female_lower_quartile,
male_lower_middle_quartile, female_lower_middle_quartile, male_upper_middle_quartile,
female_upper_middle_quartile, male_top_quartile, female_top_quartile
)
i += 1
# break
if __name__ == "__main__":
if not os.path.exists(".docker"):
import dotenv
dotenv.load_dotenv(dotenv_path = "db.env")
host = "srv.home"
else:
host = "db"
with database.PayGapDatabase(host = host) as db:
parse_csv(db, sys.argv[1])
|