Skip to content

Commit 258db0b

Browse files
Joshua Zhou authored and tektaxi committed
feats and also the download resumes script
1 parent 2bdfde3 commit 258db0b

File tree

4 files changed

+230
-5
lines changed

4 files changed

+230
-5
lines changed

.DS_Store

8 KB
Binary file not shown.

constants/role.constant.js

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,8 @@ const sponsorT1Role = {
9595

9696
Constants.Routes.searchRoutes.get,
9797
Constants.Routes.accountRoutes.getAnyById,
98-
Constants.Routes.hackerRoutes.getAnyById
98+
Constants.Routes.hackerRoutes.getAnyById,
99+
Constants.Routes.teamRoutes.get
99100
]
100101
};
101102

@@ -110,7 +111,8 @@ const sponsorT2Role = {
110111

111112
Constants.Routes.searchRoutes.get,
112113
Constants.Routes.accountRoutes.getAnyById,
113-
Constants.Routes.hackerRoutes.getAnyById
114+
Constants.Routes.hackerRoutes.getAnyById,
115+
Constants.Routes.teamRoutes.get
114116
]
115117
};
116118

@@ -125,7 +127,8 @@ const sponsorT3Role = {
125127

126128
Constants.Routes.searchRoutes.get,
127129
Constants.Routes.accountRoutes.getAnyById,
128-
Constants.Routes.hackerRoutes.getAnyById
130+
Constants.Routes.hackerRoutes.getAnyById,
131+
Constants.Routes.teamRoutes.get
129132
]
130133
};
131134

@@ -140,7 +143,8 @@ const sponsorT4Role = {
140143

141144
Constants.Routes.searchRoutes.get,
142145
Constants.Routes.accountRoutes.getAnyById,
143-
Constants.Routes.hackerRoutes.getAnyById
146+
Constants.Routes.hackerRoutes.getAnyById,
147+
Constants.Routes.teamRoutes.get
144148
]
145149
};
146150

@@ -155,7 +159,8 @@ const sponsorT5Role = {
155159

156160
Constants.Routes.searchRoutes.get,
157161
Constants.Routes.accountRoutes.getAnyById,
158-
Constants.Routes.hackerRoutes.getAnyById
162+
Constants.Routes.hackerRoutes.getAnyById,
163+
Constants.Routes.teamRoutes.get
159164
]
160165
};
161166

scripts/.DS_Store

8 KB
Binary file not shown.

scripts/download_all_resumes.py

Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
#!/usr/bin/env python3
2+
import argparse
3+
import os
4+
import tempfile
5+
import zipfile
6+
from urllib.parse import quote_plus
7+
8+
from dotenv import load_dotenv
9+
from google.cloud import storage
10+
from google.oauth2 import service_account
11+
from bson import ObjectId
12+
from pymongo import MongoClient
13+
14+
15+
def build_mongo_uri(address: str, username: str, password: str) -> str:
    """Build a MongoDB connection URI from an address and optional credentials.

    Args:
        address: Either a complete ``mongodb://`` / ``mongodb+srv://`` URI, a
            ``user:pass@host`` style string, or a bare ``host[:port][/db]``.
            Surrounding whitespace is ignored.
        username: User name to embed when ``address`` carries no credentials.
            May be empty or ``None``, in which case no userinfo is added.
        password: Password to embed alongside ``username``. ``None`` is
            treated as an empty password.

    Returns:
        A URI string. An ``address`` that is already a full URI is returned
        unchanged (after stripping); otherwise ``mongodb://`` is prepended and
        the credentials are percent-encoded with ``quote_plus``.
    """
    address = address.strip()
    # str.startswith accepts a tuple, so both schemes are checked in one call.
    if address.startswith(("mongodb://", "mongodb+srv://")):
        return address
    # An "@" means the address already embeds its own userinfo.
    if "@" in address:
        return f"mongodb://{address}"
    # Without a user name, adding ":<pwd>@" would yield a malformed userinfo
    # section (and quote_plus(None) would raise), so connect without one.
    if not username:
        return f"mongodb://{address}"
    user = quote_plus(username)
    pwd = quote_plus(password or "")
    return f"mongodb://{user}:{pwd}@{address}"
24+
25+
26+
# Media type -> file extension for the resume formats this script recognises.
# Built once at import time instead of on every call.
_EXTENSION_BY_MEDIA_TYPE = {
    "application/pdf": ".pdf",
    "application/msword": ".doc",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
    "image/png": ".png",
    "image/jpeg": ".jpg",
}


def guess_extension(content_type: str) -> str:
    """Return the file extension (with leading dot) for a content type.

    Args:
        content_type: A MIME content type such as ``"application/pdf"``.
            Parameters after ``;`` (e.g. ``"; charset=binary"``), surrounding
            whitespace and letter case are ignored. May be empty or ``None``.

    Returns:
        The mapped extension, or ``""`` when ``content_type`` is empty or not
        one of the recognised types.
    """
    if not content_type:
        return ""
    # Keep only the "type/subtype" part and normalise case so values like
    # "Application/PDF; charset=binary" still resolve.
    media_type = content_type.split(";", 1)[0].strip().lower()
    return _EXTENSION_BY_MEDIA_TYPE.get(media_type, "")
37+
38+
39+
def safe_name(value: str) -> str:
    """Reduce ``value`` to characters that are safe to use in a file name.

    Only alphanumeric characters, ``-`` and ``_`` are kept; everything else
    (including whitespace) is dropped.

    Args:
        value: The raw text to sanitise.

    Returns:
        The filtered text, or ``"unknown"`` when nothing survives filtering.
    """
    allowed_punctuation = ("-", "_")
    kept = [
        char
        for char in value.strip()
        if char.isalnum() or char in allowed_punctuation
    ]
    if not kept:
        return "unknown"
    return "".join(kept)
44+
45+
46+
# Environment variables needed to reach MongoDB and name the GCS bucket.
_DB_ENV_KEYS = (
    "DB_ADDRESS_DEPLOY",
    "DB_USER_DEPLOY",
    "DB_PASS_DEPLOY",
    "BUCKET_NAME",
)

# Environment variables holding the service-account fields. Lower-casing each
# name gives the key expected in the service-account info dict.
_GCS_ENV_KEYS = (
    "TYPE",
    "PROJECT_ID",
    "PRIVATE_KEY_ID",
    "PRIVATE_KEY",
    "CLIENT_EMAIL",
    "CLIENT_ID",
    "AUTH_URI",
    "TOKEN_URI",
    "AUTH_PROVIDER_X509_CERT_URL",
    "CLIENT_X509_CERT_URL",
)


def _read_required_env(keys: tuple, label: str) -> dict:
    """Read ``keys`` from the environment; exit if any is unset or empty."""
    values = {key: os.environ.get(key) for key in keys}
    missing = [key for key, value in values.items() if not value]
    if missing:
        raise SystemExit(
            f"Missing required {label}env vars: {', '.join(missing)}"
        )
    return values


def _redact_mongo_uri(uri: str) -> str:
    """Return ``uri`` with any embedded password replaced by ``***``."""
    scheme, separator, rest = uri.partition("://")
    userinfo, at_sign, host_part = rest.rpartition("@")
    if not separator or not at_sign:
        return uri  # no userinfo section, nothing to hide
    user = userinfo.partition(":")[0]
    return f"{scheme}://{user}:***@{host_part}"


def _open_bucket(gcs_env: dict, bucket_name: str):
    """Create a GCS bucket handle from service-account fields in ``gcs_env``."""
    info = {key.lower(): value for key, value in gcs_env.items()}
    # .env files usually store the key on one line with literal "\n" pairs.
    info["private_key"] = info["private_key"].replace("\\n", "\n")
    credentials = service_account.Credentials.from_service_account_info(info)
    storage_client = storage.Client(
        project=info["project_id"], credentials=credentials
    )
    return storage_client.bucket(bucket_name)


def _find_account(accounts, account_cache: dict, account_id) -> dict:
    """Look up the account document for ``account_id``, caching the result.

    Returns an empty dict when the id is missing, has an unusable shape, or
    matches no document.
    """
    if isinstance(account_id, dict) and "$oid" in account_id:
        account_id = account_id["$oid"]
    # Convert only strings that really are ObjectIds; other strings are used
    # as-is (replaces a blanket ``except Exception: pass``).
    if isinstance(account_id, str) and ObjectId.is_valid(account_id):
        account_id = ObjectId(account_id)
    # dict/list ids are unhashable and cannot be used as cache keys.
    if account_id is None or isinstance(account_id, (dict, list)):
        return {}
    if account_id in account_cache:
        return account_cache[account_id]
    account = accounts.find_one({"_id": account_id}) or {}
    if not account and isinstance(account_id, ObjectId):
        # Fall back to a string-typed _id.
        account = accounts.find_one({"_id": str(account_id)}) or {}
    account_cache[account_id] = account
    return account


def _claim_name(stem: str, ext: str, taken: set) -> str:
    """Return ``stem + ext``, adding ``_2``, ``_3``... until it is unused.

    Names are compared case-insensitively so the result is also unique on
    case-insensitive file systems. The chosen name is recorded in ``taken``.
    """
    candidate = f"{stem}{ext}"
    counter = 1
    while candidate.casefold() in taken:
        counter += 1
        candidate = f"{stem}_{counter}{ext}"
    taken.add(candidate.casefold())
    return candidate


def main() -> int:
    """Download every hacker resume from GCS and bundle them into a zip.

    Configuration is read from the environment after loading ``.env`` with
    ``load_dotenv(override=True)``. Command-line options are ``--out`` (zip
    path, default ``resumes.zip``) and ``--debug`` (verbose output).

    Files are named ``<first>_<last>_resume<ext>`` from the linked account,
    or ``<hacker id>__<blob basename>`` when no name is available. Colliding
    names get a numeric suffix so no resume overwrites another.

    Returns:
        ``0`` on success.

    Raises:
        SystemExit: If a required environment variable is unset or empty.
    """
    load_dotenv(override=True)
    parser = argparse.ArgumentParser(
        description="Download all hacker resumes from GCS and bundle into a zip."
    )
    parser.add_argument(
        "--out",
        default="resumes.zip",
        help="Output zip path (default: resumes.zip).",
    )
    parser.add_argument(
        "--debug",
        action="store_true",
        help="Enable verbose logging for troubleshooting.",
    )
    args = parser.parse_args()

    db_env = _read_required_env(_DB_ENV_KEYS, "")
    gcs_env = _read_required_env(_GCS_ENV_KEYS, "GCS ")

    mongo_uri = build_mongo_uri(
        db_env["DB_ADDRESS_DEPLOY"],
        db_env["DB_USER_DEPLOY"],
        db_env["DB_PASS_DEPLOY"],
    )
    if args.debug:
        # Never echo the password, even in debug output.
        print(f"Mongo URI: {_redact_mongo_uri(mongo_uri)}")

    # The context manager closes the client's connections on exit.
    with MongoClient(mongo_uri) as client:
        db_name = "hackboard-deploy"
        db = client[db_name]
        hackers = db["hackers"]
        accounts = db["accounts"]
        account_cache = {}
        if args.debug:
            print(f"Database: {db_name}")
            print(f"Collections: {', '.join(sorted(db.list_collection_names()))}")
            print(f"Hackers count: {hackers.count_documents({})}")

        query = {"application.general.URL.resume": {"$exists": True, "$ne": ""}}
        projection = {"application.general.URL.resume": 1, "accountId": 1}
        cursor = hackers.find(query, projection=projection)
        if args.debug:
            match_count = hackers.count_documents(query)
            print(f"Resume query matches: {match_count}")

        bucket = _open_bucket(gcs_env, db_env["BUCKET_NAME"])

        total = 0
        downloaded = 0
        taken_names = set()
        with tempfile.TemporaryDirectory() as tmpdir:
            for doc in cursor:
                total += 1
                resume_path = (
                    doc.get("application", {})
                    .get("general", {})
                    .get("URL", {})
                    .get("resume", "")
                )
                # The query also matches null values, so re-check here.
                if not resume_path or not isinstance(resume_path, str):
                    if args.debug:
                        print(f"Skip {doc.get('_id')}: missing resume path")
                    continue

                blob = bucket.blob(resume_path)
                if not blob.exists():
                    if args.debug:
                        print(f"Missing blob: {resume_path}")
                    continue

                account = _find_account(
                    accounts, account_cache, doc.get("accountId")
                )
                # "or ''" keeps a stored null from becoming the text "None".
                first = safe_name(str(account.get("firstName") or ""))
                last = safe_name(str(account.get("lastName") or ""))

                if args.debug:
                    print("Names: ", first, last)

                basename = os.path.basename(resume_path)
                root, ext = os.path.splitext(basename)
                if not ext:
                    # No extension in the object name: use the blob metadata.
                    blob.reload()
                    ext = guess_extension(blob.content_type)

                if first == "unknown" and last == "unknown":
                    # Use the basename without its extension so it is not
                    # appended twice (e.g. "cv.pdf.pdf").
                    stem = f"{doc['_id']}__{root}"
                else:
                    stem = f"{first}_{last}_resume"
                local_name = _claim_name(stem, ext, taken_names)
                local_path = os.path.join(tmpdir, local_name)

                if args.debug:
                    print(f"Downloading {resume_path} -> {local_name}")
                with open(local_path, "wb") as fh:
                    fh.write(blob.download_as_bytes())
                downloaded += 1

            # Zip only after every download succeeded, so a failed run does
            # not leave a partial archive at the output path.
            with zipfile.ZipFile(
                args.out, "w", compression=zipfile.ZIP_DEFLATED
            ) as zf:
                for name in sorted(os.listdir(tmpdir)):
                    zf.write(os.path.join(tmpdir, name), arcname=name)

    print(f"Processed {total} hackers, downloaded {downloaded} resumes.")
    print(f"Wrote {args.out}.")
    return 0
217+
218+
219+
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)

0 commit comments

Comments
 (0)