Платный заказ
Бюджет: 1200 руб.
При запуске скрипта возникает следующая ошибка:
ValueError: [E993] The config for the nlp object needs to include a key `lang` specifying the code of the language to initialize it with (for example 'en' for English) this can't be None.
import os
import csv
import spacy
import en_core_web_sm
from pyresparser import ResumeParser
from pyresparser import utils
from tqdm import tqdm
# Load spaCy's small English pipeline once, at import time.
# NOTE(review): `nlp` is not referenced by any other code visible in this
# file — confirm it is still needed before keeping this load.
nlp = spacy.load("en_core_web_sm")
def parse_resumes(resume_folder, output_csv):
    """Write contact details of the English resumes in a folder to a CSV file.

    Every ``.docx`` / ``.pdf`` file directly inside ``resume_folder`` is
    language-checked; resumes detected as English with a probability above
    0.9 are parsed with pyresparser and written as one row of ``output_csv``.

    Args:
        resume_folder: Path of the directory that contains the resume files.
        output_csv: Path of the CSV file to create (overwritten if present).
            Columns are ``Name``, ``Email``, ``Phone`` and ``State``.

    Raises:
        OSError: If ``resume_folder`` cannot be listed or ``output_csv``
            cannot be opened for writing.
    """
    # Imported locally: the module's import block does not bring langid into
    # scope even though this function relies on it.
    from langid.langid import LanguageIdentifier, model

    # norm_probs=True makes classify() return a probability in [0, 1]. The
    # module-level langid.classify() returns an unnormalised log-probability,
    # for which a "> 0.9" threshold is meaningless.
    identifier = LanguageIdentifier.from_modelstring(model, norm_probs=True)

    # List the folder once so the total and the iteration always agree and
    # the progress bar only counts real resume files.
    resume_files = [
        filename
        for filename in os.listdir(resume_folder)
        if filename.endswith((".docx", ".pdf"))
    ]
    total_files = len(resume_files)

    with open(output_csv, mode='w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['Name', 'Email', 'Phone', 'State']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        # tqdm renders the progress bar; processed_files counts every resume
        # handled (English or not) so the percentage can reach 100.
        for processed_files, filename in enumerate(
            tqdm(resume_files, desc="Processing resumes", unit="file"),
            start=1,
        ):
            file_path = os.path.join(resume_folder, filename)

            # Detect the language first so non-English resumes skip the
            # expensive pyresparser run. The raw text comes from the same
            # pyresparser helper, which expects the extension with its dot.
            extension = os.path.splitext(file_path)[1]
            text = utils.extract_text(file_path, extension)
            language, confidence = identifier.classify(text)

            # Only process the resume if it is confidently English.
            if language == "en" and confidence > 0.9:
                # NOTE(review): ResumeParser loads its own spaCy model; the
                # E993 "lang" config error reported for this script is
                # presumably a spaCy-version mismatch with that model —
                # confirm the installed spaCy version is one pyresparser
                # supports.
                data = ResumeParser(
                    file_path, skills_file=None, custom_regex=None
                ).get_extracted_data()
                writer.writerow({
                    'Name': data.get('name', ''),
                    'Email': data.get('email', ''),
                    'Phone': data.get('mobile_number', ''),
                    'State': '',  # pyresparser does not extract the state
                })

            # Textual progress line printed alongside the tqdm bar.
            progress = (processed_files / total_files) * 100
            tqdm.write(f"Progress: [{int(progress)}%] {'=' * int(progress // 5)} {filename}")

    print("\nProcessing completed!")
if __name__ == "__main__":
    # Script entry point: read resumes from ./resumes and write the
    # extracted contact details to ./resume_info.csv.
    parse_resumes(resume_folder="resumes", output_csv="resume_info.csv")
Рейтинг: 217
Исполнитель определен: