>>2926538>>2926531class ModelsComGenealogyBot:
def __init__(self):
    """Prepare the HTTP session, the relationship graph and the logger.

    Attributes set here:
        base_url: Root URL that site-relative paths are appended to.
        session: A ``requests.Session`` that sends a browser-like
            ``User-Agent`` header with every request.
        genealogy_graph: A ``networkx.DiGraph``, created empty.
        processed_models: A set, created empty.
        logger: Logger named after this module.
    """
    # The literal must sit on one line; a line break inside the quotes is
    # a syntax error.
    self.base_url = "https://www.models.com"
    self.session = requests.Session()
    self.session.headers.update({
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    })
    self.genealogy_graph = nx.DiGraph()
    self.processed_models = set()
    # Setup logging
    logging.basicConfig(level=logging.INFO)
    # ``__name__`` (with the double underscores) is the module name; a bare
    # ``name`` is undefined here.
    self.logger = logging.getLogger(__name__)
def get_model_page(self, model_url):
    """Download ``model_url`` and return it as a parsed document.

    The request goes through ``self.session`` with a 10 second timeout and
    the body is parsed with BeautifulSoup's ``html.parser``.

    Returns:
        The parsed page, or ``None`` when the request raises a
        ``requests.RequestException`` (including the HTTP error raised by
        ``raise_for_status``); the failure is logged at error level.
    """
    try:
        reply = self.session.get(model_url, timeout=10)
        reply.raise_for_status()
        html = reply.content
        return BeautifulSoup(html, 'html.parser')
    except requests.RequestException as e:
        self.logger.error(f"Error fetching {model_url}: {e}")
    return None
def extract_model_info(self, soup, model_url):
    """Build the summary record for one parsed profile page.

    Args:
        soup: Parsed profile page; any falsy value means "no page".
        model_url: URL the page was fetched from, stored in the record.

    Returns:
        ``None`` when ``soup`` is falsy, otherwise a dict with the keys
        ``'url'``, ``'name'`` and ``'family_members'``.
    """
    if soup:
        display_name = self._extract_name(soup)
        relatives = self._extract_family_connections(soup)
        return {
            'url': model_url,
            'name': display_name,
            'family_members': relatives,
        }
    return None
def _extract_name(self, soup):
"""Extract model's name"""
name_element = soup.find('h1', class_='model-name') or soup.find('title')
if name_element:
return name_element.get_text(strip=True)
return "Unknown"
def _extract_family_connections(self, soup):
"""Extract family connections from model profile"""
family_connections = []
# Look for family-related text in various elements
potential_elements = soup.find_all(['p', 'div', 'span'])
family_keywords = ['mother', 'father', 'sister', 'brother', 'daughter',
'son', 'parent', 'child', 'family', 'related to']
for element in potential_elements:
text = element.get_text().lower()
if any(keyword in text for keyword in family_keywords):
# Extract potential names and relationships
connection = self._parse_family_text(text, element)
if connection:
family_connections.append(connection)
return family_connections
def _parse_family_text(self, text, element):
"""Parse family relationship text"""
# This is a simplified parser - you might want to enhance it with NLP
relationships = {}
if 'mother' in text:
relationships['mother'] = self._find_names_in_text(text)
if 'father' in text:
relationships['father'] = self._find_names_in_text(text)
if 'sister' in text:
relationships['sister'] = self._find_names_in_text(text)
if 'brother' in text:
relationships['brother'] = self._find_names_in_text(text)
if 'daughter' in text:
relationships['daughter'] = self._find_names_in_text(text)
if 'son' in text:
relationships['son'] = self._find_names_in_text(text)
return relationships if relationships else None
def _find_names_in_text(self, text):
"""Simple name extraction - could be improved with NLP"""
import re
# Basic pattern for names (capitalized words)
name_pattern = r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b'
names = re.findall(name_pattern, text)
# Filter out common words that might be capitalized
common_words = {'The', 'And', 'Or', 'But', 'For', 'Nor', 'As', 'At', 'By', 'In', 'Of', 'On', 'To', 'Up', 'Via', 'With'}
return [name for name in names if name not in common_words and len(name) > 2]
def search_models(self, query=None, max_results=50):
"""Search for models on models.com"""
models = []
page = 1
while len(models) < max_results:
search_url = f"{self.base_url}/search?q={query or ''}&page={