diff --git a/soccerdata/sofifa.py b/soccerdata/sofifa.py index 9325cee..f322dda 100644 --- a/soccerdata/sofifa.py +++ b/soccerdata/sofifa.py @@ -475,16 +475,22 @@ def read_player_ratings( "player": before_br if before_br else after_br, **version.to_dict(), } - for s in score_labels: - nodes = tree.xpath( - f"(//li[not(self::script)] | //div | //p)[.//text()[contains(.,'{s}')]]//em" - ) - # for multiple matches, only accept first match - if len(nodes) >= 1: - scores[s] = nodes[0].text.strip() - # if there's no match, put NA - else: - scores[s] = None - ratings.append(scores) + + # Try each XPath until one returns a result + for s in score_labels: + value = None + xpaths = [ + f"//p[.//text()[contains(.,'{s}')]]/span/em", + f"//div[contains(.,'{s}')]/em", + f"//li[not(self::script)][.//text()[contains(.,'{s}')]]/em", + ] + for xpath in xpaths: + nodes = tree.xpath(xpath) + if nodes: # If at least one match is found + value = nodes[0].text.strip() # Take only the first match + break # Stop checking other XPaths once we find a valid value + + scores[s] = value if value is not None else None # Assign only once + ratings.append(scores) # return data frame return pd.DataFrame(ratings).pipe(standardize_colnames).set_index(["player"]).sort_index()