-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathAbstractParser.py
More file actions
60 lines (49 loc) · 2.04 KB
/
AbstractParser.py
File metadata and controls
60 lines (49 loc) · 2.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
from general_utils import *
from config import *
from typing import List, Dict
import AdvancedEmbedder
import ConceptExtractor
class AbstractParser:
    """
    Parses a scientific abstract to extract concepts and score their relevance.

    The abstract is cleaned once at construction time. Concept extraction,
    lemmatization and relevance scoring are delegated to the injected
    extractor and embedder, and are recomputed on every call (nothing is
    cached on the instance).
    """

    def __init__(self, my_abstract: str, concept_extractor: ConceptExtractor, embedder: AdvancedEmbedder):
        """
        Initializes the parser with a cleaned abstract, concept extractor, and embedder.

        Args:
            my_abstract (str): The raw abstract text.
            concept_extractor (ConceptExtractor): Object providing
                `extract_noun_phrases(text)` and `lemmatize(phrase)`.
            embedder (AdvancedEmbedder): Object providing
                `calculate_relevance(concept, text)`.
        """
        # NOTE: this bare name resolves to the module-level `clean_text` helper
        # (expected to be supplied by one of the star imports at the top of the
        # file), not to the `clean_text` method below -- class attributes are
        # not visible as bare names inside a method body.
        self.clean_abstract = clean_text(my_abstract)
        self.concept_extractor = concept_extractor
        self.embedder = embedder

    def clean_text(self) -> str:
        """
        Returns the cleaned abstract text computed in the constructor.

        Returns:
            str: Cleaned abstract.
        """
        return self.clean_abstract

    def raw_concepts(self) -> List[str]:
        """
        Extracts raw noun phrases from the cleaned abstract.

        Returns:
            List[str]: Raw concept phrases, exactly as returned by the extractor.
        """
        return self.concept_extractor.extract_noun_phrases(self.clean_abstract)

    def clean_concepts(self) -> List[str]:
        """
        Returns the lemmatized form of every raw concept phrase.

        Returns:
            List[str]: One lemmatized phrase per entry of the raw concept
            list, in the same order.
        """
        lemmatize = self.concept_extractor.lemmatize
        return [lemmatize(concept) for concept in self.raw_concepts()]

    def concept_relevances(self) -> List[float]:
        """
        Computes a relevance score for each raw concept with respect to the abstract.

        Scores are computed on the raw (non-lemmatized) phrases, with one
        embedder call per phrase against the cleaned abstract.

        Returns:
            List[float]: One relevance score per raw concept phrase, in the
            order the extractor produced the phrases. This is a plain list of
            scores, not a mapping from concept to score.
        """
        abstract = self.clean_abstract
        score = self.embedder.calculate_relevance
        return [score(concept, abstract) for concept in self.raw_concepts()]