import nltk # SAT 10-12-24 with errors under terminal. This is the most recent code from PyCharm
from nltk.tokenize import word_tokenize
from nltk.tree import Tree
# Ensure the necessary NLTK resources are available
def ensure_nltk_resources():
    """Download the NLTK data packages this script relies on if they are missing.

    ``nltk.data.find`` expects a path relative to an ``nltk_data`` directory
    (for example ``tokenizers/punkt``), whereas ``nltk.download`` expects the
    bare package id (``punkt``). Looking up the bare id is not found under a
    standard ``nltk_data`` layout, which would trigger a download on every
    run, so each package id is paired with its lookup path.
    """
    # NOTE(review): recent NLTK releases load "punkt_tab" rather than "punkt"
    # for word_tokenize; if tokenizing raises LookupError, add
    # "punkt_tab": "tokenizers/punkt_tab" here - confirm against the
    # installed NLTK version.
    resources = {
        "punkt": "tokenizers/punkt",
        "averaged_perceptron_tagger": "taggers/averaged_perceptron_tagger",
    }
    for package, path in resources.items():
        try:
            nltk.data.find(path)
        except LookupError:
            nltk.download(package)


# Run at import time so the tokenizer data is present before any parsing.
ensure_nltk_resources()
# Define the CFG non-terminals and terminals
# Phrase-structure rules. nltk.CFG.fromstring requires the ASCII arrow "->"
# between a production's left- and right-hand side; the start symbol is the
# left-hand side of the first rule (S).
NONTERMINALS = """
S -> NP VP | S Conj S | VP
NP -> Det N | Det AdjP N | N | N PP | Det NP | Pro | PropN
VP -> V | V NP | V NP PP | V PP | Adv V NP PP | Aux V NP PP | V Conj VP | V AdvP
PP -> P NP
AdjP -> Adj | Adj AdjP
AdvP -> Adv | Adv AdvP
"""

# Lexical rules. Terminals must be wrapped in straight quotes to be parsed.
# NOTE(review): preprocess() lowercases its input, so the capitalized
# 'Thursday' under PropN cannot match any token it produces; 'thursday' is
# already covered by N. Confirm whether the PropN entry is still wanted.
TERMINALS = """
Det -> 'a' | 'an' | 'the'
N -> 'holmes' | 'armchair' | 'home' | 'thursday' | 'country' | 'walk' | 'mess'
V -> 'sat' | 'chuckled' | 'had' | 'came' | 'walk' | 'was' | 'were'
Adj -> 'red' | 'paint' | 'little' | 'dreadful' | 'complex' | 'country'
P -> 'in' | 'on' | 'at' | 'with'
Adv -> 'then' | 'later' | 'quickly'
Conj -> 'and'
Pro -> 'i'
Aux -> 'did'
PropN -> 'Thursday' | 'home'
"""

# Combine nonterminals and terminals to create a CFG
GRAMMAR = NONTERMINALS + TERMINALS
cfg = nltk.CFG.fromstring(GRAMMAR)

# Define the parser using this grammar
parser = nltk.ChartParser(cfg)
def preprocess(sentence):
    """Convert `sentence` to a list of lowercase word tokens.

    The sentence is lowercased and split with NLTK's ``word_tokenize``;
    tokens that contain no alphabetic character are dropped.

    Args:
        sentence: The raw sentence string.

    Returns:
        A list of lowercase tokens, each containing at least one letter.
    """
    tokens = word_tokenize(sentence.lower())
    return [token for token in tokens if any(char.isalpha() for char in token)]
def np_chunk(tree):
    """Return a list of all noun phrase chunks in the sentence tree.

    A noun phrase chunk is an ``NP`` subtree that contains no other ``NP``
    at any depth beneath it. Checking only the direct children is not
    enough: with ``NP -> N PP`` and ``PP -> P NP`` the outer NP has no NP
    child yet still contains one.

    Args:
        tree: An ``nltk.tree.Tree`` produced by the parser.

    Returns:
        A list of ``NP`` subtrees, in the order ``tree.subtrees()`` yields
        them.
    """
    def is_np(node):
        return node.label() == "NP"

    return [
        candidate
        for candidate in tree.subtrees(is_np)
        # subtrees() also yields the candidate itself, so compare by
        # identity to look only at NPs strictly inside it.
        if not any(inner is not candidate for inner in candidate.subtrees(is_np))
    ]
# Example usage for testing purposes
if __name__ == "__main__":
    # Demo driver: tokenize each sample sentence, parse it with the grammar,
    # and print every parse tree together with its noun phrase chunks.
    sentences = [
        "Holmes sat.",
        "I had a country walk on Thursday and came home in a dreadful mess.",
        "Holmes sat in the little red armchair.",
        "He quickly walked on a bright Thursday afternoon.",
    ]
    for sentence in sentences:
        words = preprocess(sentence)
        print(f"Words: {words}")
        try:
            trees = list(parser.parse(words))
        except ValueError:
            # The chart parser raises ValueError when a token is not covered
            # by the grammar's terminals; treat that as "no parse" so the
            # remaining sentences are still processed.
            trees = []
        if not trees:
            print(f"Could not parse sentence: \"{sentence}\".")
            continue
        for tree in trees:
            tree.pretty_print()
            chunks = np_chunk(tree)
            # Leaves of a CFG parse tree are plain word strings, not
            # (word, tag) pairs, so join them directly.
            chunk_text = [" ".join(chunk.leaves()) for chunk in chunks]
            print("NP Chunks:", chunk_text)