>
5 bedroom detached house for sale
Hermon, Cynwyl Elfed, Carmarthen
£425,000
Hermon, Cynwyl Elfed, Carmarthen
£425,000
Price History
Initial price | £440,000 |
29/03/24 | £425,000 |
Price Change | -3.41% |
Description
```
I am trying to convert the above property description into a machine-readable format using spaCy. I have tried spaCy's `DependencyParser` and `EntityRecognizer`, but I am struggling to extract the information in a structured way.
Here's what I've tried so far:
```python
import spacy
from spacy.matcher import Matcher
from collections import defaultdict
# Extract rule-based spans from the property description and collect them
# as {rule name: [(start_char, end_char), ...]}.
#
# NOTE: `text` must already hold the listing description; it is not defined
# in this snippet.
nlp = spacy.load("en_core_web_sm")
doc = nlp(text)

# A single Matcher holds every rule. Re-creating the Matcher between
# `add` calls would silently drop the rules registered on the first one.
matcher = Matcher(nlp.vocab)

# The number is optional, so the bare "approximately miles" still matches,
# but "approximately 5 miles" / "approximately five miles" now match too.
distance_pattern = [
    {"LOWER": "approximately"},
    {"LIKE_NUM": True, "OP": "?"},
    {"LOWER": "miles"},
]
matcher.add("distance", [distance_pattern])

location_pattern = [{"LOWER": "located"}, {"LOWER": "approximately"}]
matcher.add("location", [location_pattern])

entities = [
    "County town",
    "market town",
    "village",
    "local amenities",
    "integral garage",
    "driveway parking",
    "rear garden",
    "living room",
    "bedrooms",
    "master bedroom",
    "en suite",
    "family bathroom",
    "shower room",
    "LPG hob",
    "open plan kitchen",
    "dining area",
    "conservatory",
    "utility room",
    "shower",
    "bath",
    "separate shower and bath",
]

# These are literal phrases, not NER labels: comparing them with
# `ent.label_` can never succeed. Register each phrase as its own
# case-insensitive token rule instead, tokenised with the pipeline's own
# tokenizer so the pattern lines up with how `doc` was split.
for phrase in entities:
    phrase_pattern = [{"LOWER": token.lower_} for token in nlp.make_doc(phrase)]
    matcher.add(phrase, [phrase_pattern])

matches = matcher(doc)

# Seed every phrase with an empty list so phrases without hits are still
# reported; "distance" / "location" appear only when they matched.
entities_dict = defaultdict(list, {phrase: [] for phrase in entities})
for match_id, token_start, token_end in matches:
    span = doc[token_start:token_end]
    rule_name = nlp.vocab.strings[match_id]
    entities_dict[rule_name].append((span.start_char, span.end_char))

# Drop "location" hits whose text is just one of the trigger words. The
# stored offsets are character offsets, so slice `doc.text`; slicing `doc`
# itself would interpret them as token indices.
# The current two-token rule never yields a bare trigger word, so this is
# a guard carried over from the original script.
trigger_words = {"located", "approximately"}
if "location" in entities_dict:
    entities_dict["location"] = [
        (start, end)
        for start, end in entities_dict["location"]
        if doc.text[start:end] not in trigger_words
    ]
for ent_name, spans in entities_dict.items():
print(f"{ent_name}: {[doc[spans[i]:spans[i]+1].text for i in range(len(spans))