Add last part of new training data for spacy

pull/49/head
Zainab2604 2025-06-02 22:51:47 +02:00
parent 2f159d8c8d
commit 59dde98dcb
14 changed files with 2229 additions and 56 deletions

View File

@ -3,7 +3,7 @@ import os
import json
current_dir = os.path.dirname(os.path.abspath(__file__))
model_path = os.path.join(current_dir, "../spacy_training/output/model-best")
model_path = os.path.join(current_dir, "../spacy_training/output/model-last")
nlp = spacy.load(model_path)

View File

@ -22,10 +22,14 @@
],
"ner":[
"AUSSCH\u00dcTTUNGSRENDITE",
"Aussch\u00fcttungsrendite",
"Laufzeit",
"LAUFZEIT",
"L\u00c4NDERALLOKATION",
"MANAGMENTGEB\u00dcHREN",
"RENDITE",
"RISIKOPROFIL",
"Risikoprofil"
"SEKTORENALLOKATION",
"ZIELAUSSCH\u00dcTTUNG",
"ZIELRENDITE"
]
},
"pipeline":[
@ -40,37 +44,57 @@
],
"performance":{
"ents_f":0.8888888889,
"ents_p":0.8205128205,
"ents_r":0.9696969697,
"ents_f":0.9637305699,
"ents_p":0.93,
"ents_r":1.0,
"ents_per_type":{
"RISIKOPROFIL":{
"p":1.0,
"r":0.9705882353,
"f":0.9850746269
},
"Risikoprofil":{
"p":0.8,
"r":1.0,
"f":0.8888888889
"f":1.0
},
"Laufzeit":{
"p":0.9,
"L\u00c4NDERALLOKATION":{
"p":0.90625,
"r":1.0,
"f":0.9473684211
"f":0.9508196721
},
"AUSSCH\u00dcTTUNGSRENDITE":{
"p":0.5925925926,
"r":0.9411764706,
"f":0.7272727273
},
"Aussch\u00fcttungsrendite":{
"p":0.6666666667,
"p":0.7105263158,
"r":1.0,
"f":0.8
"f":0.8307692308
},
"LAUFZEIT":{
"p":1.0,
"r":1.0,
"f":1.0
},
"RENDITE":{
"p":1.0,
"r":1.0,
"f":1.0
},
"ZIELRENDITE":{
"p":1.0,
"r":1.0,
"f":1.0
},
"ZIELAUSSCH\u00dcTTUNG":{
"p":1.0,
"r":1.0,
"f":1.0
},
"MANAGMENTGEB\u00dcHREN":{
"p":1.0,
"r":1.0,
"f":1.0
},
"SEKTORENALLOKATION":{
"p":1.0,
"r":1.0,
"f":1.0
}
},
"tok2vec_loss":119.7162696429,
"ner_loss":824.8371582031
"tok2vec_loss":26.5951735583,
"ner_loss":742.7924194336
}
}

View File

@ -1 +1 @@
¥movesÚì{"0":{},"1":{"RISIKOPROFIL":91,"AUSSCH\u00dcTTUNGSRENDITE":40,"Laufzeit":26,"Risikoprofil":10,"Aussch\u00fcttungsrendite":8},"2":{"RISIKOPROFIL":91,"AUSSCH\u00dcTTUNGSRENDITE":40,"Laufzeit":26,"Risikoprofil":10,"Aussch\u00fcttungsrendite":8},"3":{"RISIKOPROFIL":91,"AUSSCH\u00dcTTUNGSRENDITE":40,"Laufzeit":26,"Risikoprofil":10,"Aussch\u00fcttungsrendite":8},"4":{"RISIKOPROFIL":91,"AUSSCH\u00dcTTUNGSRENDITE":40,"Laufzeit":26,"Risikoprofil":10,"Aussch\u00fcttungsrendite":8,"":1},"5":{"":1}}£cfg<66>§neg_keyÀ
¥movesÚL{"0":{},"1":{"RISIKOPROFIL":161,"L\u00c4NDERALLOKATION":161,"RENDITE":91,"AUSSCH\u00dcTTUNGSRENDITE":68,"LAUFZEIT":38,"ZIELRENDITE":12,"SEKTORENALLOKATION":12,"MANAGMENTGEB\u00dcHREN":8,"ZIELAUSSCH\u00dcTTUNG":2},"2":{"RISIKOPROFIL":161,"L\u00c4NDERALLOKATION":161,"RENDITE":91,"AUSSCH\u00dcTTUNGSRENDITE":68,"LAUFZEIT":38,"ZIELRENDITE":12,"SEKTORENALLOKATION":12,"MANAGMENTGEB\u00dcHREN":8,"ZIELAUSSCH\u00dcTTUNG":2},"3":{"RISIKOPROFIL":161,"L\u00c4NDERALLOKATION":161,"RENDITE":91,"AUSSCH\u00dcTTUNGSRENDITE":68,"LAUFZEIT":38,"ZIELRENDITE":12,"SEKTORENALLOKATION":12,"MANAGMENTGEB\u00dcHREN":8,"ZIELAUSSCH\u00dcTTUNG":2},"4":{"RISIKOPROFIL":161,"L\u00c4NDERALLOKATION":161,"RENDITE":91,"AUSSCH\u00dcTTUNGSRENDITE":68,"LAUFZEIT":38,"ZIELRENDITE":12,"SEKTORENALLOKATION":12,"MANAGMENTGEB\u00dcHREN":8,"ZIELAUSSCH\u00dcTTUNG":2,"":1},"5":{"":1}}£cfg<66>§neg_keyÀ

View File

@ -22,10 +22,14 @@
],
"ner":[
"AUSSCH\u00dcTTUNGSRENDITE",
"Aussch\u00fcttungsrendite",
"Laufzeit",
"LAUFZEIT",
"L\u00c4NDERALLOKATION",
"MANAGMENTGEB\u00dcHREN",
"RENDITE",
"RISIKOPROFIL",
"Risikoprofil"
"SEKTORENALLOKATION",
"ZIELAUSSCH\u00dcTTUNG",
"ZIELRENDITE"
]
},
"pipeline":[
@ -40,37 +44,57 @@
],
"performance":{
"ents_f":0.8780487805,
"ents_p":0.9473684211,
"ents_r":0.8181818182,
"ents_f":0.9608938547,
"ents_p":1.0,
"ents_r":0.9247311828,
"ents_per_type":{
"RISIKOPROFIL":{
"p":1.0,
"r":0.9705882353,
"f":0.9850746269
},
"Risikoprofil":{
"p":0.8,
"r":1.0,
"f":0.8888888889
},
"AUSSCH\u00dcTTUNGSRENDITE":{
"p":0.7777777778,
"r":0.4117647059,
"f":0.5384615385
},
"Laufzeit":{
"p":1.0,
"r":1.0,
"f":1.0
},
"Aussch\u00fcttungsrendite":{
"AUSSCH\u00dcTTUNGSRENDITE":{
"p":1.0,
"r":0.5,
"f":0.6666666667
"r":0.5925925926,
"f":0.7441860465
},
"LAUFZEIT":{
"p":1.0,
"r":1.0,
"f":1.0
},
"RENDITE":{
"p":1.0,
"r":1.0,
"f":1.0
},
"L\u00c4NDERALLOKATION":{
"p":1.0,
"r":0.8965517241,
"f":0.9454545455
},
"ZIELRENDITE":{
"p":1.0,
"r":1.0,
"f":1.0
},
"ZIELAUSSCH\u00dcTTUNG":{
"p":1.0,
"r":1.0,
"f":1.0
},
"MANAGMENTGEB\u00dcHREN":{
"p":1.0,
"r":1.0,
"f":1.0
},
"SEKTORENALLOKATION":{
"p":1.0,
"r":1.0,
"f":1.0
}
},
"tok2vec_loss":235.8388520621,
"ner_loss":1878.9451904297
"tok2vec_loss":33.6051129291,
"ner_loss":740.5764770508
}
}

View File

@ -1 +1 @@
¥movesÚì{"0":{},"1":{"RISIKOPROFIL":91,"AUSSCH\u00dcTTUNGSRENDITE":40,"Laufzeit":26,"Risikoprofil":10,"Aussch\u00fcttungsrendite":8},"2":{"RISIKOPROFIL":91,"AUSSCH\u00dcTTUNGSRENDITE":40,"Laufzeit":26,"Risikoprofil":10,"Aussch\u00fcttungsrendite":8},"3":{"RISIKOPROFIL":91,"AUSSCH\u00dcTTUNGSRENDITE":40,"Laufzeit":26,"Risikoprofil":10,"Aussch\u00fcttungsrendite":8},"4":{"RISIKOPROFIL":91,"AUSSCH\u00dcTTUNGSRENDITE":40,"Laufzeit":26,"Risikoprofil":10,"Aussch\u00fcttungsrendite":8,"":1},"5":{"":1}}£cfg<66>§neg_keyÀ
¥movesÚL{"0":{},"1":{"RISIKOPROFIL":161,"L\u00c4NDERALLOKATION":161,"RENDITE":91,"AUSSCH\u00dcTTUNGSRENDITE":68,"LAUFZEIT":38,"ZIELRENDITE":12,"SEKTORENALLOKATION":12,"MANAGMENTGEB\u00dcHREN":8,"ZIELAUSSCH\u00dcTTUNG":2},"2":{"RISIKOPROFIL":161,"L\u00c4NDERALLOKATION":161,"RENDITE":91,"AUSSCH\u00dcTTUNGSRENDITE":68,"LAUFZEIT":38,"ZIELRENDITE":12,"SEKTORENALLOKATION":12,"MANAGMENTGEB\u00dcHREN":8,"ZIELAUSSCH\u00dcTTUNG":2},"3":{"RISIKOPROFIL":161,"L\u00c4NDERALLOKATION":161,"RENDITE":91,"AUSSCH\u00dcTTUNGSRENDITE":68,"LAUFZEIT":38,"ZIELRENDITE":12,"SEKTORENALLOKATION":12,"MANAGMENTGEB\u00dcHREN":8,"ZIELAUSSCH\u00dcTTUNG":2},"4":{"RISIKOPROFIL":161,"L\u00c4NDERALLOKATION":161,"RENDITE":91,"AUSSCH\u00dcTTUNGSRENDITE":68,"LAUFZEIT":38,"ZIELRENDITE":12,"SEKTORENALLOKATION":12,"MANAGMENTGEB\u00dcHREN":8,"ZIELAUSSCH\u00dcTTUNG":2,"":1},"5":{"":1}}£cfg<66>§neg_keyÀ

View File

@ -313,7 +313,7 @@ TRAINING_DATA = [
),
(
"Geografische Zielallokation nach Investitionsphase des Fonds: 1) Schweden 20-60% Allokation Länder 2) Finnland 20-60% 3) Norwegen 10-40% 4) Dänemark 10-40%",
{"entities": [[65, 80, "Länderallokation"], [102, 117, "Länderallokation"], [121, 136, "Länderallokation"], [140, 155, "Länderallokation"]]},
{"entities": [[65, 80, "LÄNDERALLOKATION"], [102, 117, "LÄNDERALLOKATION"], [121, 136, "LÄNDERALLOKATION"], [140, 155, "LÄNDERALLOKATION"]]},
),
(
"Deutsches Spezial-Sondervermögen mit festen Anlagebedingungen ($284 KAGB) Immobilien- oder Infrastrukturquote (nach Solvency II) Core / Core+ Euro Hauptstadtregionen und andere Großstädte in den Nordics €500 Mio. 4,5-5,5% 15 Jahre; Fonds hat unbegrenzte Laufzeit; Investmentphase 4 Jahre Maximaler Fremdkapitalanteil 50% (LTV-Ziel bei Ankauf), Langfristiges LTV-Ziel auf Fondsebene ist 45% 0,625% p. a. des Bruttofondsvermögens Zeichnungen ab € 30 Mio. - 0,03 % Rabatt Zeichnungen ab € 50 Mio. - zusatzl. 0,03 % Rabatt 1,1% des Verkehrswertes 0,6% der Bruttoverkaufswert 10% wenn Hurdle Rate 5,0 % p. a. (IRR netto) überschritten wird (nach 15 Jahren berechnet) Ja",
@ -442,10 +442,120 @@ TRAINING_DATA = [
(
"Geared Gross IRR seeking a range of 16-18% per annum",
{"entities": [[37, 43, "RENDITE"]]},
),
(
"Open-ended fund 24 months, incl. rolling reinvestment Sale of individual assets with respective management contracts or geared leases IRR: >6.5% | CoC: >5.0%",
{"entities": [[0, 10, "LAUFZEIT"], [139, 144, "RENDITE"]]},
),
(
"Our investment strategy focuses on investing in upscale hotels in European prime locations, including DACH, Italy, Spain, Portugal, France, UK, Denmark, Benelux,and Poland.",
{"entities": [[102, 171, "LÄNDERALLOKATION"]]},
),
(
"Core+ assets with value-add potential, Emerging Gateway Cities Helsinki] Core+ with Value well-mitigated risk and great upside Potential potential through asset improvement or = Max. 20% UK & Ireland {no contract renegotiation currency risk hedging], 80% tinental E > IRR target of 6-9%",
{"entities": [[0, 5, "RISIKOPROFIL"], [282, 286, "RENDITE"]]},
),
(
"10% net IRR since inception in 2018?",
{"entities": [[0, 3, "RENDITE"]]},
),
(
"Eurozone: Benelux, France and Germany",
{"entities": [[10, 37, "LÄNDERALLOKATION"]]},
),
(
"Open-ended, with quarterly liquidity (redemption rights, dual pricing)",
{"entities": [[0, 10, "LAUFZEIT"]]},
),
(
"Class A & B (Institutional): 0.93% on NAV; Class D (Wholesale): 1.80% on NAV; Class P (Wholesale): 1.25% on NAV",
{"entities": [[29, 34, "MANAGMENTGEBÜHREN"], [64, 69, "MANAGMENTGEBÜHREN"], [99, 104, "MANAGMENTGEBÜHREN"]]},
),
(
"Risk profile: favour core > © at least and core+ assets with a targeted N 2 n allocation to value add assets to enhance returns",
{"entities": [[21, 25, "RISIKOPROFIL"], [43, 48, "RISIKOPROFIL"]]},
),
(
"The Netherlands (38 assets) = Germany (9 assets) 10 largest Country assets split France (8 assets)",
{"entities": [[0, 15, "LÄNDERALLOKATION"], [30, 37, "LÄNDERALLOKATION"], [81, 87, "LÄNDERALLOKATION"]]},
),
(
"Expected IRR 10.9%",
{"entities": [[13, 18, "ZIELRENDITE"]]},
),
(
"Structure Open-end, perpetual life, Luxembourg domiciled Initial Target Size* €2 billion 6-8% total return,",
{"entities": [[10, 18, "LAUFZEIT"], [89, 93, "RENDITE"]]},
),
(
"Geographic Focus: UK, Ireland, Iberia, Nordics, Netherlands, Germany, France, Italy",
{"entities": [[18, 83, "LÄNDERALLOKATION"]]},
),
(
"IRR of 13-14%",
{"entities": [[7, 13, "RENDITE"]]},
),
(
"Value-add",
{"entities": [[0, 9, "RISIKOPROFIL"]]},
),
(
"Geographic allocation NORDICS UNITED KINGDOM GERMANY FRANCE PORTUGAL BENELUX",
{"entities": [[22, 76, "LÄNDERALLOKATION"]]},
),
(
"Strong track record delivering a 17% net IRR, 1.7x net multiple across all divested assets (both discretionary and non-discretionary mandates)",
{"entities": [[33, 36, "RENDITE"]]},
),
(
"Targeting a 7-8% net annual return and a 3-4% dividend yield, reflecting a target LTV of 35% (capped at 37.5%)",
{"entities": [[12, 16, "RENDITE"]]},
),
(
"Sweden Norway Denmark Finland",
{"entities": [[0, 29, "LÄNDERALLOKATION"]]},
),
(
"Logistics Residential Office Other",
{"entities": [[0, 34, "SEKTORENALLOKATION"]]},
),
(
"Fund Term Open-ended with an initial 24-month lock-in for new investors",
{"entities": [[10, 20, "LAUFZEIT"]]},
),
(
"Management fee of 85 bps on NAV.",
{"entities": [[18, 24, "MANAGMENTGEBÜHREN"]]},
),
(
"Core/Core+ strategy, with tactical exposure to development projects aiming at enhancing the quality of the portfolio over time",
{"entities": [[0, 10, "RISIKOPROFIL"]]},
),
(
"Fund term: Open-ended",
{"entities": [[11, 21, "LAUFZEIT"]]},
),
(
"Return targets: The fund targets a net internal rate of return (IRR) of 8% and a net annual income yield of 5% with planned quarterly distributions.",
{"entities": [[72, 74, "RENDITE"]]},
),
(
"Geographic scope: The fund has a broad mandate to invest in commercial and residential real estate across Sweden, Denmark, Finland, and Norway. 50% LTV Asset selection: Heirs to acquire high-quality, income-generating properties in major Nordic cities and enhance their value through active asset management. Portfolio construction: The goal is to build diversified portfolios that are appealing to core buyers upon exit.",
{"entities": [[106, 142, "LÄNDERALLOKATION"]]},
),
(
"Experience: Since 2012, | | has demonstrated its capability to build diversified and resilient portfolios for its core-plus funds. German Real Estate Quota advantage . Local expertise: extensive local relationships and proprietary deal flow in key Nordic markets provide a strategic advantage.",
{"entities": [[114, 123, "RISIKOPROFIL"]]},
),
(
"Target returns: 8% net IRR with 5% net annual income yield! * Geographic focus: Sweden, Denmark, Norway and Finland « Target leverage: 50% LTV (excluding short-term borrowing) « Sector exposure: office, logistics, public properties, retail (focused on grocery anchored and necessity driven retail) and residentials « Investment focus: high quality properties,",
{"entities": [[16, 18, "RENDITE"], [80, 115, "LÄNDERALLOKATION"], [195, 239, "SEKTORENALLOKATION"]]},
),
(
"The Fund 2 xemoours common limited partnership (SCS) (SICAV-RAIF) Investment Objective To pursue investments in commercial and residential properties throughout the Nordic Region Fund Target Size €300 million (equity) Return Targets Target net IRR of 8%, target net annual income yield of 5%",
{"entities": [[251, 253, "RENDITE"]]},
)
]

View File

@ -1,5 +1,8 @@
import os
from pathlib import Path
import spacy
from spacy.cli.train import train
from spacy.tokens import DocBin
@ -29,3 +32,9 @@ for text, annot in tqdm(TRAINING_DATA):
# save the DocBin object
os.makedirs("./data", exist_ok=True)
db.to_disk("./data/train.spacy")
config_path = Path("config.cfg")
output_path = Path("output")
print("Starte Training...")
train(config_path, output_path)