from pylangacq.util import CLITIC, get_lemma_from_mor
from pylangacq.dependency import DependencyGraph
# noinspection PyPep8Naming
def get_MLUm(tagged_sents, pos_to_ignore=None):
    """Mean length of utterance (MLU) in morphemes

    :param tagged_sents: iterable of tagged sentences, already filtered for
        the desired participant like 'CHI'. Each tagged sentence is an
        iterable of tagged words; index 1 of a tagged word is read as its
        part-of-speech tag and index 2 as its morphology string.
    :param pos_to_ignore: optional container of part-of-speech tags; words
        carrying one of these tags contribute no morphemes.
    :return: total morpheme count divided by the number of utterances,
        or 0 if there are no utterances.
    """
    total_utterance_count = 0
    total_morpheme_count = 0

    for tagged_sent in tagged_sents:
        # Every utterance counts, even if all of its words are ignored.
        total_utterance_count += 1

        for tagged_word in tagged_sent:
            pos = tagged_word[1]
            morph = tagged_word[2]

            if pos_to_ignore and pos in pos_to_ignore:
                continue

            # One morpheme for the word itself, plus one for each "-" and
            # each "~" found in the morphology string.
            total_morpheme_count += 1 + morph.count("-") + morph.count("~")

    if total_utterance_count:
        return total_morpheme_count / total_utterance_count
    return 0
# noinspection PyPep8Naming
def get_MLUw(sents, words_to_ignore=None):
    """Mean length of utterance (MLU) in words

    :param sents: iterable of sentences, already filtered for the desired
        participant like 'CHI'. Each sentence is an iterable of words.
    :param words_to_ignore: optional container of words that are skipped
        when counting.
    :return: total word count divided by the number of utterances,
        or 0 if there are no utterances.
    """
    total_utterance_count = 0
    total_word_count = 0

    for sent in sents:
        # Every utterance counts, even if all of its words are ignored.
        total_utterance_count += 1

        for word in sent:
            if words_to_ignore and word in words_to_ignore:
                continue
            total_word_count += 1

    if total_utterance_count:
        return total_word_count / total_utterance_count
    return 0
# noinspection PyPep8Naming
def get_TTR(word_freq_dict, words_to_ignore=None):
    """Type-token ratio (TTR)

    :param word_freq_dict: mapping from word to its frequency, already
        filtered for the desired participant like 'CHI'.
    :param words_to_ignore: optional container of words to leave out of
        both the type count and the token count.
    :return: number of distinct words divided by the sum of frequencies,
        or 0 if there are no tokens left to count.
    """
    if words_to_ignore:
        word_freq_dict = {
            word: freq
            for word, freq in word_freq_dict.items()
            if word not in words_to_ignore
        }

    token_count = sum(word_freq_dict.values())

    # Guard against an empty (or fully ignored) dict, which would otherwise
    # raise ZeroDivisionError; 0 matches what the MLU functions return.
    if not token_count:
        return 0
    return len(word_freq_dict) / token_count
# noinspection PyPep8Naming
def get_IPSyn(tagged_sents):
    """Index of Productive Syntax (IPSyn)

    Every IPSyn item (N1-N11, V1-V16, Q1-Q10, S1-S19) has its own test
    function defined below. Decorating a test function with ``test_item``
    runs it immediately against the utterances, one dependency graph at a
    time, until the item has collected 2 points. Some items also credit
    other items, as stated in their docstrings.

    :param tagged_sents: tagged sentences; only the first 100 are used.
        Each one is passed to ``DependencyGraph``.
    :return: the sum of the points over all items.
    """
    if len(tagged_sents) > 100:
        tagged_sents = tagged_sents[:100]

    # Item names: N1..N11, V1..V16, Q1..Q10, S1..S19.
    item_names = [
        prefix + str(number)
        for prefix, count in (("N", 11), ("V", 16), ("Q", 10), ("S", 19))
        for number in range(1, count + 1)
    ]

    # Points collected so far for each item.
    scoring_board = {item: 0 for item in item_names}
    # True once an item must not receive any further points.
    scoring_board_stop = {item: False for item in item_names}

    def add_one_point_if_needed(item):
        """
        Add one point to *item* if necessary. If *item* has scored 2 points,
        avoid further scoring by setting scoring_board_stop[*item*] to True.

        :param item: check item like 'N1', 'Q3' etc
        """
        if scoring_board_stop[item]:
            return

        scoring_board[item] += 1

        if scoring_board[item] >= 2:
            scoring_board_stop[item] = True

    def turn_off_scoring_board(item):
        """
        Check if scoring_board[*item*] is already 2 and, if so: (i) set
        scoring_board_stop[*item*] to True, and (ii) return True.
        Return False otherwise.

        :param item: check item like 'N1', 'Q3' etc
        """
        if scoring_board[item] >= 2:
            scoring_board_stop[item] = True
            return True
        return False

    def test_item(item_function):
        """
        Decorator that immediately runs *item_function* on the dependency
        graph of each sentence, stopping as soon as the item named after
        the function is fully scored. Sentences whose graph reports
        ``faulty()`` are skipped.

        :param item_function: test function named after its item ('N1' etc)
        :return: *item_function* itself, unchanged
        """
        item = item_function.__name__

        for tagged_sent in tagged_sents:
            if scoring_board_stop[item]:
                break

            sent_graph = DependencyGraph(tagged_sent)

            if sent_graph.faulty():
                continue

            item_function(sent_graph)

            if turn_off_scoring_board(item):
                break

        # Return the function so the decorated name is not bound to None.
        return item_function

    # NOTE: in every test below, node 0 is never inspected; word positions
    # run from 1 to graph.number_of_nodes() - 1.

    # noinspection PyPep8Naming
    @test_item
    def N1(graph):
        """
        N1: Proper, mass, or count noun
        """
        for i in range(1, graph.number_of_nodes()):
            pos = graph.node[i]["pos"]
            if pos.startswith("N:") or pos == "N":
                scoring_board["N1"] += 1

            if turn_off_scoring_board("N1"):
                break

    # noinspection PyPep8Naming
    @test_item
    def N2(graph):
        """
        N2: Pronoun or prolocative, excluding modifiers
        """
        for i in range(1, graph.number_of_nodes()):
            pos = graph.node[i]["pos"]
            if pos.startswith("PRO") and pos != "PRO:POSS:DET":
                scoring_board["N2"] += 1

            if turn_off_scoring_board("N2"):
                break

    # noinspection PyPep8Naming
    @test_item
    def N3(graph):
        """
        N3: Modifier, including adjectives, possessives, and quantifiers
        """
        for i in range(1, graph.number_of_nodes()):
            pos = graph.node[i]["pos"]
            if pos in {"PRO:POSS:DET", "ADJ", "QN"}:
                scoring_board["N3"] += 1

            if turn_off_scoring_board("N3"):
                break

    # noinspection PyPep8Naming
    @test_item
    def N4(graph):
        """
        N4: Two-word NP: nominal preceded by article or modifier
        """
        if not graph.number_of_nodes() > 2:
            return

        for i in range(1, graph.number_of_nodes() - 1):
            pos1 = graph.node[i]["pos"]
            pos2 = graph.node[i + 1]["pos"]

            if pos1 in {"PRO:POSS:DET", "ADJ", "QN"} and (
                pos2.startswith("N:") or pos2 == "N"
            ):
                scoring_board["N4"] += 1

            if turn_off_scoring_board("N4"):
                break

    # noinspection PyPep8Naming
    @test_item
    def N5(graph):
        """
        N5: Article, used before a noun (Also credit: N4)
        """
        if not graph.number_of_nodes() > 2:
            return

        for i in range(1, graph.number_of_nodes() - 1):
            pos1 = graph.node[i]["pos"]
            pos2 = graph.node[i + 1]["pos"]

            if pos1 == "DET" and (pos2.startswith("N:") or pos2 == "N"):
                scoring_board["N5"] += 1
                add_one_point_if_needed("N4")

            if turn_off_scoring_board("N5"):
                break

    # noinspection PyPep8Naming
    @test_item
    def N6(graph):
        """
        N6: Two-word NP (as in N4) after verb or preposition (Also credit: N4)
        """
        if not graph.number_of_nodes() > 3:
            return

        for i in range(1, graph.number_of_nodes() - 2):
            pos1 = graph.node[i]["pos"]
            pos2 = graph.node[i + 1]["pos"]
            pos3 = graph.node[i + 2]["pos"]

            if (
                pos2 in {"PRO:POSS:DET", "ADJ", "QN"}
                and (pos3.startswith("N:") or pos3 == "N")
                and (pos1 in {"V", "PREP"})
            ):
                scoring_board["N6"] += 1
                add_one_point_if_needed("N4")

            if turn_off_scoring_board("N6"):
                break

    # noinspection PyPep8Naming
    @test_item
    def N7(graph):
        """
        N7: Plural suffix
        """
        for i in range(1, graph.number_of_nodes()):
            mor = graph.node[i]["mor"]
            if "-PL" in mor:
                scoring_board["N7"] += 1

            if turn_off_scoring_board("N7"):
                break

    # noinspection PyPep8Naming
    @test_item
    def N8(graph):
        """
        N8: Two-word NP (as in N4) before verb (Also credit: N4)
        """
        if not graph.number_of_nodes() > 3:
            return

        for i in range(1, graph.number_of_nodes() - 2):
            pos1 = graph.node[i]["pos"]
            pos2 = graph.node[i + 1]["pos"]
            pos3 = graph.node[i + 2]["pos"]

            if (
                pos1 in {"PRO:POSS:DET", "ADJ", "QN"}
                and (pos2.startswith("N:") or pos2 == "N")
                and (pos3 == "V")
            ):
                scoring_board["N8"] += 1
                add_one_point_if_needed("N4")

            if turn_off_scoring_board("N8"):
                break

    # noinspection PyPep8Naming
    @test_item
    def N9(graph):
        """
        N9: Three-word NP (Det/Mod+Mod+N) (Also credit: N4)
        """
        if not graph.number_of_nodes() > 3:
            return

        for i in range(1, graph.number_of_nodes() - 2):
            pos1 = graph.node[i]["pos"]
            pos2 = graph.node[i + 1]["pos"]
            pos3 = graph.node[i + 2]["pos"]

            if (
                (pos1 in {"PRO:POSS:DET", "ADJ", "QN"})
                and (pos2 in {"ADJ", "QN"})
                and (pos3.startswith("N:") or pos3 == "N")
            ):
                scoring_board["N9"] += 1
                add_one_point_if_needed("N4")

            if turn_off_scoring_board("N9"):
                break

    # noinspection PyPep8Naming
    @test_item
    def N10(graph):
        """
        N10: Adverb modifying adjective or nominal (Also credit: V8)
        """
        if not graph.number_of_nodes() > 2:
            return

        for i in range(1, graph.number_of_nodes()):
            pos = graph.node[i]["pos"]

            if pos == "ADV":
                # Look at the node(s) this adverb is attached to.
                for j in graph.edge[i].keys():
                    pos_of_head = graph.node[j]["pos"]
                    if pos_of_head in {"ADJ", "N"}:
                        scoring_board["N10"] += 1
                        add_one_point_if_needed("V8")
                        break

            if turn_off_scoring_board("N10"):
                break

    # noinspection PyPep8Naming
    @test_item
    def N11(graph):
        """
        N11: Any other bound morpheme on N or adjective
        """
        for i in range(1, graph.number_of_nodes()):
            pos = graph.node[i]["pos"]

            if pos in {"N", "ADJ"} or pos.startswith("N:"):
                mor = graph.node[i]["mor"]
                # The plural suffix is covered by N7, so drop it first.
                mor = mor.replace("-PL", "")
                if "-" in mor:
                    scoring_board["N11"] += 1

            if turn_off_scoring_board("N11"):
                break

    # noinspection PyPep8Naming
    @test_item
    def V1(graph):
        """
        V1: Verb
        """
        for i in range(1, graph.number_of_nodes()):
            pos = graph.node[i]["pos"]
            if pos == "V":
                scoring_board["V1"] += 1

            if turn_off_scoring_board("V1"):
                break

    # noinspection PyPep8Naming
    @test_item
    def V2(graph):
        """
        V2: Particle or Preposition
        """
        for i in range(1, graph.number_of_nodes()):
            pos = graph.node[i]["pos"]
            if pos == "PREP":
                scoring_board["V2"] += 1

            if turn_off_scoring_board("V2"):
                break

    # noinspection PyPep8Naming
    @test_item
    def V3(graph):
        """
        V3: Prepositional phrase (Prep + NP) (Also credit: V2)
        """
        for i in range(1, graph.number_of_nodes()):
            for j in graph.edge[i].keys():
                if graph.edge[i][j]["rel"] == "POBJ":
                    scoring_board["V3"] += 1
                    add_one_point_if_needed("V2")

            if turn_off_scoring_board("V3"):
                break

    # noinspection PyPep8Naming
    @test_item
    def V4(graph):
        """
        V4: Copula linking two nominals (Also credit: V1)
        """
        if not graph.number_of_nodes() > 3:
            return

        for i in range(1, graph.number_of_nodes()):
            pos = graph.node[i]["pos"]

            if pos != "COP":
                continue

            subject = False
            predicate = False

            # Collect the dependents of this copula.
            for dep, head in graph.edges().items():
                if head != i:
                    continue

                if graph.edge[dep][head]["rel"] == "SUBJ" and not graph.node[dep][
                    "pos"
                ].endswith("WH"):
                    subject = True
                elif graph.edge[dep][head]["rel"] == "PRED":
                    predicate = True

            if subject and predicate:
                scoring_board["V4"] += 1
                add_one_point_if_needed("V1")

            if turn_off_scoring_board("V4"):
                break

    # noinspection PyPep8Naming
    @test_item
    def V5(graph):
        """
        V5: Catenative (pseudo-auxiliary) preceding a verb
        """
        if not graph.number_of_nodes() > 2:
            return

        pseudo_aux = {
            "hafta",
            "haf(ta)",
            "s'pose(da)",
            "s'poseda",
            "gonna",
            "gon(na)",
            "wanna",
            "wanta",
            "wan(t)(a)",
            "want(a)",
            "wan(na)",
            "gotta",
            "got(ta)",
            "better",
        }

        for i in range(1, graph.number_of_nodes() - 1):
            pos2 = graph.node[i + 1]["pos"]

            if pos2 != "V":
                continue

            word1 = graph.node[i]["word"]

            if word1 in pseudo_aux:
                scoring_board["V5"] += 1

            if turn_off_scoring_board("V5"):
                break

    # noinspection PyPep8Naming
    @test_item
    def V6(graph):
        """
        V6: Auxiliary be, do, have in VP (Also credit: V5)
        """
        for i in range(1, graph.number_of_nodes()):
            pos = graph.node[i]["pos"]
            mor = graph.node[i]["mor"]
            lemma = get_lemma_from_mor(mor)

            # NOTE(review): the "wi" prefix test presumably excludes the
            # auxiliary "will" -- confirm.
            if (pos == "AUX" and not mor.startswith("wi")) or (
                lemma == "do" and pos == "MOD"
            ):
                scoring_board["V6"] += 1
                add_one_point_if_needed("V5")

            if turn_off_scoring_board("V6"):
                break

    # noinspection PyPep8Naming
    @test_item
    def V7(graph):
        """
        V7: Progressive suffix
        """
        for i in range(1, graph.number_of_nodes()):
            mor = graph.node[i]["mor"]
            if mor.endswith("PRESP"):
                scoring_board["V7"] += 1

            if turn_off_scoring_board("V7"):
                break

    # noinspection PyPep8Naming
    @test_item
    def V8(graph):
        """
        V8: Adverbs
        """
        for i in range(1, graph.number_of_nodes()):
            pos = graph.node[i]["pos"]
            if pos == "ADV":
                scoring_board["V8"] += 1

            if turn_off_scoring_board("V8"):
                break

    # noinspection PyPep8Naming
    @test_item
    def V9(graph):
        """
        V9: Modal preceding verb (Also credit: V5)
        """
        if not graph.number_of_nodes() > 2:
            return

        for i in range(1, graph.number_of_nodes() - 1):
            pos = graph.node[i]["pos"]
            word = graph.node[i]["word"]
            pos2 = graph.node[i + 1]["pos"]

            if pos.startswith("MOD") and pos2 == "V" and word != CLITIC:
                scoring_board["V9"] += 1
                add_one_point_if_needed("V5")

            if turn_off_scoring_board("V9"):
                break

    # noinspection PyPep8Naming
    @test_item
    def V10(graph):
        """
        V10: Third person singular present tense suffix
        """
        for i in range(1, graph.number_of_nodes()):
            mor = graph.node[i]["mor"]
            if "-3S" in mor:
                scoring_board["V10"] += 1

            if turn_off_scoring_board("V10"):
                break

    # noinspection PyPep8Naming
    @test_item
    def V11(graph):
        """
        V11: Past tense modal (Also credit V9)
        """
        # "woudn't" is the historical misspelling used by this check; it is
        # kept next to the correct "wouldn't" for backward compatibility.
        past_tense_modals = {
            "could",
            "did",
            "might",
            "would",
            "wouldn't",
            "woudn't",
        }

        for i in range(1, graph.number_of_nodes()):
            pos = graph.node[i]["pos"]

            if pos != "MOD":
                continue

            if graph.node[i]["word"] in past_tense_modals:
                scoring_board["V11"] += 1
                add_one_point_if_needed("V9")

            if turn_off_scoring_board("V11"):
                break

    # noinspection PyPep8Naming
    @test_item
    def V12(graph):
        """
        V12: Regular past tense suffix
        """
        for i in range(1, graph.number_of_nodes()):
            mor = graph.node[i]["mor"]
            if "-PAST" in mor and "-PASTP" not in mor:
                scoring_board["V12"] += 1

            if turn_off_scoring_board("V12"):
                break

    # noinspection PyPep8Naming
    @test_item
    def V13(graph):
        """
        V13: Past tense auxiliary (Also credit V6)
        """
        aux_pos = {"AUX", "MOD"}

        for i in range(1, graph.number_of_nodes()):
            mor = graph.node[i]["mor"]
            pos = graph.node[i]["pos"]

            if "&PAST" in mor and pos in aux_pos:
                scoring_board["V13"] += 1
                add_one_point_if_needed("V6")

            if turn_off_scoring_board("V13"):
                break

    # noinspection PyPep8Naming
    @test_item
    def V14(graph):
        """
        V14: Medial adverb (Also credit V8)
        """
        for i in range(2, graph.number_of_nodes() - 1):
            # note the possible values of i for "medial" (not 1st or last word)
            pos = graph.node[i]["pos"]
            if pos == "ADV":
                scoring_board["V14"] += 1
                add_one_point_if_needed("V8")

            if turn_off_scoring_board("V14"):
                break

    # noinspection PyPep8Naming
    @test_item
    def V15(graph):
        """
        V15: Copula, modal, or auxiliary for emphasis or ellipsis
        (uncontractible context) (Also credit V4, V6, V9, V11, V13, V16)
        """
        if not graph.number_of_nodes() > 2:
            return

        for i in range(1, graph.number_of_nodes() - 1):
            pos1 = graph.node[i]["pos"]

            if pos1 not in {"COP", "AUX", "MOD"}:
                continue

            mor2 = graph.node[i + 1]["mor"]

            if mor2 in {"", "beg", "end"}:  # if mor2 is a punctuation
                scoring_board["V15"] += 1
                add_one_point_if_needed("V4")
                add_one_point_if_needed("V6")
                add_one_point_if_needed("V9")
                add_one_point_if_needed("V11")
                add_one_point_if_needed("V13")
                add_one_point_if_needed("V16")

            if turn_off_scoring_board("V15"):
                break

    # noinspection PyPep8Naming
    @test_item
    def V16(graph):
        """
        V16: Past tense copula (Also credit V4)
        """
        for i in range(1, graph.number_of_nodes()):
            pos = graph.node[i]["pos"]
            mor = graph.node[i]["mor"]

            if pos.startswith("COP") and "PAST" in mor:
                scoring_board["V16"] += 1
                add_one_point_if_needed("V4")

            if turn_off_scoring_board("V16"):
                break

    # noinspection PyPep8Naming
    @test_item
    def Q1(graph):
        """
        Q1: Intonationally marked question
        Automatically score 2 points if child earns 2 points on Q4 and/or Q8
        """
        final_word = graph.node[graph.number_of_nodes() - 1]["word"]

        if final_word != "?":
            return

        first_word = graph.node[1]["word"]

        if first_word in {"what", "why", "how", "which", "where", "when"}:
            return

        scoring_board["Q1"] += 1

        # Called for its side effect of closing the item at 2 points.
        turn_off_scoring_board("Q1")

    # noinspection PyPep8Naming
    @test_item
    def Q2(graph):
        """
        Q2: Routine do/go or existence/name question or wh-pronoun alone
        Automatically score 2 points if child earns 2 points on Q4 and/or Q8
        """
        # needs work here
        # currently only testing for wh-pronoun alone
        final_word = graph.node[graph.number_of_nodes() - 1]["word"]

        if final_word != "?":
            return

        first_word = graph.node[1]["word"]

        if first_word not in {"what", "why", "how", "which", "where", "when"}:
            return

        # NOTE(review): this condition looks always true at this point (a
        # first word and a final "?" were both just read) -- confirm whether
        # "wh-pronoun alone" was meant to be a stricter test.
        if graph.number_of_nodes() > 2:
            scoring_board["Q2"] += 1

        # Called for its side effect of closing the item at 2 points.
        turn_off_scoring_board("Q2")

    # noinspection PyPep8Naming
    @test_item
    def Q3(graph):
        """
        Q3: Simple negation (neg + X):
        neg = no(t), can't, don't
        X = NP, VP, PP, Adj, Adv, etc.
        """
        if not graph.number_of_nodes() > 2:
            return

        for i in range(1, graph.number_of_nodes() - 1):
            word1 = graph.node[i]["word"]
            mor2 = graph.node[i + 1]["mor"]

            # The word after the negation must not be a punctuation.
            if word1 in {"no", "not", "can't", "don't"} and mor2 not in {
                "",
                "beg",
                "end",
            }:
                scoring_board["Q3"] += 1

            if turn_off_scoring_board("Q3"):
                break

    # noinspection PyPep8Naming
    @test_item
    def Q4(graph):
        """
        Q4: Initial wh-pronoun followed by verb
        (if child earns 2 points for Q4, score 2 points to *both* Q1 and Q2)
        """
        if not graph.number_of_nodes() > 2:
            return

        final_word = graph.node[graph.number_of_nodes() - 1]["word"]

        if final_word != "?":
            return

        first_word = graph.node[1]["word"]

        if first_word not in {"what", "why", "how", "which", "where", "when"}:
            return

        # Head of the initial wh-word.
        root = graph.edges()[1]

        if graph.node[root]["pos"] == "V":
            scoring_board["Q4"] += 1

        if turn_off_scoring_board("Q4"):
            scoring_board["Q1"] = 2
            scoring_board["Q2"] = 2
            scoring_board_stop["Q1"] = True
            scoring_board_stop["Q2"] = True

    # noinspection PyPep8Naming
    @test_item
    def Q5(graph):
        """
        Q5: Negative morpheme between subject and verb (Also credit: Q3)
        """
        if not graph.number_of_nodes() > 3:
            return

        for dep, head in graph.edges().items():
            if dep > head:
                continue

            rel = graph.edge[dep][head]["rel"]

            if rel != "SUBJ":
                continue

            head_pos = graph.node[head]["pos"]

            if head_pos != "V":
                continue

            for i in range(dep + 1, head):  # head > dep
                if graph.node[i]["pos"] == "NEG":
                    scoring_board["Q5"] += 1
                    add_one_point_if_needed("Q3")
                    break

            if turn_off_scoring_board("Q5"):
                break

    # noinspection PyPep8Naming
    @test_item
    def Q6(graph):
        """
        Q6: Wh-question with inverted modal, copula, or auxiliary
        """
        if not graph.number_of_nodes() > 2:
            return

        for i in range(1, graph.number_of_nodes()):
            # The inner loop can only break out of itself, so the outer
            # loop re-checks whether the item is already closed.
            if scoring_board_stop["Q6"]:
                break

            pos = graph.node[i]["pos"]

            if pos not in {"COP", "MOD", "AUX"}:
                continue

            for dep, head in graph.edges().items():
                if head != i:
                    continue

                if dep > head:
                    continue

                # we want "inversion" (= dep-wh comes before head-V)
                pos_of_dep = graph.node[dep]["pos"]

                if pos_of_dep == "ADV:WH":
                    scoring_board["Q6"] += 1

                if turn_off_scoring_board("Q6"):
                    break

    # noinspection PyPep8Naming
    @test_item
    def Q7(graph):
        """
        Q7: Negation of copula, modal, or auxiliary (Also credit Q5)
        """
        if not graph.number_of_nodes() > 2:
            return

        for i in range(1, graph.number_of_nodes()):
            pos = graph.node[i]["pos"]

            if pos not in {"MOD", "COP", "AUX"}:
                continue

            for dep, head in graph.edges().items():
                if head != i:
                    continue

                pos_of_dep = graph.node[dep]["pos"]

                if pos_of_dep == "NEG":
                    scoring_board["Q7"] += 1
                    add_one_point_if_needed("Q5")

            if turn_off_scoring_board("Q7"):
                break

    # noinspection PyPep8Naming
    @test_item
    def Q8(graph):
        """
        Q8: Yes/no question with inverted modal, copula, or auxiliary
        (if child earns 2 points for Q8, score 2 points to *both* Q1 and Q2)
        """
        # test may need to be checked/improved
        if not graph.number_of_nodes() > 2:
            return

        final_word = graph.node[graph.number_of_nodes() - 1]["word"]

        if final_word != "?":
            return

        for i in range(1, graph.number_of_nodes() - 1):
            # The inner loop can only break out of itself, so the outer
            # loop re-checks whether the item is already closed.
            if scoring_board_stop["Q8"]:
                break

            pos1 = graph.node[i]["pos"]

            # POS of the preceding word, used to rule out wh-questions.
            if i != 1:
                wh_test = graph.node[i - 1]["pos"]
            else:
                wh_test = "dummy"

            if pos1 in {"COP", "MOD", "AUX"} and not wh_test.endswith("WH"):
                # "Inversion": the word right after the verb is a subject.
                for j in graph.edge[i + 1].keys():
                    rel2 = graph.edge[i + 1][j]["rel"]

                    if rel2 == "SUBJ":
                        scoring_board["Q8"] += 1

                    if turn_off_scoring_board("Q8"):
                        scoring_board["Q1"] = 2
                        scoring_board["Q2"] = 2
                        scoring_board_stop["Q1"] = True
                        scoring_board_stop["Q2"] = True
                        break

    # noinspection PyPep8Naming
    @test_item
    def Q9(graph):
        """
        Q9: Why, when, which, whose
        """
        wh = {"why", "when", "which", "whose"}

        for i in range(1, graph.number_of_nodes()):
            word = graph.node[i]["word"]
            if word in wh:
                scoring_board["Q9"] += 1

            if turn_off_scoring_board("Q9"):
                break

    # noinspection PyPep8Naming
    @test_item
    def Q10(graph):
        """
        Q10: Tag question
        """
        if not graph.number_of_nodes() > 2:
            return

        # Part 1: test for ending "okay ?", "ok ?", "right ?"
        final_word = graph.node[graph.number_of_nodes() - 1]["word"]

        if final_word != "?":
            return

        second_final_word = graph.node[graph.number_of_nodes() - 2]["word"]

        if second_final_word in {"okay", "ok", "right"}:
            scoring_board["Q10"] += 1

        if turn_off_scoring_board("Q10"):
            return

        # Part 2: test for "normal" tag questions
        good_pos = {"COP NEG PRO ?", "COP PRO ?"}

        collate = []
        for i in range(1, graph.number_of_nodes()):
            collate.append(graph.node[i]["pos"])
        test = " ".join(collate)

        for tag in good_pos:
            if tag in test:
                scoring_board["Q10"] += 1

            if turn_off_scoring_board("Q10"):
                break

    # noinspection PyPep8Naming
    @test_item
    def S1(graph):
        """
        S1: Two-word combination
        """
        if not graph.number_of_nodes() > 2:
            return

        scoring_board["S1"] += 1

        # Called for its side effect of closing the item at 2 points.
        turn_off_scoring_board("S1")

    # noinspection PyPep8Naming
    @test_item
    def S2(graph):
        """
        S2: Subject-verb sequence (Also credit: S1)
        """
        if not graph.number_of_nodes() > 2:
            return

        for dep, head in graph.edges().items():
            if dep > head:
                continue

            rel = graph.edge[dep][head]["rel"]

            if rel != "SUBJ":
                continue

            head_pos = graph.node[head]["pos"]

            if head_pos == "V":
                scoring_board["S2"] += 1
                add_one_point_if_needed("S1")

            if turn_off_scoring_board("S2"):
                break

    # noinspection PyPep8Naming
    @test_item
    def S3(graph):
        """
        S3: Verb-object sequence (Also credit: S1)
        """
        if not graph.number_of_nodes() > 2:
            return

        for dep, head in graph.edges().items():
            if dep < head:
                continue

            rel = graph.edge[dep][head]["rel"]

            if rel != "OBJ":
                continue

            head_pos = graph.node[head]["pos"]

            if head_pos == "V":
                scoring_board["S3"] += 1
                add_one_point_if_needed("S1")

            if turn_off_scoring_board("S3"):
                break

    # noinspection PyPep8Naming
    @test_item
    def S4(graph):
        """
        S4: Subject-verb-object sequence (Also credit: S2 & S3)
        """
        if not graph.number_of_nodes() > 3:
            return

        for i in range(1, graph.number_of_nodes()):
            pos = graph.node[i]["pos"]

            if pos != "V":
                continue

            has_subject = False
            has_object = False

            # The subject must precede the verb and the object follow it.
            for dep, test_verb in graph.edges().items():
                if i != test_verb:
                    continue

                if dep < test_verb and graph.edge[dep][test_verb]["rel"] == "SUBJ":
                    has_subject = True

                if dep > test_verb and graph.edge[dep][test_verb]["rel"] == "OBJ":
                    has_object = True

            if has_subject and has_object:
                scoring_board["S4"] += 1
                add_one_point_if_needed("S2")
                add_one_point_if_needed("S3")

            if turn_off_scoring_board("S4"):
                break

    # noinspection PyPep8Naming
    @test_item
    def S5(graph):
        """
        S5: Conjunction (any)
        """
        for i in range(1, graph.number_of_nodes()):
            pos = graph.node[i]["pos"]
            if pos == "CONJ":
                scoring_board["S5"] += 1

            if turn_off_scoring_board("S5"):
                break

    # noinspection PyPep8Naming
    @test_item
    def S6(graph):
        """
        S6: Sentence with two VPs
        """
        if not graph.number_of_nodes() > 4:
            return

        all_edges = graph.edges()

        # One entry per dependent whose head is a verb.
        verbs = []

        for dep, head in all_edges.items():
            head_pos = graph.node[head]["pos"]

            if head_pos != "V":
                continue

            verbs.append(head)

        # The two collected heads must not themselves form an edge.
        if len(verbs) == 2 and tuple(verbs) not in list(all_edges.items()):
            scoring_board["S6"] += 1

        # Called for its side effect of closing the item at 2 points.
        turn_off_scoring_board("S6")

    # noinspection PyPep8Naming
    @test_item
    def S7(graph):
        """
        S7: Conjoined phrases (Also credit: S5)
        """
        if not graph.number_of_nodes() > 3:
            return

        punctuations = {"", "beg", "end"}

        # for all trios, we want the middle word to be CONJ (for pos)
        # and the first+final words are *not* punctuation (for mor)
        for i in range(1, graph.number_of_nodes() - 2):
            mor1 = graph.node[i]["mor"]
            pos2 = graph.node[i + 1]["pos"]
            mor3 = graph.node[i + 2]["mor"]

            if pos2 == "CONJ" and mor1 not in punctuations and mor3 not in punctuations:
                scoring_board["S7"] += 1
                add_one_point_if_needed("S5")

            if turn_off_scoring_board("S7"):
                break

    # noinspection PyPep8Naming
    @test_item
    def S8(graph):
        """
        S8: Infinitive without catenative, marked with "to"
        (Also credit: S6 & V5)
        """
        if not graph.number_of_nodes() > 3:
            return

        # we want:
        # -- main verb (ROOT for rel)
        # -- infinitive "to" with a head *not* being the main verb
        for dep, head in graph.edges().items():
            pos = graph.node[dep]["pos"]

            if pos != "INF":
                continue

            inf_verb = head

            for test_verb, new_head in graph.edges().items():
                if inf_verb != test_verb:
                    continue

                if not graph.edge[inf_verb][new_head]["rel"].endswith("ROOT"):
                    scoring_board["S8"] += 1
                    add_one_point_if_needed("S6")
                    add_one_point_if_needed("V5")
                    break

            if turn_off_scoring_board("S8"):
                break

    # noinspection PyPep8Naming
    @test_item
    def S9(graph):
        """
        S9: Let/Make/Help/Watch introducer
        (also needs a dependent verb, according to the examples)
        """
        targets = {"let", "make", "help", "watch"}
        all_edges = graph.edges()

        for dep, head in all_edges.items():
            # Only the first word can be the introducer.
            if dep != 1:
                continue

            if graph.node[dep]["word"] not in targets:
                continue

            target_head = dep

            for test_dep, test_head in all_edges.items():
                if test_head != target_head:
                    continue

                if graph.node[test_dep]["pos"] == "V":
                    scoring_board["S9"] += 1
                    break

            if turn_off_scoring_board("S9"):
                break

    # noinspection PyPep8Naming
    @test_item
    def S10(graph):
        """
        S10: Adverbial conjunction (Also credit: S5)
        (conjunction excluding "and", "or", "then" -- according to examples)
        """
        exceptions = {"and", "or", "then"}

        for i in range(1, graph.number_of_nodes()):
            word = graph.node[i]["word"]
            pos = graph.node[i]["pos"]

            if pos == "CONJ" and word not in exceptions:
                scoring_board["S10"] += 1
                add_one_point_if_needed("S5")

            if turn_off_scoring_board("S10"):
                break

    # noinspection PyPep8Naming
    @test_item
    def S11(graph):
        """
        S11: Propositional complement (Also credit S6)
        """
        if not graph.number_of_nodes() > 3:
            return

        subject_count = 0

        for dep, head in graph.edges().items():
            subject_count_increment = False

            if (
                graph.edge[dep][head]["rel"] == "SUBJ"
                and graph.node[dep]["word"] != CLITIC
            ):
                subject_count += 1
                subject_count_increment = True

            # Score each time a second (or later) subject is found.
            if subject_count_increment and subject_count > 1:
                scoring_board["S11"] += 1
                add_one_point_if_needed("S6")

            if turn_off_scoring_board("S11"):
                break

    # noinspection PyPep8Naming
    @test_item
    def S12(graph):
        """
        S12: Conjoined sentences (except for imperatives, will usually have
        subj+predicate in each clause) (Also credit: S6, S5)
        """
        if not graph.number_of_nodes() > 3:
            return

        for dep, head in graph.edges().items():
            dep_word = graph.node[dep]["word"]

            if dep_word != "and":
                continue

            rel = graph.edge[dep][head]["rel"]

            if rel == "CONJ" and graph.node[head]["pos"] == "V":
                scoring_board["S12"] += 1
                add_one_point_if_needed("S6")
                add_one_point_if_needed("S5")

            if turn_off_scoring_board("S12"):
                break

    # noinspection PyPep8Naming
    @test_item
    def S13(graph):
        """
        S13: Wh-clause (Also credit S6) (If also infinitive, credit S8 or S17)
        """
        if not graph.number_of_nodes() > 3:
            return

        for dep, head in graph.edges().items():
            dep_pos = graph.node[dep]["pos"]

            if not dep_pos.endswith("WH"):
                continue

            # Is the wh-word directly followed by an infinitive marker?
            # "INF" is a part-of-speech tag (see S8 and S17), so it has to
            # be compared against the node's pos rather than its word.
            inf = False
            if dep + 1 in graph.nodes() and graph.node[dep + 1]["pos"] == "INF":
                inf = True

            # we want the head of wh-word to NOT have ROOT as rel
            # (= ban a wh question)
            rel = ""
            for i in graph.edge[head].keys():
                rel = graph.edge[head][i]["rel"]
                break

            if rel != "ROOT":
                scoring_board["S13"] += 1
                add_one_point_if_needed("S6")

                if inf:
                    add_one_point_if_needed("S8")
                    add_one_point_if_needed("S17")

            if turn_off_scoring_board("S13"):
                break

    # noinspection PyPep8Naming
    @test_item
    def S14(graph):
        """
        S14: Bitransitive predicate (Also credit S3)
        """
        if not graph.number_of_nodes() > 3:
            return

        dep_head_pairs_for_obj = []

        for dep, head in graph.edges().items():
            rel = graph.edge[dep][head]["rel"]

            if rel != "OBJ":
                continue

            dep_head_pairs_for_obj.append((dep, head))

        heads = [head for _, head in dep_head_pairs_for_obj]

        # Fewer distinct heads than objects: some head has two objects.
        if len(set(heads)) < len(dep_head_pairs_for_obj):
            scoring_board["S14"] += 1
            add_one_point_if_needed("S3")

        # Called for its side effect of closing the item at 2 points.
        turn_off_scoring_board("S14")

    # noinspection PyPep8Naming
    @test_item
    def S15(graph):
        """
        S15: Sentence with 3 or more VPs (Also credit S6)
        """
        if not graph.number_of_nodes() > 3:
            return

        number_of_verbs = sum(
            1
            for i in range(1, graph.number_of_nodes())
            if graph.node[i]["pos"] == "V"
        )

        if number_of_verbs > 2:
            scoring_board["S15"] += 1
            add_one_point_if_needed("S6")

        # Called for its side effect of closing the item at 2 points.
        turn_off_scoring_board("S15")

    # noinspection PyPep8Naming
    @test_item
    def S16(graph):
        """
        S16: Relative clause, marked or unmarked (Also credit: S6)
        """
        # "search for a CMOD where the dependent is to the right of the head"
        # (from Sagae et al 2005 ACL)
        # add one criterion: "and" is not one of the intervening words
        if not graph.number_of_nodes() > 3:
            return

        for dep, head in graph.edges().items():
            if dep < head:
                continue

            if graph.edge[dep][head]["rel"] != "CMOD":
                continue

            and_ = False

            for i in range(head + 1, dep):  # dep > head
                if graph.node[i]["word"] == "and":
                    and_ = True
                    break

            if not and_:
                scoring_board["S16"] += 1
                add_one_point_if_needed("S6")

            if turn_off_scoring_board("S16"):
                break

    # noinspection PyPep8Naming
    @test_item
    def S17(graph):
        """
        S17: Infinitive clause: new subject. (Also credit: S8)
        """
        if not graph.number_of_nodes() > 3:
            return

        # example of a hit case: "he wants me to go"
        # ("me" is the new subject for the infinitive clause)
        for dep, head in graph.edges().items():
            word = graph.node[dep]["word"]
            pos = graph.node[dep]["pos"]

            if word != "to" or pos != "INF":
                continue

            inf_verb = head  # "go" in the example
            main_verb = graph.edges()[inf_verb]  # "wants"

            # check if there's an object of "wants"
            for test_obj, test_main_verb in graph.edges().items():
                if test_main_verb != main_verb:
                    continue

                if graph.edge[test_obj][test_main_verb]["rel"] == "OBJ":
                    scoring_board["S17"] += 1
                    add_one_point_if_needed("S8")
                    break

            if turn_off_scoring_board("S17"):
                break

    # noinspection PyPep8Naming
    @test_item
    def S18(graph):
        """
        S18: Gerund (Also credit: V7)
        """
        for i in range(1, graph.number_of_nodes()):
            pos = graph.node[i]["pos"]
            if pos == "N:GERUND":
                scoring_board["S18"] += 1
                add_one_point_if_needed("V7")

            if turn_off_scoring_board("S18"):
                break

    # noinspection PyPep8Naming
    @test_item
    def S19(graph):
        """
        S19: Front or center-embedded subordinate clause (Also credit: S6)
        """
        # might need more work
        # for now: check if CONJ precedes two SUBJ's
        conj_position = graph.number_of_nodes()  # decrement if CONJ is found
        subj_position_list = []

        for dep, head in graph.edges().items():
            pos = graph.node[dep]["pos"]
            rel = graph.edge[dep][head]["rel"]

            if pos == "CONJ" and dep < conj_position:
                conj_position = dep

            if rel == "SUBJ":
                subj_position_list.append(dep)

        if len(subj_position_list) < 2:
            return

        if conj_position < min(subj_position_list):
            scoring_board["S19"] += 1
            add_one_point_if_needed("S6")

        # Called for its side effect of closing the item at 2 points.
        turn_off_scoring_board("S19")

    return sum(scoring_board.values())