mirror of
https://github.com/ArthurIdema/Zoekeend-Phrase-Indexing.git
synced 2026-02-09 01:22:23 +00:00
Compare commits
No commits in common. "872a13a394629cfe2628d446f68c1a433660e42c" and "2a4b77618a42acc368cf92d39ad6feeca055df46" have entirely different histories.
872a13a394
...
2a4b77618a
5
.gitignore
vendored
5
.gitignore
vendored
@ -11,11 +11,6 @@ plot*
|
||||
*lock*
|
||||
*.db
|
||||
*.ciff
|
||||
*.csv
|
||||
*.sync*
|
||||
*.log
|
||||
/trec_eval/
|
||||
/try-scipy/
|
||||
output*.txt
|
||||
results*.txt
|
||||
*.txt
|
||||
|
||||
@ -1,101 +0,0 @@
|
||||
import pandas as pd
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
from scipy.stats import binomtest
|
||||
HAS_SCIPY = True
|
||||
except Exception:
|
||||
HAS_SCIPY = False
|
||||
|
||||
|
||||
def main(csv_path: str, out_csv: str = 'comparison_vs_minpmi24.csv'):
    """Run a per-query sign test on MAP against a fixed baseline configuration.

    Rows of ``csv_path`` are grouped by ``(mode, stopwords, min_freq)``.
    Within each group, every ``min_pmi`` value present is compared query by
    query with the baseline rows that share the group's ``mode`` and
    ``stopwords`` and have ``min_pmi == 24`` and ``min_freq == 1``. A higher
    ``map`` than the baseline counts as "better". One summary row per
    comparison is written to ``out_csv`` and the first 20 rows are printed.

    Args:
        csv_path: Input CSV. The columns read are ``mode``, ``stopwords``,
            ``min_freq``, ``min_pmi``, ``query`` and ``map``.
        out_csv: Destination CSV for the summary table.

    The ``p_value`` column is the two-sided binomial (sign) test p-value for
    ``n_better`` successes out of ``n_better + n_worse`` trials. It is left
    empty when scipy is unavailable or when every paired query is tied.
    """
    result_columns = [
        'mode', 'stopwords', 'min_freq', 'compared_min_pmi',
        'n_pairs', 'n_better', 'n_worse', 'n_equal', 'p_value',
    ]

    df = pd.read_csv(csv_path)

    # The baseline depends only on (mode, stopwords), so build it once per
    # pair instead of once per (min_freq, min_pmi) combination.
    baselines = {}
    results = []

    for (mode, stopwords, min_freq), group in df.groupby(['mode', 'stopwords', 'min_freq']):
        if (mode, stopwords) not in baselines:
            is_baseline = (
                (df['mode'] == mode)
                & (df['stopwords'] == stopwords)
                & (df['min_pmi'] == 24)
                & (df['min_freq'] == 1)
            )
            baselines[(mode, stopwords)] = pd.to_numeric(
                df[is_baseline].set_index('query')['map'],
                errors='coerce',
            )
        baseline = baselines[(mode, stopwords)]

        for m in sorted(group['min_pmi'].unique()):
            scores = pd.to_numeric(
                group[group['min_pmi'] == m].set_index('query')['map'],
                errors='coerce',
            )

            # Align on the query index; queries missing (or non-numeric) on
            # either side become NaN and are dropped from the comparison.
            paired = pd.DataFrame({'map': scores, 'baseline': baseline}).dropna()

            better = int((paired['map'] > paired['baseline']).sum())
            worse = int((paired['map'] < paired['baseline']).sum())
            equal = int((paired['map'] == paired['baseline']).sum())
            n_sign = better + worse  # ties carry no sign and are excluded

            p_value = None
            if HAS_SCIPY and n_sign > 0:
                p_value = float(
                    binomtest(better, n_sign, p=0.5, alternative='two-sided').pvalue
                )

            results.append({
                'mode': mode,
                'stopwords': stopwords,
                'min_freq': min_freq,
                'compared_min_pmi': m,
                'n_pairs': len(paired),
                'n_better': better,
                'n_worse': worse,
                'n_equal': equal,
                'p_value': p_value,
            })

    # Explicit columns keep sort_values() from raising KeyError when the
    # input produced no comparisons at all.
    out_df = pd.DataFrame(results, columns=result_columns)
    out_df = out_df.sort_values(['mode', 'stopwords', 'min_freq', 'compared_min_pmi'])
    out_df.to_csv(out_csv, index=False)

    # Print a short summary
    total_comparisons = len(out_df)
    print(f"Wrote {out_csv} ({total_comparisons} comparisons)")
    if total_comparisons > 0:
        print(out_df.head(20).to_string(index=False))
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Input: per-query results for queries 1-112; output: the sign-test table.
    # For the second batch, point these at
    # './spreadsheets/results_per_query-113-225.csv' and
    # 'p-values-MAP-q113-225v2.csv' instead.
    CSV = './spreadsheets/results_per_query-1-112.csv'
    OUT = './spreadsheets/p-values-MAP-q1-112.csv'

    if Path(CSV).exists():
        if not HAS_SCIPY:
            # The run still goes ahead; only the p-values are missing.
            print("scipy not found: binomial p-values will be omitted (set up scipy to get p-values)")
        main(CSV, OUT)
    else:
        print(f"Input CSV not found: {CSV}")
|
||||
|
||||
|
||||
@ -1,97 +0,0 @@
|
||||
import pandas as pd
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
from scipy.stats import binomtest
|
||||
HAS_SCIPY = True
|
||||
except Exception:
|
||||
HAS_SCIPY = False
|
||||
|
||||
|
||||
def main(csv_path: str, out_csv: str = 'comparison_vs_minpmi24.csv'):
    """Run a per-query sign test on postings cost against a fixed baseline.

    Rows of ``csv_path`` are grouped by ``(mode, stopwords, min_freq)``.
    Within each group, every ``min_pmi`` value present is compared query by
    query with the baseline rows that share the group's ``mode`` and
    ``stopwords`` and have ``min_pmi == 24`` and ``min_freq == 1``. A lower
    ``total_postings_cost`` than the baseline counts as "better". One summary
    row per comparison is written to ``out_csv`` and the first 20 rows are
    printed.

    Args:
        csv_path: Input CSV. The columns read are ``mode``, ``stopwords``,
            ``min_freq``, ``min_pmi``, ``query`` and ``total_postings_cost``.
        out_csv: Destination CSV for the summary table.

    The ``p_value`` column is the two-sided binomial (sign) test p-value for
    ``n_better`` successes out of ``n_better + n_worse`` trials. It is left
    empty when scipy is unavailable or when every paired query is tied.
    """
    result_columns = [
        'mode', 'stopwords', 'min_freq', 'compared_min_pmi',
        'n_pairs', 'n_better', 'n_worse', 'n_equal', 'p_value',
    ]

    df = pd.read_csv(csv_path)

    # The baseline depends only on (mode, stopwords), so build it once per
    # pair instead of once per (min_freq, min_pmi) combination.
    baselines = {}
    results = []

    for (mode, stopwords, min_freq), group in df.groupby(['mode', 'stopwords', 'min_freq']):
        if (mode, stopwords) not in baselines:
            is_baseline = (
                (df['mode'] == mode)
                & (df['stopwords'] == stopwords)
                & (df['min_pmi'] == 24)
                & (df['min_freq'] == 1)
            )
            baselines[(mode, stopwords)] = pd.to_numeric(
                df[is_baseline].set_index('query')['total_postings_cost'],
                errors='coerce',
            )
        baseline = baselines[(mode, stopwords)]

        for m in sorted(group['min_pmi'].unique()):
            cost = pd.to_numeric(
                group[group['min_pmi'] == m].set_index('query')['total_postings_cost'],
                errors='coerce',
            )

            # Align on the query index; queries missing (or non-numeric) on
            # either side become NaN and are dropped from the comparison.
            paired = pd.DataFrame({'cip': cost, 'baseline': baseline}).dropna()

            # Cost metric: a smaller value than the baseline is the improvement.
            better = int((paired['cip'] < paired['baseline']).sum())
            worse = int((paired['cip'] > paired['baseline']).sum())
            equal = int((paired['cip'] == paired['baseline']).sum())
            n_sign = better + worse  # ties carry no sign and are excluded

            p_value = None
            if HAS_SCIPY and n_sign > 0:
                p_value = float(
                    binomtest(better, n_sign, p=0.5, alternative='two-sided').pvalue
                )

            results.append({
                'mode': mode,
                'stopwords': stopwords,
                'min_freq': min_freq,
                'compared_min_pmi': m,
                'n_pairs': len(paired),
                'n_better': better,
                'n_worse': worse,
                'n_equal': equal,
                'p_value': p_value,
            })

    # Explicit columns keep sort_values() from raising KeyError when the
    # input produced no comparisons at all.
    out_df = pd.DataFrame(results, columns=result_columns)
    out_df = out_df.sort_values(['mode', 'stopwords', 'min_freq', 'compared_min_pmi'])
    out_df.to_csv(out_csv, index=False)

    # Print a short summary
    total_comparisons = len(out_df)
    print(f"Wrote {out_csv} ({total_comparisons} comparisons)")
    if total_comparisons > 0:
        print(out_df.head(20).to_string(index=False))
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Input: per-query results for queries 113-225; output: the sign-test table.
    CSV = './spreadsheets/results_per_query-113-225.csv'
    OUT = './spreadsheets/p-values-CiP-q113-225.csv'

    if Path(CSV).exists():
        if not HAS_SCIPY:
            # The run still goes ahead; only the p-values are missing.
            print("scipy not found: binomial p-values will be omitted (set up scipy to get p-values)")
        main(CSV, OUT)
    else:
        print(f"Input CSV not found: {CSV}")
|
||||
|
||||
|
||||
@ -1 +0,0 @@
|
||||
1 what similarity laws must be obeyed when constructing aeroelastic models of heated high speed aircraft .
|
||||
|
@ -1 +0,0 @@
|
||||
10 are real-gas transport properties for air available over a wide range of enthalpies and densities .
|
||||
|
@ -1 +0,0 @@
|
||||
100 what are the effects of initial imperfections on the elastic buckling of cylindrical shells under axial compression .
|
||||
|
@ -1 +0,0 @@
|
||||
101 why does the incremental theory and the deformation theory of plastic stress-strain relationship differ greatly when applied to stability problems .
|
||||
|
@ -1 +0,0 @@
|
||||
102 basic dynamic characteristics of structures continuous over many spans .
|
||||
|
@ -1 +0,0 @@
|
||||
103 is the information on the buckling of sandwich sphere available .
|
||||
|
@ -1 +0,0 @@
|
||||
104 can the load deformation characteristics of a beam be obtained with the material being inelastic and a non uniform temperature being present .
|
||||
|
@ -1 +0,0 @@
|
||||
105 what is the effect of an internal liquid column on the breathing vibrations of a cylindrical shell .
|
||||
|
@ -1 +0,0 @@
|
||||
106 experimental techniques in shell vibration .
|
||||
|
@ -1 +0,0 @@
|
||||
107 in summarizing theoretical and experimental work on the behaviour of a typical aircraft structure in a noise environment is it possible to develop a design procedure .
|
||||
|
@ -1 +0,0 @@
|
||||
108 what data is there on the fatigue of structures under acoustic loading .
|
||||
|
@ -1 +0,0 @@
|
||||
109 panels subjected to aerodynamic heating .
|
||||
|
@ -1 +0,0 @@
|
||||
11 is it possible to find an analytical, similar solution of the strong blast wave problem in the newtonian approximation .
|
||||
|
@ -1 +0,0 @@
|
||||
110 can increasing the edge loading of a plate beyond the critical value for buckling change the buckling mode .
|
||||
|
@ -1 +0,0 @@
|
||||
111 have the effects of an elastic edge restraint been considered in previous papers on panel flutter .
|
||||
|
@ -1 +0,0 @@
|
||||
112 has the solution of the clamped plate problem, in the classical theory of bending, been reduced to two successive membrane boundary value problems .
|
||||
|
@ -1 +0,0 @@
|
||||
12 how can the aerodynamic performance of channel flow ground effect machines be calculated .
|
||||
|
@ -1 +0,0 @@
|
||||
13 what is the basic mechanism of the transonic aileron buzz .
|
||||
|
@ -1 +0,0 @@
|
||||
14 papers on shock-sound wave interaction .
|
||||
|
@ -1 +0,0 @@
|
||||
15 material properties of photoelastic materials .
|
||||
|
@ -1 +0,0 @@
|
||||
16 can the transverse potential flow about a body of revolution be calculated efficiently by an electronic computer .
|
||||
|
@ -1 +0,0 @@
|
||||
17 can the three-dimensional problem of a transverse potential flow about a body of revolution be reduced to a two-dimensional problem .
|
||||
|
@ -1 +0,0 @@
|
||||
18 are experimental pressure distributions on bodies of revolution at angle of attack available .
|
||||
|
@ -1 +0,0 @@
|
||||
19 does there exist a good basic treatment of the dynamics of re-entry combining consideration of realistic effects with relative simplicity of results .
|
||||
|
@ -1 +0,0 @@
|
||||
2 what are the structural and aeroelastic problems associated with flight of high speed aircraft .
|
||||
|
@ -1 +0,0 @@
|
||||
20 has anyone formally determined the influence of joule heating, produced by the induced current, in magnetohydrodynamic free convection flows under general conditions .
|
||||
|
@ -1 +0,0 @@
|
||||
21 why does the compressibility transformation fail to correlate the high speed data for helium and air .
|
||||
|
@ -1 +0,0 @@
|
||||
22 did anyone else discover that the turbulent skin friction is not over sensitive to the nature of the variation of the viscosity with temperature .
|
||||
|
@ -1 +0,0 @@
|
||||
23 what progress has been made in research on unsteady aerodynamics .
|
||||
|
@ -1 +0,0 @@
|
||||
24 what are the factors which influence the time required to invert large structural matrices .
|
||||
|
@ -1 +0,0 @@
|
||||
25 does a practical flow follow the theoretical concepts for the interaction between adjacent blade rows of a supersonic cascade .
|
||||
|
@ -1 +0,0 @@
|
||||
26 what is a single approximate formula for the displacement thickness of a laminar boundary layer in compressible flow on a flat plate .
|
||||
|
@ -1 +0,0 @@
|
||||
27 how is the design of ring or part ring wings by linear theory affected by thickness .
|
||||
|
@ -1 +0,0 @@
|
||||
28 what application has the linear theory design of curved wings .
|
||||
|
@ -1 +0,0 @@
|
||||
29 what is the effect of cross sectional shape on the flow over simple delta wings with sharp leading edges .
|
||||
|
@ -1 +0,0 @@
|
||||
3 what problems of heat conduction in composite slabs have been solved so far .
|
||||
|
@ -1 +0,0 @@
|
||||
30 papers on flow visualization on slender conical wings .
|
||||
|
@ -1 +0,0 @@
|
||||
31 what size of end plate can be safely used to simulate two-dimensional flow conditions over a bluff cylindrical body of finite aspect ratio .
|
||||
|
@ -1 +0,0 @@
|
||||
32 to find an approximate correction for thickness in slender thin-wing theory .
|
||||
|
@ -1 +0,0 @@
|
||||
33 how do interference-free longitudinal stability measurements (made using free-flight models) compare with similar measurements made in a low-blockage wind tunnel .
|
||||
|
@ -1 +0,0 @@
|
||||
34 have wind tunnel interference effects been investigated on a systematic basis .
|
||||
|
@ -1 +0,0 @@
|
||||
35 are there any papers dealing with acoustic wave propagation in reacting gases .
|
||||
|
@ -1 +0,0 @@
|
||||
36 has anyone investigated relaxation effects on gaseous heat transfer to a suddenly heated wall .
|
||||
|
@ -1 +0,0 @@
|
||||
37 are there any theoretical methods for predicting base pressure .
|
||||
|
@ -1 +0,0 @@
|
||||
38 does transition in the hypersonic wake depend on body geometry and size
|
||||
|
@ -1 +0,0 @@
|
||||
39 how can one detect transition phenomena in boundary layers .
|
||||
|
@ -1 +0,0 @@
|
||||
4 can a criterion be developed to show empirically the validity of flow solutions for chemically reacting gas mixtures based on the simplifying assumption of instantaneous local chemical equilibrium .
|
||||
|
@ -1 +0,0 @@
|
||||
40 how can one detect transition phenomena in hypersonic wakes .
|
||||
|
@ -1 +0,0 @@
|
||||
41 has anyone investigated and developed a simple model for the vortex wake behind a cruciform wing .
|
||||
|
@ -1 +0,0 @@
|
||||
42 what is a criterion that the transonic flow around an airfoil with a round leading edge be validly analyzed by the linearized transonic flow theory .
|
||||
|
@ -1 +0,0 @@
|
||||
43 can the transonic flow around an arbitrary smooth thin airfoil be analysed in a simple approximate way .
|
||||
|
@ -1 +0,0 @@
|
||||
44 what are the details of the rigorous kinetic theory of gases . (chapman-enskog theory) .
|
||||
|
@ -1 +0,0 @@
|
||||
45 has anyone investigated the effect of surface mass transfer on hypersonic viscous interactions .
|
||||
|
@ -1 +0,0 @@
|
||||
46 what is the combined effect of surface heat and mass transfer on hypersonic flow .
|
||||
|
@ -1 +0,0 @@
|
||||
47 what are the existing solutions for hypersonic viscous interactions over an insulated flat plate .
|
||||
|
@ -1 +0,0 @@
|
||||
48 what controls leading-edge attachment at transonic speeds .
|
||||
|
@ -1 +0,0 @@
|
||||
49 can the three-point boundary-value problem for the blasius equation be integrated numerically, using suitable transformations, without iteration on the boundary conditions .
|
||||
|
@ -1 +0,0 @@
|
||||
5 what chemical kinetic system is applicable to hypersonic aerodynamic problems .
|
||||
|
@ -1 +0,0 @@
|
||||
50 what are the effects of small amounts of gas rarefaction on the characteristics of the boundary layers on slender bodies of revolution .
|
||||
|
@ -1 +0,0 @@
|
||||
51 what is the available information pertaining to boundary layers on very slender bodies of revolution in continuum flow (the ?transverse curvature effect) .
|
||||
|
@ -1 +0,0 @@
|
||||
52 what is the available information pertaining to the effect of slight rarefaction on boundary layer flows (the ?slip? effect) .
|
||||
|
@ -1 +0,0 @@
|
||||
53 what investigations have been made of the flow field about a body moving through a rarefied, partially ionized gas in the presence of a magnetic field .
|
||||
|
@ -1 +0,0 @@
|
||||
54 how is the heat transfer downstream of the mass transfer region effected by mass transfer at the nose of a blunted cone .
|
||||
|
@ -1 +0,0 @@
|
||||
55 to what extent can the available information for incompressible boundary layers be applied to problems involving compressible boundary layers .
|
||||
|
@ -1 +0,0 @@
|
||||
56 to what extent can readily available steady-state aerodynamic data be utilized to predict lifting-surface flutter characteristics .
|
||||
|
@ -1 +0,0 @@
|
||||
57 what are the significant steady and non-steady flow characteristics which affect the flutter mechanism .
|
||||
|
@ -1 +0,0 @@
|
||||
58 is it possible to determine rates of forced convective heat transfer from heated cylinders of non-circular cross-section, (the fluid flow being along the generators) .
|
||||
|
@ -1 +0,0 @@
|
||||
59 how much is known about boundary layer flows along non-circular cylinders .
|
||||
|
@ -1 +0,0 @@
|
||||
6 what theoretical and experimental guides do we have as to turbulent couette flow behaviour .
|
||||
|
@ -1 +0,0 @@
|
||||
60 is there any simple, but practical, method for numerical integration of the mixing problem (i.e. the blasius problem with three-point boundary conditions) .
|
||||
|
@ -1 +0,0 @@
|
||||
61 does there exist a closed-form expression for the local heat transfer around a yawed cylinder .
|
||||
|
@ -1 +0,0 @@
|
||||
62 how far around a cylinder and under what conditions of flow, if any, is the velocity just outside of the boundary layer a linear function of the distance around the cylinder .
|
||||
|
@ -1 +0,0 @@
|
||||
63 where can i find pressure data on surfaces of swept cylinders .
|
||||
|
@ -1 +0,0 @@
|
||||
64 can't the static deflection shapes be used in predicting flutter in place of vibrational shapes . if so, can we provide a justification by means of an example .
|
||||
|
@ -1 +0,0 @@
|
||||
65 does the boundary layer on a flat plate in a shear flow induce a pressure gradient .
|
||||
|
@ -1 +0,0 @@
|
||||
66 can the procedure of matching inner and outer solutions for a viscous flow problem be applied when the main stream is a shear flow .
|
||||
|
@ -1 +0,0 @@
|
||||
67 can series expansions be found for the boundary layer on a flat plate in a shear flow .
|
||||
|
@ -1 +0,0 @@
|
||||
68 what possible techniques are available for computing the injection distribution corresponding to an isothermal transpiration cooled hemisphere .
|
||||
|
@ -1 +0,0 @@
|
||||
69 what is known regarding asymptotic solutions to the exact boundary layer equations .
|
||||
|
@ -1 +0,0 @@
|
||||
7 is it possible to relate the available pressure distributions for an ogive forebody at zero angle of attack to the lower surface pressures of an equivalent ogive forebody at angle of attack .
|
||||
|
@ -1 +0,0 @@
|
||||
70 previous solutions to the boundary layer similarity equations .
|
||||
|
@ -1 +0,0 @@
|
||||
71 experimental results on hypersonic viscous interaction .
|
||||
|
@ -1 +0,0 @@
|
||||
72 what has been done about viscous interactions in relatively low reynolds number flows, particularly at high mach numbers .
|
||||
|
@ -1 +0,0 @@
|
||||
73 what role does the effect of chemical reaction (particularly when out of equilibrium) play in the similitude laws governing hypersonic flows over slender aerodynamic bodies .
|
||||
|
@ -1 +0,0 @@
|
||||
74 how significant is the possible pressure of a dissociated free stream with respect to the realization of hypersonic simulation in high enthalpy wind tunnels .
|
||||
|
@ -1 +0,0 @@
|
||||
75 do the discrepancies among current analyses of the vorticity effect on stagnation-point heat transfer result primarily from the differences in the viscosity-temperature law assumed .
|
||||
|
@ -1 +0,0 @@
|
||||
76 how far can one trust the linear viscosity-temperature solution assumed in some of the analyses of hypersonic shock layer at low reynolds number .
|
||||
|
@ -1 +0,0 @@
|
||||
77 how close is the comparison of the shock layer theory with existing experiments in the low reynolds number (merged-layer) regime .
|
||||
|
@ -1 +0,0 @@
|
||||
78 has anyone explained the kink in the surge line of a multi-stage axial compressor .
|
||||
|
@ -1 +0,0 @@
|
||||
79 have any aerodynamic derivatives been measured at hypersonic mach numbers and comparison been made with theoretical work .
|
||||
|
@ -1 +0,0 @@
|
||||
8 what methods -dash exact or approximate -dash are presently available for predicting body pressures at angle of attack.
|
||||
|
@ -1 +0,0 @@
|
||||
80 are methods of measuring aerodynamic derivatives available which could be adopted for use in short running time facilities .
|
||||
|
@ -1 +0,0 @@
|
||||
81 what are wind-tunnel corrections for a two-dimensional aerofoil mounted off-centre in a tunnel .
|
||||
|
@ -1 +0,0 @@
|
||||
82 how do kuchemann's and multhopp's methods for calculating lift distributions on swept wings in subsonic flow compare with each other and with experiment .
|
||||
|
@ -1 +0,0 @@
|
||||
83 what is the present state of the theory of quasi-conical flows .
|
||||
|
@ -1 +0,0 @@
|
||||
84 references on the methods available for accurately estimating aerodynamic heat transfer to conical bodies for both laminar and turbulent flow .
|
||||
|
@ -1 +0,0 @@
|
||||
85 what parameters can seriously influence natural transition from laminar to turbulent flow on a model in a wind tunnel .
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user