schema.graphql



# Describes one group of cognostic metrics and the fields it requires.
# The group definitions written out later in this file (for example
# univariate_continuous) list these same four keys.
type CogGroupRequirements {
  # Name of the group (non-null).
  name: String!
  # Optional free-text description of the group.
  description: String
  # Non-null list of non-null FieldInfo entries, one per field of the group.
  fields: [FieldInfo!]!
  # Non-null list of non-null CogMetric entries, one per metric of the group.
  cog_types: [CogMetric!]!
}

# A single metric; CogGroupRequirements.cog_types is a list of these.
type CogMetric {
  # Name of the metric (non-null).
  name: String!
  # Description of the metric (non-null).
  description: String!
  # NOTE(review): `function` is not a built-in GraphQL scalar and no type with
  # that name is declared in the visible part of this file. Confirm that it is
  # defined elsewhere (e.g. as a custom scalar) before relying on this schema.
  func: function!
}


# Describes one field as a (dimension, typeof) pair. Both parts are nullable.
# The `fields = [...]` lists later in this file use the same pair shape,
# e.g. ('X', 'CONTINUOUS').
type FieldInfo {
  # One of the FieldDimension values.
  dimension: FieldDimension
  # One of the FieldTypeOf values.
  typeof: FieldTypeOf
}
# Allowed values for FieldInfo.dimension.
# Commas are insignificant in GraphQL, so the mixed comma/newline separation
# below is equivalent to listing one value per line.
enum FieldDimension {
  X, Y, Z
  OTHER,
  ANY
}
# Allowed values for FieldInfo.typeof.
# NOTE(review): DISCRETE_ORDERED and DISCRETE_UNORDERED look like refinements
# of DISCRETE, but nothing in this file states how they relate. Confirm.
enum FieldTypeOf {
  CONTINUOUS
  DISCRETE, DISCRETE_ORDERED, DISCRETE_UNORDERED
  DATE
  ANY
}


univariate_continuous
  name = "continuous univariate metrics"
  description = "univariate metrics for continuous data"
  fields = [('X', 'CONTINUOUS')]
  cog_types = [
    ('min', 'minimum of non NA data')
    ('max', 'maximum of non NA data')
    ('mean', 'mean of non NA data')
    ('median', 'median of non NA data')
    ('mode', 'mode of non NA data')
    ('var', 'variance of non NA data')
  ]

univariate_counts
  name = "univariate count metrics"
  description = "count metrics for univariate data"
  fields = [('X', 'ANY'), ]
  cog_types = [
    ('n', 'count of non NA data')
    ('n_na', 'count of NA data')
  ]

univariate_discrete
  name = "discrete univariate metrics"
  description = "univariate metrics for discrete data"
  fields = [('X', 'DISCRETE')]
  cog_types = [
    ('min_count_field', 'field with the minimum count')
    ('max_count_field', 'field with the maximum count')
  ]

univariate_boxpot
  name = "boxplot metrics"
  description = "univariate boxplot metrics for continuous data"
  fields = [('X', 'CONTINUOUS')]
  cog_types = [
    ('n_outlier_lower', 'count of lower outliers')
    ('min_non_outlier', 'minimum non outlier point')
    ('q1', 'first quartile value')
    ('median', 'median value')
    ('q3', 'third quartile value')
    ('max_non_outlier', 'maximum non outlier point')
    ('n_outlier_upper', 'count of upper outliers')
  ]


bivariate_continuous
  name = "continuous bivariate metrics"
  description = "bivariate metrics for continuous data"
  fields = [('X', 'CONTINUOUS'), ('Y', 'CONTINUOUS')]
  cog_types = [
    ('covariance', 'covariance of X and Y for non NA data')
    ('correlation', 'correlation of X and Y for non NA data')
  ]

bivariate_scagnostics
  name = "scagnostics"
  description = "bivariate metrics for continuous data"
  fields = [('X', 'CONTINUOUS'), ('Y', 'CONTINUOUS')]
  cog_types = [
    ('Outlying', 'proportion of the total edge length due to extremely long edges connected to points of single degree')
    ('Skewed', 'distribution of edge lengths of a minimum spanning tree gives us information about the relative density of points in a scattered configuration')
    ('Clumpy', 'An extremely skewed distribution of MST edge lengths does not necessarily indicate clustering of points. For this, we turn to another measure based on the MST: the Hartigan and Mohanty RUNT statistic [20]. This statistic is most easily understood in terms of the single-linkage hierarchical clustering tree called a dendrogram.')
    ('Sparse', 'the 90% quantile of the edge lengths of the MST')
    ('Striated', 'summation of angles over all adjacent edges of a MST')
    ('Convex', 'ratio of the area of the alpha hull and the area of the convex hull')
    ('Skinny', 'ratio of perimeter to area of a polygon measures, roughly, how skinny it is')
    ('Stringy', 'The stringy measure is based on the π index, which is the ratio of width to length of a network')
    ('Monotonic', 'squared Spearman correlation coefficient, which is a Pearson correlation on the ranks of x and y (corrected for ties)')
  ]

bivariate_counts
  name = "bivariate count metrics"
  description = "count metrics for bivariate data"
  fields = [('X', 'ANY'), ('Y', 'ANY')]
  cog_types = [
    ('n', 'count of non NA data')
    ('n_both_na', 'count of data where both X and Y are NA')
    ('n_or_na', 'count of data where either X or Y are NA')
    ('n_x_na', 'count of X NA data')
    ('n_y_na', 'count of Y NA data')
  ]

linear_model
  name = "linear model"
  description = "linear model metrics"
  fields = [('X', 'CONTINUOUS'), ('Y', 'CONTINUOUS'), ?('GROUP', 'ANY')]
  cog_types = [
    ('intercept', 'intercept of model')
    (coef_i, 'beta value of coefficient i')
    (coef_i_p_value, 'significance of coefficient i')
    ('r2', 'fraction of variance explained by the model')
    ('sigma', 'square root of the estimated residual variance')
    ('statistic', 'F-statistic of the model')
    ('p.value', 'p-value from the F test')
    ('df', 'degrees of freedom used by the coefficients')
    ('logLik', 'log-likelihood value under the model')
    ('AIC', "Akaike's An Information Criterion")
    ('BIC', "Schwarz's Bayesian criterion")
    ('deviance', 'quality-of-fit statistic of the model')
    ('df.residual', 'residual degrees of freedom')
    ('n_sig_cooks', 'number of significant cooks distance points. (sum(cooks_distance >= 4/n))')
    ('n_sig_hat', 'number of significant influence points. (sum(hat > 2 * p / n))')
    ('resid_shapiro', 'test for normality of residuals')
  ]

box_cox
  name = "box cox"
  description = "box cox transformation values"
  fields = [('Y', 'CONTINUOUS')]
  cog_types = [
    ('lower', 'lower of 95% interval')
    ('upper', 'upper of 95% interval')
  ]

grouped_counts
  name = "grouped counts"
  description = "count information for grouped data"
  fields = [('OTHER', 'CONTINUOUS')]
  cog_types = [
    ('min', 'minimum of non NA data')
    ('max', 'maximum of non NA data')
    ('mean', 'mean of non NA data')
    ('median', 'median of non NA data')
    ('mode', 'mode of non NA data')
    ('var', 'variance of non NA data')
  ]


grouped_testing
  name = "grouped testing"
  description = "test differences between univariate groups"
  fields = [('ANY', 'CONTINUOUS')]
  cog_types = [
    ('anova_f_value', 'F value of anova test')
    ('anova_p_value', 'p value of anova F value')
    ('sig_diff', 'number of significantly different pairs')
  ]


# Associates a named plot entry with the cognostic groups that apply to it.
# NOTE(review): presumably one entry per plot/geom listed in the Geoms
# section of this file. Confirm.
type PlotAutoCogs {
  # Name of the entry (non-null).
  name: String!
  # Optional free-text description.
  description: String
  # NOTE(review): `Picture` is not declared in the visible part of this file.
  # Confirm that it is defined elsewhere.
  example: Picture
  cognostics: [String!]! # list of all cognostics names. ex: univariate_counts, univariate_continuous
}


Geoms

  geom_bar geom_col stat_count
    - Bar charts
    * univariate_counts(x)
    ** grouped_counts(counts...)
    ** grouped_testing(x...)

  geom_hex stat_bin_hex
    - Hexagonal heatmap of 2d bin counts
  geom_bin2d stat_bin_2d
    - Heatmap of 2d bin counts
    * bivariate_counts(x, y)
    * chi_square_test(bin_counts)

  geom_boxplot stat_boxplot
    - A box and whiskers plot (in the style of Tukey)
    * univariate_continuous(x)
    * univariate_boxpot(x)
    ^ univariate_counts(x)
    ** grouped_counts(counts...)
    ** grouped_testing(x...)
    ** ('n_outlier_lower', 'count of lower outliers')
    ** ('min_non_outlier', 'minimum non outlier point')
    ** ('max_non_outlier', 'maximum non outlier point')
    ** ('n_outlier_upper', 'count of upper outliers')

  geom_count stat_sum
    - Count overlapping points
    * expected count(x)
    * bivariate_counts(x, y)
    ** grouped_counts(counts...)

  geom_density_2d stat_density_2d
    - Contours of a 2d density estimate
    * max density(x, y)
    * bivariate_continuous(x,y)
    ** grouped_counts(counts...)
    ** grouped_testing(y...)

  geom_density stat_density
    - Smoothed density estimates
  geom_violin stat_ydensity
    - Violin plot
    * ('max_density', 'maximum density value')
    * univariate_continuous(x)
    ** grouped_testing(x...)
    ** grouped_counts(counts...)

  geom_dotplot
    - Dot plot
    * univariate_continuous(x)
    * univariate_counts(x)
    ** grouped_counts(counts...)
    ** grouped_testing(x...)

  geom_freqpoly geom_histogram stat_bin
    - Histograms and frequency polygons
    * univariate_continuous(x)
    * univariate_counts(x)
    * univariate_continuous(counts)
    ** grouped_counts(counts...)
    ** grouped_testing(x...)

  geom_jitter
    - Jittered points
    * univariate_continuous(x)
    * univariate_continuous(y)
    * bivariate_continuous(x,y)
    * bivariate_counts(x,y)
    ** grouped_counts(counts...)
    ** grouped_testing(y...)

  geom_point
    - Points
    * univariate_continuous(x)
    * univariate_continuous(y)
    * bivariate_continuous(x,y)
    * bivariate_counts(x,y)
    * bivariate_scagnostics(x,y)
    ** grouped_counts(counts...)
    ** grouped_testing(y...)

  geom_qq stat_qq
    - A quantile-quantile plot
    * sum of delta^2 | above
    * sum of delta^2 | below
    * sum of delta^2
    * KS test (x, dist)

  geom_quantile stat_quantile
    - Quantile regression
    ^ contains N points
    ^ contains N points above
    ^ contains N points below
    ^ integral of area
    ** contains N points in group
    ** contains N points above in group
    ** contains N points below in group
    ** integral of area in group
    ** grouped_counts(counts...)
    ** grouped_testing(y...)

  geom_rug
    - Rug plots in the margins
    * univariate_continuous(x)
    * univariate_counts(x)
    ** grouped_counts(counts...)
    ** grouped_testing(x...)

  geom_smooth stat_smooth
    - Smoothed conditional means
    * bivariate_counts(x,y)
    ^ linear_model(x, y)
    ** linear_model(x, y, group)
    ** group variable significance (intercept)
    ** group and x interaction significance (slope)
    ** grouped_counts(counts...)

  geom_spoke
    - Line segments parameterised by location, direction and distance
    * univariate_continuous(angle) | angle is aes
    * univariate_continuous(spoke) | spoke is aes
    ** grouped_counts(counts...)
    ** grouped_testing(angle...)
    ** grouped_testing(spoke...)


Maybe Nothing?
  geom_contour stat_contour
    - 2d contours of a 3d surface
  geom_path geom_line geom_step
    - Connect observations
  geom_raster geom_rect geom_tile
    - Rectangles

Nothing
  geom_abline geom_hline geom_vline
    - Reference lines: horizontal, vertical, and diagonal
  geom_blank
    - Draw nothing
  geom_errorbarh
    - Horizontal error bars
  geom_crossbar geom_errorbar geom_linerange geom_pointrange
    - Vertical intervals: lines, crossbars & errorbars
  geom_map
    - Polygons from a reference map
  geom_polygon
    - Polygons
  geom_ribbon geom_area
    - Ribbons and area plots
  geom_segment geom_curve
    - Line segments and curves
  geom_label geom_text
    - Text
  stat_sf geom_sf coord_sf
    - Visualise sf objects


# Holds the values produced for one cognostic group.
type CogGroupAnswer {
  # NOTE(review): presumably the name of the CogGroupRequirements entry these
  # answers belong to. Confirm against the producer of this type.
  requirement_name: String!
  # Name of this answer group (non-null).
  name: String!
  # Non-null list of non-null CogValue entries.
  answers: [CogValue!]!
}

# One keyed value; CogGroupAnswer.answers is a list of these.
type CogValue {
  # Key identifying the value (non-null).
  # NOTE(review): presumably matches a CogMetric name. Confirm.
  key: String!
  # Optional free-text description of the value.
  description: String
  # NOTE(review): `Value` is not declared in the visible part of this file.
  # Confirm that it is defined elsewhere (e.g. as a custom scalar or union).
  value: Value
}