Compare commits

...

4 Commits

22 changed files with 1605 additions and 87 deletions

View File

@@ -54,7 +54,7 @@ frontend-install:
cd frontend && npm install
frontend-dev:
cd frontend && npm run dev
cd frontend && npm run dev -- --host 0.0.0.0
frontend-build:
cd frontend && npm run build
@@ -82,7 +82,7 @@ test: backend-test frontend-test
serve:
@echo "Starting backend (port 3000) and frontend (port 5173)..."
@cd backend && cargo run -- serve & cd frontend && npm run dev
@cd backend && cargo run -- serve & cd frontend && npm run dev -- --host 0.0.0.0
clean:
cd backend && cargo clean

View File

@@ -47,3 +47,9 @@ regex = "1"
# CLI
argh = "0.1"
reqwest = { version = "0.12.26", features = ["multipart", "json"] }
serde_json = "1.0.145"
# PDF parsing for page count
lopdf = "0.36"
strsim = "0.11"

245
backend/ocr_schema.json Normal file
View File

@@ -0,0 +1,245 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"name": "LabReport",
"description": "Extract biomarker data from a medical lab report",
"type": "object",
"properties": {
"patient_name": {
"type": "string",
"description": "Full name of the patient"
},
"patient_age": {
"type": "integer",
"description": "Age of the patient in years"
},
"patient_gender": {
"type": "string",
"enum": [
"male",
"female",
"other"
],
"description": "Gender of the patient"
},
"lab_name": {
"type": "string",
"description": "Name of the laboratory"
},
"test_date": {
"type": "string",
"description": "Date when the sample was collected (YYYY-MM-DD format if possible)"
},
"report_id": {
"type": "string",
"description": "Report ID, barcode, or reference number"
},
"biomarkers": {
"type": "array",
"description": "List of biomarker test results",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "Name of the biomarker/test",
"enum": [
"ARSENIC",
"CADMIUM",
"MERCURY",
"LEAD",
"CHROMIUM",
"BARIUM",
"COBALT",
"CAESIUM",
"THALLIUM",
"URANIUM",
"STRONTIUM",
"ANTIMONY",
"TIN",
"MOLYBDENUM",
"SILVER",
"VANADIUM",
"BERYLLIUM",
"BISMUTH",
"SELENIUM",
"ALUMINIUM",
"NICKEL",
"MANGANESE",
"GLYCOSYLATED HEMOGLOBIN (HbA1c)",
"AVERAGE BLOOD GLUCOSE (ABG)",
"FASTING BLOOD SUGAR (GLUCOSE)",
"INSULIN FASTING",
"FRUCTOSAMINE",
"BLOOD KETONE (D3HB)",
"ERYTHROCYTE SEDIMENTATION RATE (ESR)",
"ANTI CCP (ACCP)",
"ANTI NUCLEAR ANTIBODIES (ANA)",
"HEMOGLOBIN",
"HEMATOCRIT (PCV)",
"TOTAL RED BLOOD CELL COUNT (RBC)",
"MEAN CORPUSCULAR VOLUME (MCV)",
"MEAN CORPUSCULAR HEMOGLOBIN (MCH)",
"MEAN CORP. HEMO. CONC (MCHC)",
"RED CELL DISTRIBUTION WIDTH - SD (RDW-SD)",
"RED CELL DISTRIBUTION WIDTH (RDW-CV)",
"TOTAL LEUCOCYTE COUNT (WBC)",
"NEUTROPHILS PERCENTAGE",
"LYMPHOCYTES PERCENTAGE",
"MONOCYTES PERCENTAGE",
"EOSINOPHILS PERCENTAGE",
"BASOPHILS PERCENTAGE",
"IMMATURE GRANULOCYTE PERCENTAGE (IG%)",
"NUCLEATED RED BLOOD CELLS %",
"NEUTROPHILS ABSOLUTE COUNT",
"LYMPHOCYTES ABSOLUTE COUNT",
"MONOCYTES - ABSOLUTE COUNT",
"BASOPHILS ABSOLUTE COUNT",
"EOSINOPHILS ABSOLUTE COUNT",
"IMMATURE GRANULOCYTES (IG)",
"NUCLEATED RED BLOOD CELLS",
"PLATELET COUNT",
"MEAN PLATELET VOLUME (MPV)",
"PLATELET DISTRIBUTION WIDTH (PDW)",
"PLATELET TO LARGE CELL RATIO (PLCR)",
"PLATELETCRIT (PCT)",
"VITAMIN A",
"VITAMIN E",
"VITAMIN K",
"VITAMIN B1 (THIAMIN)",
"VITAMIN B2 (RIBOFLAVIN)",
"VITAMIN B3 (NIACIN/NICOTINIC ACID)",
"VITAMIN B5 (PANTOTHENIC ACID)",
"VITAMIN B6 (PYRIDOXAL-5-PHOSPHATE)",
"VITAMIN B7 (BIOTIN)",
"VITAMIN B9 (FOLIC ACID)",
"VITAMIN B12 (COBALAMIN)",
"VITAMIN D TOTAL",
"VITAMIN D2",
"VITAMIN D3",
"CORTISOL",
"CORTICOSTERONE",
"ANDROSTENEDIONE",
"ESTRADIOL",
"TESTOSTERONE",
"PROGESTERONE",
"17-HYDROXYPROGESTERONE",
"DEHYDROEPIANDROSTERONE (DHEA)",
"DHEA - SULPHATE (DHEAS)",
"DEOXYCORTISOL",
"ALPHA-1-ANTITRYPSIN (AAT)",
"HOMOCYSTEINE",
"TROPONIN I",
"HIGH SENSITIVITY C-REACTIVE PROTEIN (HS-CRP)",
"LIPOPROTEIN (A) [Lp(a)]",
"LIPOPROTEIN-ASSOCIATED PHOSPHOLIPASE A2 (LP-PLA2)",
"CYSTATIN C",
"BLOOD UREA NITROGEN (BUN)",
"UREA (CALCULATED)",
"CREATININE - SERUM",
"UREA / SR.CREATININE RATIO",
"BUN / SR.CREATININE RATIO",
"CALCIUM",
"URIC ACID",
"ESTIMATED GLOMERULAR FILTRATION RATE (eGFR)",
"TOTAL CHOLESTEROL",
"HDL CHOLESTEROL - DIRECT",
"LDL CHOLESTEROL - DIRECT",
"TRIGLYCERIDES",
"VLDL CHOLESTEROL",
"NON-HDL CHOLESTEROL",
"TC / HDL CHOLESTEROL RATIO",
"LDL / HDL RATIO",
"HDL / LDL RATIO",
"TRIG / HDL RATIO",
"APOLIPOPROTEIN - A1 (APO-A1)",
"APOLIPOPROTEIN - B (APO-B)",
"APO B / APO A1 RATIO",
"IRON",
"TOTAL IRON BINDING CAPACITY (TIBC)",
"% TRANSFERRIN SATURATION",
"FERRITIN",
"UNSAT. IRON-BINDING CAPACITY (UIBC)",
"ALKALINE PHOSPHATASE",
"BILIRUBIN - TOTAL",
"BILIRUBIN - DIRECT",
"BILIRUBIN (INDIRECT)",
"GAMMA GLUTAMYL TRANSFERASE (GGT)",
"ASPARTATE AMINOTRANSFERASE (SGOT)",
"ALANINE TRANSAMINASE (SGPT)",
"SGOT / SGPT RATIO",
"PROTEIN - TOTAL",
"ALBUMIN - SERUM",
"SERUM GLOBULIN",
"SERUM ALB/GLOBULIN RATIO",
"SODIUM",
"POTASSIUM",
"CHLORIDE",
"MAGNESIUM",
"TOTAL TRIIODOTHYRONINE (T3)",
"TOTAL THYROXINE (T4)",
"TSH ULTRASENSITIVE",
"SERUM COPPER",
"SERUM ZINC",
"AMYLASE",
"LIPASE",
"URINARY MICROALBUMIN",
"CREATININE - URINE",
"URI. ALBUMIN/CREATININE RATIO",
"URINE COLOUR",
"URINE APPEARANCE",
"URINE SPECIFIC GRAVITY",
"URINE PH",
"URINARY PROTEIN",
"URINARY GLUCOSE",
"URINE KETONE",
"URINARY BILIRUBIN",
"UROBILINOGEN",
"BILE SALT",
"BILE PIGMENT",
"URINE BLOOD",
"NITRITE",
"LEUCOCYTE ESTERASE",
"MUCUS",
"URINE RBC",
"URINARY LEUCOCYTES (PUS CELLS)",
"EPITHELIAL CELLS",
"CASTS",
"CRYSTALS",
"BACTERIA",
"YEAST",
"PARASITE",
"WEIGHT",
"HEIGHT",
"BODY MASS INDEX (BMI)",
"HEART RATE",
"BLOOD PRESSURE SYSTOLIC",
"BLOOD PRESSURE DIASTOLIC",
"OXYGEN SATURATION (SpO2)",
"BODY TEMPERATURE",
"STEPS",
"CALORIES BURNED"
]
},
"value": {
"type": "number",
"description": "Observed/measured value"
},
"value_string": {
"type": "string",
"description": "Value as string if non-numeric (e.g., 'Negative', 'Trace', '> 65')"
},
"unit": {
"type": "string",
"description": "Unit of measurement"
}
},
"required": [
"name"
]
}
}
},
"required": [
"biomarkers"
]
}

View File

@@ -10,6 +10,7 @@ paths:
database: "./data/zhealth.db"
logs: "./logs"
uploads: "./data/uploads"
max_upload_mb: 50 # Maximum file upload size in MB
logging:
level: "info" # Options: trace | debug | info | warn | error
@@ -29,3 +30,11 @@ ai:
provider: "gemini" # Options: gemini | openai | anthropic
model: "gemini-3-flash-preview"
api_key: "${AI_API_KEY}"
# Mistral OCR for document parsing
# Note: API key is set per-user in Profile settings (BYOK)
mistral:
ocr_model: "mistral-ocr-latest"
max_pages_per_request: 8
max_retries: 2 # Max retry attempts per chunk
timeout_secs: 120 # Request timeout in seconds

View File

@@ -214,7 +214,7 @@ biomarkers:
# ============================================================================
# DIABETES / METABOLIC - Scale-based interpretations
# ============================================================================
- name: "HbA1c"
- name: "GLYCOSYLATED HEMOGLOBIN (HbA1c)"
test_category: DIABETES
category: metabolic
unit: "%"
@@ -339,7 +339,7 @@ biomarkers:
min: 36.0
max: 44.0
- name: "TOTAL RBC"
- name: "TOTAL RED BLOOD CELL COUNT (RBC)"
test_category: HEMOGRAM
category: blood
unit: "10^6/µL"
@@ -614,7 +614,7 @@ biomarkers:
min: 0.13
max: 1.19
- name: "VITAMIN B1/THIAMIN"
- name: "VITAMIN B1 (THIAMIN)"
test_category: VITAMIN
category: vitamins
unit: "ng/mL"
@@ -623,7 +623,7 @@ biomarkers:
min: 0.5
max: 4.0
- name: "VITAMIN B2/RIBOFLAVIN"
- name: "VITAMIN B2 (RIBOFLAVIN)"
test_category: VITAMIN
category: vitamins
unit: "ng/mL"
@@ -632,7 +632,7 @@ biomarkers:
min: 1.6
max: 68.2
- name: "VITAMIN B3/NICOTINIC ACID"
- name: "VITAMIN B3 (NIACIN/NICOTINIC ACID)"
test_category: VITAMIN
category: vitamins
unit: "ng/mL"
@@ -640,7 +640,7 @@ biomarkers:
reference:
max: 5.0
- name: "VITAMIN B5/PANTOTHENIC"
- name: "VITAMIN B5 (PANTOTHENIC ACID)"
test_category: VITAMIN
category: vitamins
unit: "ng/mL"
@@ -649,7 +649,7 @@ biomarkers:
min: 11.0
max: 150.0
- name: "VITAMIN B6/P5P"
- name: "VITAMIN B6 (PYRIDOXAL-5-PHOSPHATE)"
test_category: VITAMIN
category: vitamins
unit: "ng/mL"
@@ -658,7 +658,7 @@ biomarkers:
min: 5.0
max: 50.0
- name: "VITAMIN B7/BIOTIN"
- name: "VITAMIN B7 (BIOTIN)"
test_category: VITAMIN
category: vitamins
unit: "ng/mL"
@@ -667,7 +667,7 @@ biomarkers:
min: 0.2
max: 3.0
- name: "VITAMIN B9/FOLIC ACID"
- name: "VITAMIN B9 (FOLIC ACID)"
test_category: VITAMIN
category: vitamins
unit: "ng/mL"
@@ -676,7 +676,7 @@ biomarkers:
min: 0.2
max: 20.0
- name: "VITAMIN B-12"
- name: "VITAMIN B12 (COBALAMIN)"
test_category: VITAMIN
category: vitamins
unit: "pg/mL"
@@ -951,7 +951,7 @@ biomarkers:
- { min: 4, max: 10, label: "Moderate risk of future heart attack" }
- { min: 10, label: "Elevated risk of future heart attack" }
- name: "HS-CRP"
- name: "HIGH SENSITIVITY C-REACTIVE PROTEIN (HS-CRP)"
test_category: CARDIAC
category: cardiac
unit: "mg/L"
@@ -970,7 +970,7 @@ biomarkers:
reference:
max: 30.0
- name: "LP-PLA2"
- name: "LIPOPROTEIN-ASSOCIATED PHOSPHOLIPASE A2 (LP-PLA2)"
test_category: CARDIAC
category: cardiac
unit: "nmol/min/mL"
@@ -1062,7 +1062,7 @@ biomarkers:
min: 2.6
max: 6.0
- name: "eGFR"
- name: "ESTIMATED GLOMERULAR FILTRATION RATE (eGFR)"
test_category: RENAL
category: renal
unit: "mL/min/1.73m²"
@@ -1733,7 +1733,7 @@ biomarkers:
category: body
unit: "cm"
- name: "BMI"
- name: "BODY MASS INDEX (BMI)"
test_category: BODY
category: body
unit: "kg/m²"
@@ -1773,7 +1773,7 @@ biomarkers:
- { min: 80, max: 89, label: "High Blood Pressure Stage 1" }
- { min: 90, label: "High Blood Pressure Stage 2" }
- name: "SPO2"
- name: "OXYGEN SATURATION (SpO2)"
test_category: VITALS
category: vitals
unit: "%"

View File

@@ -12,6 +12,7 @@ pub struct Config {
pub auth: AuthConfig,
pub admin: AdminConfig,
pub ai: AiConfig,
pub mistral: MistralConfig,
}
#[derive(Debug, Deserialize)]
@@ -20,11 +21,12 @@ pub struct ServerConfig {
pub port: u16,
}
#[derive(Debug, Deserialize)]
#[derive(Debug, Deserialize, Clone)]
pub struct PathsConfig {
pub database: String,
pub logs: String,
pub uploads: String,
pub max_upload_mb: u32,
}
#[derive(Debug, Deserialize)]
@@ -53,6 +55,17 @@ pub struct AiConfig {
pub api_key: String,
}
#[derive(Debug, Deserialize, Clone)]
pub struct MistralConfig {
/// API key - NOT loaded from config, set at runtime from user's profile
#[serde(skip, default)]
pub api_key: String,
pub ocr_model: String,
pub max_pages_per_request: u32,
pub max_retries: u32,
pub timeout_secs: u64,
}
impl Config {
/// Load configuration from a YAML file.
pub fn load<P: AsRef<Path>>(path: P) -> anyhow::Result<Self> {

View File

@@ -6,8 +6,10 @@ use axum::{
use chrono::Utc;
use sea_orm::{ActiveModelTrait, ColumnTrait, DatabaseConnection, EntityTrait, QueryFilter, QueryOrder, Set};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use crate::models::bio::{biomarker, biomarker_entry};
use crate::models::bio::{biomarker, biomarker_entry, biomarker_reference_rule};
use crate::models::user::user;
/// Request to create a new biomarker entry.
#[derive(Deserialize)]
@@ -30,6 +32,23 @@ pub struct EntryResponse {
pub notes: Option<String>,
}
/// Response for biomarker result with reference info.
///
/// One row per catalog biomarker: the biomarker's identity, the user's most
/// recent entry for it (if any), and the reference range / interpretation
/// selected by `find_matching_rule`.
#[derive(Serialize)]
pub struct BiomarkerResult {
    pub biomarker_id: i32,
    pub name: String,
    pub category_id: i32,
    pub unit: String,
    // Latest entry — both None when the user has no recorded entry for this biomarker
    pub value: Option<f64>,
    pub measured_at: Option<String>,
    // Reference info — min/max are None when no "range"-type rule exists;
    // label/severity always carry a value (e.g. "No data", "Normal")
    pub ref_min: Option<f64>,
    pub ref_max: Option<f64>,
    pub label: String,
    pub severity: i32,
}
/// POST /api/entries - Create a new biomarker entry.
pub async fn create_entry(
State(db): State<DatabaseConnection>,
@@ -103,7 +122,7 @@ pub async fn list_user_entries(
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let bm_map: std::collections::HashMap<i32, String> = biomarkers
let bm_map: HashMap<i32, String> = biomarkers
.into_iter()
.map(|b| (b.id, b.name))
.collect();
@@ -122,3 +141,143 @@ pub async fn list_user_entries(
Ok(Json(items))
}
/// GET /api/users/:user_id/results - Get latest biomarker results with reference rules.
///
/// For every biomarker in the catalog, returns the user's most recent entry
/// (if any) together with the reference range and interpretation produced by
/// `find_matching_rule`. Returns 404 when the user does not exist and 500 on
/// any database error.
pub async fn get_user_results(
    State(db): State<DatabaseConnection>,
    Path(user_id): Path<i32>,
) -> Result<Json<Vec<BiomarkerResult>>, StatusCode> {
    // Get user profile for sex/age matching
    let user_profile = user::Entity::find_by_id(user_id)
        .one(&db)
        .await
        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
        .ok_or(StatusCode::NOT_FOUND)?;

    // Calculate age in whole years from birthdate (None when birthdate is unset)
    let user_age = user_profile.birthdate.map(|bd| {
        let today = chrono::Utc::now().date_naive();
        let years = today.years_since(bd).unwrap_or(0) as i32;
        years
    });

    // Fetch all biomarkers
    let biomarkers = biomarker::Entity::find()
        .all(&db)
        .await
        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;

    // Fetch all entries for this user, ordered by measured_at desc
    let entries = biomarker_entry::Entity::find()
        .filter(biomarker_entry::Column::UserId.eq(user_id))
        .order_by_desc(biomarker_entry::Column::MeasuredAt)
        .all(&db)
        .await
        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;

    // Build map of biomarker_id -> latest entry. Entries are sorted newest
    // first, so or_insert keeps only the first (most recent) entry per id.
    let mut latest_entries: HashMap<i32, &biomarker_entry::Model> = HashMap::new();
    for entry in &entries {
        latest_entries.entry(entry.biomarker_id).or_insert(entry);
    }

    // Fetch all reference rules, ordered by SortOrder so that the per-biomarker
    // vectors below preserve the intended rule evaluation order.
    let rules = biomarker_reference_rule::Entity::find()
        .order_by_asc(biomarker_reference_rule::Column::SortOrder)
        .all(&db)
        .await
        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;

    // Group rules by biomarker_id
    let mut rules_map: HashMap<i32, Vec<&biomarker_reference_rule::Model>> = HashMap::new();
    for rule in &rules {
        rules_map.entry(rule.biomarker_id).or_default().push(rule);
    }

    // Build results — one row per catalog biomarker, even when no entry exists.
    let mut results: Vec<BiomarkerResult> = Vec::new();
    for bm in &biomarkers {
        let entry = latest_entries.get(&bm.id);
        let value = entry.map(|e| e.value);
        let measured_at = entry.map(|e| e.measured_at.to_string());

        // Find matching reference rule.
        // NOTE(review): the profile is fetched "for sex/age matching" but sex is
        // passed as None here, so sex-specific rules are never applied — confirm
        // whether that is intentional.
        let bm_rules = rules_map.get(&bm.id).map(|v| v.as_slice()).unwrap_or(&[]);
        let (ref_min, ref_max, label, severity) = find_matching_rule(bm_rules, value, user_age, None);

        results.push(BiomarkerResult {
            biomarker_id: bm.id,
            name: bm.name.clone(),
            category_id: bm.category_id,
            unit: bm.unit.clone(),
            value,
            measured_at,
            ref_min,
            ref_max,
            label,
            severity,
        });
    }

    Ok(Json(results))
}
/// Find the best matching reference rule for a value.
///
/// Returns `(ref_min, ref_max, label, severity)`:
/// - `ref_min`/`ref_max` come from the first rule whose `rule_type` is
///   `"range"` (both `None` when no such rule exists);
/// - `label`/`severity` come from the first rule (in slice order) whose age
///   and value bounds accept the input, with fallbacks: "No reference" when
///   `rules` is empty, "No data" when `value` is `None`, and "Low"/"High"/
///   "Normal" derived from the range when no rule matched.
///
/// Rules with age bounds still match when the user's age is unknown.
/// `_user_sex` is accepted for future use but not consulted.
fn find_matching_rule(
    rules: &[&biomarker_reference_rule::Model],
    value: Option<f64>,
    user_age: Option<i32>,
    _user_sex: Option<&str>,
) -> (Option<f64>, Option<f64>, String, i32) {
    if rules.is_empty() {
        return (None, None, "No reference".to_string(), 0);
    }

    // The "range" rule defines the normal min/max shown alongside any label.
    let (ref_min, ref_max) = match rules.iter().find(|r| r.rule_type == "range") {
        Some(r) => (r.value_min, r.value_max),
        None => (None, None),
    };

    let val = match value {
        Some(v) => v,
        None => return (ref_min, ref_max, "No data".to_string(), 0),
    };

    // First rule whose (optional) age bounds and value bounds both accept the
    // input wins; an unknown age never disqualifies a rule.
    for rule in rules {
        let age_above_min = match (rule.age_min, user_age) {
            (Some(min_age), Some(age)) => age >= min_age,
            _ => true,
        };
        let age_below_max = match (rule.age_max, user_age) {
            (Some(max_age), Some(age)) => age <= max_age,
            _ => true,
        };
        if !(age_above_min && age_below_max) {
            continue;
        }

        let value_above_min = rule.value_min.map_or(true, |min| val >= min);
        let value_below_max = rule.value_max.map_or(true, |max| val <= max);
        if value_above_min && value_below_max {
            return (ref_min, ref_max, rule.label.clone(), rule.severity);
        }
    }

    // No rule matched: classify against the range when both bounds are known.
    if let (Some(min), Some(max)) = (ref_min, ref_max) {
        if val < min {
            return (ref_min, ref_max, "Low".to_string(), 1);
        }
        if val > max {
            return (ref_min, ref_max, "High".to_string(), 1);
        }
    }
    (ref_min, ref_max, "Normal".to_string(), 0)
}

View File

@@ -5,5 +5,6 @@ pub mod biomarkers;
pub mod categories;
pub mod diets;
pub mod entries;
pub mod ocr;
pub mod sources;
pub mod users;

View File

@@ -0,0 +1,183 @@
//! Biomarker matching and merging logic.
use std::collections::HashMap;
use strsim::jaro_winkler;
use super::types::{Biomarker, DocumentAnnotation, OcrResult};
/// Fuzzy matching threshold (0.0 - 1.0).
/// Names with Jaro-Winkler similarity >= this value are considered a match.
const FUZZY_THRESHOLD: f64 = 0.90;
/// Resolve an OCR-extracted biomarker name to a canonical schema name.
///
/// `valid_biomarkers` maps UPPERCASE name -> original-case canonical name;
/// the canonical (original case) name is returned on success.
///
/// Resolution order:
/// 1. Case-insensitive exact match on the full name.
/// 2. The input's trailing parenthetical alias matches a schema key
///    (e.g. `(HS-CRP)` extracted from the input).
/// 3. The input matches a trailing parenthetical alias of a schema key
///    (e.g. input `HS-CRP` vs schema `HIGH SENSITIVITY C-REACTIVE PROTEIN (HS-CRP)`).
/// 4. Highest-scoring Jaro-Winkler fuzzy match at or above `FUZZY_THRESHOLD`.
fn find_matching_biomarker(name: &str, valid_biomarkers: &HashMap<String, String>) -> Option<String> {
    let needle = name.to_uppercase();

    // 1. Fast path: direct lookup by uppercase key.
    if let Some(canonical) = valid_biomarkers.get(&needle) {
        return Some(canonical.clone());
    }

    // 2. The input itself may carry its alias in trailing parentheses.
    if let Some(input_alias) = extract_parenthetical_alias(&needle) {
        if let Some(canonical) = valid_biomarkers.get(&input_alias) {
            tracing::debug!(
                "Alias matched '{}' -> '{}' (extracted from parentheses in input)",
                name, canonical
            );
            return Some(canonical.clone());
        }
    }

    // 3. The input may itself be the alias embedded in one of the schema names.
    let reverse_hit = valid_biomarkers.iter().find(|(upper_key, _)| {
        extract_parenthetical_alias(upper_key).as_deref() == Some(needle.as_str())
    });
    if let Some((_, canonical)) = reverse_hit {
        tracing::debug!(
            "Reverse alias matched '{}' -> '{}' (input is alias in schema)",
            name, canonical
        );
        return Some(canonical.clone());
    }

    // 4. Fuzzy match: keep the best score that clears the threshold.
    let mut best: Option<(&String, f64)> = None;
    for (upper_key, canonical) in valid_biomarkers {
        let score = jaro_winkler(&needle, upper_key);
        if score >= FUZZY_THRESHOLD && best.map_or(true, |(_, s)| score > s) {
            best = Some((canonical, score));
        }
    }
    best.map(|(canonical, score)| {
        tracing::debug!(
            "Fuzzy matched '{}' -> '{}' (score: {:.3})",
            name, canonical, score
        );
        canonical.clone()
    })
}
/// Extract an abbreviation-style alias from trailing parentheses.
///
/// Only a trailing `(ALIAS)` is recognized (bracketed aliases like `[LP(A)]`
/// are not handled), and the alias must look like an abbreviation: 2-15
/// characters with no spaces. The returned alias is uppercased.
///
/// Examples:
/// - "HIGH SENSITIVITY C-REACTIVE PROTEIN (HS-CRP)" -> "HS-CRP"
/// - "EST. GLOMERULAR FILTRATION RATE (eGFR)" -> "EGFR"
/// - "LIPOPROTEIN (A) [LP(A)]" -> None (does not end with ')')
fn extract_parenthetical_alias(name: &str) -> Option<String> {
    let trimmed = name.trim();
    if !trimmed.ends_with(')') {
        return None;
    }
    let open = trimmed.rfind('(')?;
    let candidate = &trimmed[open + 1..trimmed.len() - 1];
    // Accept only abbreviation-shaped content: anything longer or containing
    // a space is more likely a descriptive suffix than an alias.
    if candidate.len() >= 2 && candidate.len() <= 15 && !candidate.contains(' ') {
        Some(candidate.to_uppercase())
    } else {
        None
    }
}
/// Merge multiple OCR results into one, filtering to only known biomarkers.
/// Uses fuzzy matching to handle name variations.
///
/// `valid_biomarkers`: HashMap<uppercase_name, original_case_name>.
///
/// Metadata fields (patient name/age/gender, lab, date) take the first
/// non-null value across results. Biomarkers are deduplicated by canonical
/// name: an entry carrying a real value replaces a placeholder-only one, and
/// biomarkers that never received a real value are dropped from the output.
pub fn merge_results(results: Vec<DocumentAnnotation>, valid_biomarkers: &HashMap<String, String>) -> OcrResult {
    // True when the biomarker carries an actual measurement: a numeric value,
    // or a string value other than the "Not Provided" placeholder.
    fn has_real_value(bm: &Biomarker) -> bool {
        bm.value.is_some()
            || bm
                .value_string
                .as_ref()
                .map(|s| !s.eq_ignore_ascii_case("not provided"))
                .unwrap_or(false)
    }

    let mut merged = OcrResult {
        patient_name: None,
        patient_age: None,
        patient_gender: None,
        lab_name: None,
        test_date: None,
        biomarkers: Vec::new(),
    };

    // Track biomarkers by canonical name, prefer ones with actual values
    let mut biomarker_map: HashMap<String, Biomarker> = HashMap::new();
    let mut skipped_count = 0;
    let mut fuzzy_matched_count = 0;

    for result in results {
        // Take first non-null metadata
        if merged.patient_name.is_none() && result.patient_name.is_some() {
            merged.patient_name = result.patient_name;
        }
        if merged.patient_age.is_none() && result.patient_age.is_some() {
            merged.patient_age = result.patient_age;
        }
        if merged.patient_gender.is_none() && result.patient_gender.is_some() {
            merged.patient_gender = result.patient_gender;
        }
        if merged.lab_name.is_none() && result.lab_name.is_some() {
            merged.lab_name = result.lab_name;
        }
        if merged.test_date.is_none() && result.test_date.is_some() {
            merged.test_date = result.test_date;
        }

        // Merge biomarkers with fuzzy matching
        if let Some(biomarkers) = result.biomarkers {
            for mut bm in biomarkers {
                let original_name = bm.name.clone();

                // Try to find a matching canonical name
                let canonical_name = match find_matching_biomarker(&bm.name, valid_biomarkers) {
                    Some(matched) => {
                        // Count only real renames. Comparing case-insensitively
                        // avoids counting exact matches whose canonical form
                        // merely differs in case from the input (e.g. schema
                        // "GLYCOSYLATED HEMOGLOBIN (HbA1c)" vs an uppercased
                        // input) as fuzzy matches.
                        if !matched.eq_ignore_ascii_case(&bm.name) {
                            fuzzy_matched_count += 1;
                        }
                        // Update the biomarker name to canonical form
                        bm.name = matched.clone();
                        matched
                    }
                    None => {
                        tracing::debug!("Skipping unknown biomarker: {}", original_name);
                        skipped_count += 1;
                        continue;
                    }
                };

                // On collision, keep whichever entry has a real value.
                if let Some(existing) = biomarker_map.get(&canonical_name) {
                    if has_real_value(&bm) && !has_real_value(existing) {
                        biomarker_map.insert(canonical_name, bm);
                    }
                } else {
                    biomarker_map.insert(canonical_name, bm);
                }
            }
        }
    }

    if skipped_count > 0 {
        tracing::info!("Skipped {} unknown biomarkers not in schema", skipped_count);
    }
    if fuzzy_matched_count > 0 {
        tracing::info!("Fuzzy matched {} biomarkers to canonical names", fuzzy_matched_count);
    }

    // Collect biomarkers from map, dropping entries that never got a real value
    merged.biomarkers = biomarker_map.into_values().filter(has_real_value).collect();

    merged
}

View File

@@ -0,0 +1,211 @@
//! Mistral API integration for OCR.
use reqwest::multipart::{Form, Part};
use serde_json::{json, Value};
use std::path::PathBuf;
use std::time::Duration;
use tokio::fs;
use crate::config::MistralConfig;
use super::types::{Biomarker, DocumentAnnotation, MistralFileResponse, MistralOcrResponse};
use super::schema::strip_descriptions;
/// Upload a file to Mistral and return the file ID.
///
/// Reads the file from disk and posts it as multipart form data to the
/// Mistral files endpoint with purpose "ocr".
///
/// The MIME type is derived from the file extension — the pipeline accepts
/// non-PDF files too (see `get_page_count`, which treats them as one page),
/// so claiming `application/pdf` unconditionally was wrong for images.
/// Unknown extensions fall back to `application/pdf`, preserving the previous
/// behavior for PDFs and extension-less files.
///
/// Errors (I/O, HTTP, or a non-2xx Mistral response) are returned as
/// human-readable strings.
pub async fn upload_to_mistral(config: &MistralConfig, file_path: &PathBuf) -> Result<String, String> {
    let client = reqwest::Client::builder()
        .timeout(Duration::from_secs(config.timeout_secs))
        .build()
        .map_err(|e| format!("Failed to create HTTP client: {}", e))?;

    let file_bytes = fs::read(file_path)
        .await
        .map_err(|e| format!("Failed to read file: {}", e))?;

    let file_name = file_path
        .file_name()
        .and_then(|n| n.to_str())
        .unwrap_or("document.pdf")
        .to_string();

    // Derive the MIME type from the extension; default to PDF for unknown types.
    let mime = match file_path
        .extension()
        .and_then(|e| e.to_str())
        .map(|e| e.to_lowercase())
        .as_deref()
    {
        Some("png") => "image/png",
        Some("jpg") | Some("jpeg") => "image/jpeg",
        Some("webp") => "image/webp",
        _ => "application/pdf",
    };

    let part = Part::bytes(file_bytes)
        .file_name(file_name)
        .mime_str(mime)
        .map_err(|e| format!("MIME error: {}", e))?;

    let form = Form::new()
        .text("purpose", "ocr")
        .part("file", part);

    let response = client
        .post("https://api.mistral.ai/v1/files")
        .header("Authorization", format!("Bearer {}", config.api_key))
        .multipart(form)
        .send()
        .await
        .map_err(|e| format!("HTTP request failed: {}", e))?;

    if !response.status().is_success() {
        let error_text = response.text().await.unwrap_or_default();
        return Err(format!("Mistral upload failed: {}", error_text));
    }

    let response_text = response.text().await
        .map_err(|e| format!("Failed to read response: {}", e))?;

    tracing::info!("Mistral file upload response: {}", response_text);

    let result: MistralFileResponse = serde_json::from_str(&response_text)
        .map_err(|e| format!("Failed to parse response: {} - raw: {}", e, response_text))?;

    tracing::info!("Parsed file upload: id={}, num_pages={:?}", result.id, result.num_pages);

    Ok(result.id)
}
/// Process OCR for specific pages of an uploaded document.
///
/// Posts the page range of a previously uploaded file (`file_id`) to the
/// Mistral OCR endpoint, together with a JSON schema — loaded from
/// `ocr_schema.json` in the working directory — describing the structured
/// annotation to return. Returns the parsed annotation, or a human-readable
/// error string on any I/O, HTTP, or parsing failure.
pub async fn ocr_pages(
    config: &MistralConfig,
    file_id: &str,
    pages: &[usize],
) -> Result<DocumentAnnotation, String> {
    let client = reqwest::Client::builder()
        .timeout(Duration::from_secs(config.timeout_secs))
        .build()
        .map_err(|e| format!("Failed to create HTTP client: {}", e))?;

    // Load the complete schema from file (blocking read; re-read on every call)
    let schema_content = std::fs::read_to_string("ocr_schema.json")
        .map_err(|e| format!("Failed to read ocr_schema.json: {}", e))?;

    let mut schema: Value = serde_json::from_str(&schema_content)
        .map_err(|e| format!("Failed to parse ocr_schema.json: {}", e))?;

    // Clean the schema - remove meta-fields that Mistral echoes back
    if let Some(obj) = schema.as_object_mut() {
        obj.remove("$schema");
        obj.remove("name");
        obj.remove("description");
    }
    // Presumably strips nested "description" fields throughout the schema —
    // see the schema module for the exact behavior.
    strip_descriptions(&mut schema);

    let body = json!({
        "model": config.ocr_model,
        "document": {
            "type": "file",
            "file_id": file_id
        },
        "pages": pages,
        "document_annotation_format": {
            "type": "json_schema",
            "json_schema": {
                "name": "LabReport",
                "schema": schema
            }
        }
    });

    let response = client
        .post("https://api.mistral.ai/v1/ocr")
        .header("Authorization", format!("Bearer {}", config.api_key))
        .header("Content-Type", "application/json")
        .json(&body)
        .send()
        .await
        .map_err(|e| format!("OCR request failed: {}", e))?;

    if !response.status().is_success() {
        let error_text = response.text().await.unwrap_or_default();
        return Err(format!("OCR failed: {}", error_text));
    }

    let result: MistralOcrResponse = response
        .json()
        .await
        .map_err(|e| format!("Failed to parse OCR response: {}", e))?;

    // The annotation arrives as a JSON *string* inside the response envelope.
    let annotation_str = result
        .document_annotation
        .ok_or_else(|| "No document annotation in response".to_string())?;

    tracing::debug!("Raw annotation from Mistral: {}", &annotation_str);

    // Mistral returns data wrapped in "properties" - extract it
    let raw_json: Value = serde_json::from_str(&annotation_str)
        .map_err(|e| format!("Failed to parse raw JSON: {}", e))?;

    let data_json = if let Some(props) = raw_json.get("properties") {
        props.clone()
    } else {
        raw_json
    };

    // Check if this is a schema-only response (no actual data): when the
    // requested pages contain nothing extractable, the response can echo the
    // schema itself (a "biomarkers" object carrying "type"/"items" keys)
    // instead of a data array. Treat that as an empty result, not an error.
    if let Some(biomarkers) = data_json.get("biomarkers") {
        if biomarkers.get("type").is_some() && biomarkers.get("items").is_some() {
            tracing::warn!("Skipping schema-only response (no data for these pages)");
            return Ok(DocumentAnnotation {
                patient_name: None,
                patient_age: None,
                patient_gender: None,
                lab_name: None,
                test_date: None,
                biomarkers: Some(vec![]),
            });
        }
    }

    let annotation = parse_annotation(&data_json)?;

    tracing::info!("Parsed annotation: patient={:?}, biomarkers={}",
        annotation.patient_name,
        annotation.biomarkers.as_ref().map(|b| b.len()).unwrap_or(0));

    Ok(annotation)
}
/// Parse annotation handling various Mistral response formats.
///
/// Top-level metadata fields are read directly off `data`. Each biomarker
/// item may be either a flat object (`{"name": ...}`) or wrapped in a
/// `"properties"` object — both shapes have been observed in Mistral output.
/// Items without a usable `name` are silently dropped; a missing or
/// non-array `biomarkers` field yields `Some(vec![])`, matching the previous
/// behavior.
fn parse_annotation(data: &Value) -> Result<DocumentAnnotation, String> {
    // Read one biomarker from a flat JSON object; None when "name" is absent.
    // Shared by both the direct and the nested "properties" formats.
    fn biomarker_from(obj: &Value) -> Option<Biomarker> {
        let name = obj.get("name").and_then(|v| v.as_str())?;
        Some(Biomarker {
            name: name.to_string(),
            value: obj.get("value").and_then(|v| v.as_f64()),
            value_string: obj.get("value_string").and_then(|v| v.as_str()).map(|s| s.to_string()),
            unit: obj.get("unit").and_then(|v| v.as_str()).map(|s| s.to_string()),
        })
    }

    let patient_name = data.get("patient_name").and_then(|v| v.as_str()).map(|s| s.to_string());
    let patient_age = data.get("patient_age").and_then(|v| v.as_i64()).map(|n| n as i32);
    let patient_gender = data.get("patient_gender").and_then(|v| v.as_str()).map(|s| s.to_string());
    let lab_name = data.get("lab_name").and_then(|v| v.as_str()).map(|s| s.to_string());
    let test_date = data.get("test_date").and_then(|v| v.as_str()).map(|s| s.to_string());

    // Parse biomarkers - try the direct format first, then the nested
    // "properties" format, skipping items that match neither.
    let biomarkers: Vec<Biomarker> = data
        .get("biomarkers")
        .and_then(|v| v.as_array())
        .map(|items| {
            items
                .iter()
                .filter_map(|item| {
                    biomarker_from(item)
                        .or_else(|| item.get("properties").and_then(biomarker_from))
                })
                .collect()
        })
        .unwrap_or_default();

    Ok(DocumentAnnotation {
        patient_name,
        patient_age,
        patient_gender,
        lab_name,
        test_date,
        biomarkers: Some(biomarkers),
    })
}

View File

@@ -0,0 +1,322 @@
//! OCR API handlers - Mistral OCR integration for document parsing.
mod matching;
mod mistral;
mod schema;
mod types;
use std::path::PathBuf;
use axum::{
extract::{Path, State},
http::StatusCode,
Json,
};
use sea_orm::{ActiveModelTrait, ColumnTrait, EntityTrait, QueryFilter, Set};
use crate::models::bio::{biomarker, biomarker_entry, source};
// Re-export public types
pub use types::{ErrorResponse, OcrState, ParseResponse};
/// Get page count from a local file.
/// For PDFs, uses lopdf to read the actual page count.
/// For other file types (images, etc.), returns 1.
fn get_page_count(file_path: &PathBuf) -> usize {
let extension = file_path.extension()
.and_then(|e| e.to_str())
.unwrap_or("")
.to_lowercase();
if extension == "pdf" {
match lopdf::Document::load(file_path) {
Ok(doc) => {
let count = doc.get_pages().len();
tracing::info!("PDF page count (local): {}", count);
count
}
Err(e) => {
tracing::warn!("Failed to read PDF page count: {}, defaulting to 1", e);
1
}
}
} else {
tracing::info!("Non-PDF file, treating as 1 page");
1
}
}
/// POST /api/sources/:id/parse - Parse a source document using Mistral OCR.
/// Returns immediately with "processing" status; OCR runs in background.
///
/// Flow: look up the source, mark it "processing", resolve the user's own
/// Mistral API key (BYOK — there is no server-wide fallback key), then spawn
/// a background task for the actual OCR. If no key is configured, the status
/// is reverted to "pending" so the source can be retried later.
pub async fn parse_source(
    State(state): State<OcrState>,
    Path(id): Path<i32>,
) -> Result<Json<ParseResponse>, (StatusCode, Json<ErrorResponse>)> {
    use crate::models::user::user;

    // 1. Get source from database
    let source_entity = source::Entity::find_by_id(id)
        .one(&state.db)
        .await
        .map_err(|e| {
            (
                StatusCode::INTERNAL_SERVER_ERROR,
                Json(ErrorResponse {
                    error: format!("Database error: {}", e),
                }),
            )
        })?
        .ok_or_else(|| {
            (
                StatusCode::NOT_FOUND,
                Json(ErrorResponse {
                    error: "Source not found".to_string(),
                }),
            )
        })?;

    // Check if already being processed — makes repeated parse requests
    // harmless while a job is in flight.
    if source_entity.status == "processing" {
        return Ok(Json(ParseResponse {
            success: true,
            biomarkers_count: 0,
            message: "Already processing".to_string(),
        }));
    }

    // Capture what the background task needs before source_entity is consumed.
    let file_path = PathBuf::from(&source_entity.file_path);
    let user_id = source_entity.user_id;

    // 2. Set status to "processing" immediately. This happens BEFORE the API
    // key check, so the failure path below must revert it.
    let mut active_model: source::ActiveModel = source_entity.into();
    active_model.status = Set("processing".to_string());
    active_model.update(&state.db).await.map_err(|e| {
        (
            StatusCode::INTERNAL_SERVER_ERROR,
            Json(ErrorResponse {
                error: format!("Database update failed: {}", e),
            }),
        )
    })?;

    // 3. User must have their own Mistral API key configured.
    // A DB error while loading the user is treated the same as "no key".
    let user_api_key = if let Ok(Some(user_entity)) = user::Entity::find_by_id(user_id).one(&state.db).await {
        user_entity.mistral_api_key
    } else {
        None
    };

    let api_key = match user_api_key {
        Some(key) if !key.is_empty() => {
            tracing::info!("Using user's Mistral API key for source {}", id);
            key
        }
        _ => {
            // Revert status back to pending since we can't process
            // (best-effort: a failed revert is ignored).
            if let Ok(Some(entity)) = source::Entity::find_by_id(id).one(&state.db).await {
                let mut revert_model: source::ActiveModel = entity.into();
                revert_model.status = Set("pending".to_string());
                let _ = revert_model.update(&state.db).await;
            }
            return Err((
                StatusCode::BAD_REQUEST,
                Json(ErrorResponse {
                    error: "Please configure your Mistral API key in Profile settings".to_string(),
                }),
            ));
        }
    };

    // Clone the shared config and inject the per-user key for this job only.
    let mut mistral_config = state.mistral.clone();
    mistral_config.api_key = api_key;

    // 4. Spawn background task for OCR processing. Its outcome is not part of
    // this response; failures are logged (and recorded on the source row by
    // the background task).
    let db = state.db.clone();
    tokio::spawn(async move {
        if let Err(e) = process_ocr_background(db, mistral_config, id, file_path).await {
            tracing::error!("Background OCR failed for source {}: {}", id, e);
        }
    });

    // 5. Return immediately; biomarkers_count is 0 because extraction has not
    // run yet.
    Ok(Json(ParseResponse {
        success: true,
        biomarkers_count: 0,
        message: "Processing started".to_string(),
    }))
}
/// Background OCR processing task
async fn process_ocr_background(
db: sea_orm::DatabaseConnection,
mistral_config: crate::config::MistralConfig,
source_id: i32,
file_path: PathBuf,
) -> Result<(), String> {
// Upload file to Mistral
let file_id = mistral::upload_to_mistral(&mistral_config, &file_path)
.await
.map_err(|e| format!("Mistral upload failed: {}", e))?;
// Get page count locally from PDF
let max_pages = get_page_count(&file_path);
let chunk_size = mistral_config.max_pages_per_request as usize;
let max_retries = mistral_config.max_retries;
let mut all_results: Vec<types::DocumentAnnotation> = Vec::new();
let mut failed_chunk: Option<String> = None;
for start_page in (0..max_pages).step_by(chunk_size) {
// Check if source still exists before processing next chunk
let source_exists = source::Entity::find_by_id(source_id)
.one(&db)
.await
.map(|opt| opt.is_some())
.unwrap_or(false);
if !source_exists {
tracing::warn!("Source {} was deleted mid-parse, aborting OCR", source_id);
return Err("Source was deleted during parsing".to_string());
}
let pages: Vec<usize> = (start_page..std::cmp::min(start_page + chunk_size, max_pages)).collect();
tracing::info!("Processing OCR for pages {:?}", pages);
// Retry loop for this chunk
let mut attempts = 0;
let mut chunk_result = None;
while attempts <= max_retries {
match mistral::ocr_pages(&mistral_config, &file_id, &pages).await {
Ok(annotation) => {
chunk_result = Some(annotation);
break;
}
Err(e) => {
if e.contains("out of range") || e.contains("no pages") || e.contains("Invalid page") {
tracing::info!("Reached end of document at pages {:?}", pages);
break;
}
attempts += 1;
if attempts <= max_retries {
tracing::warn!("OCR chunk error (pages {:?}), attempt {}/{}: {}", pages, attempts, max_retries + 1, e);
} else {
tracing::error!("OCR chunk failed after {} attempts (pages {:?}): {}", max_retries + 1, pages, e);
failed_chunk = Some(format!("Pages {:?}: {}", pages, e));
}
}
}
}
if let Some(annotation) = chunk_result {
all_results.push(annotation);
} else if failed_chunk.is_some() {
break;
} else {
break;
}
}
// Handle failure
if let Some(error_msg) = failed_chunk {
// Update status to failed
if let Ok(Some(entity)) = source::Entity::find_by_id(source_id).one(&db).await {
let mut active_model: source::ActiveModel = entity.into();
active_model.status = Set("failed".to_string());
let _ = active_model.update(&db).await;
}
return Err(format!("OCR parsing failed: {}", error_msg));
}
if all_results.is_empty() {
// Update status to failed
if let Ok(Some(entity)) = source::Entity::find_by_id(source_id).one(&db).await {
let mut active_model: source::ActiveModel = entity.into();
active_model.status = Set("failed".to_string());
let _ = active_model.update(&db).await;
}
return Err("No OCR results obtained".to_string());
}
// Get valid biomarker names from schema
let valid_biomarkers = schema::extract_valid_biomarker_names()
.map_err(|e| format!("Failed to read schema: {}", e))?;
tracing::info!("Loaded {} valid biomarker names from schema", valid_biomarkers.len());
// Merge results with fuzzy matching
let merged = matching::merge_results(all_results, &valid_biomarkers);
// Save to database
let ocr_json = serde_json::to_string(&merged)
.map_err(|e| format!("JSON serialization failed: {}", e))?;
let source_entity = source::Entity::find_by_id(source_id)
.one(&db)
.await
.map_err(|e| format!("Database error: {}", e))?
.ok_or_else(|| "Source not found".to_string())?;
let user_id = source_entity.user_id;
let mut active_model: source::ActiveModel = source_entity.into();
active_model.ocr_data = Set(Some(ocr_json));
active_model.status = Set("parsed".to_string());
active_model.biomarker_count = Set(Some(merged.biomarkers.len() as i32));
active_model.update(&db).await
.map_err(|e| format!("Database update failed: {}", e))?;
// Create biomarker entries from parsed data
let mut entries_created = 0;
let now = chrono::Utc::now().naive_utc();
// Parse test_date or use current time
let measured_at = merged.test_date
.as_ref()
.and_then(|d| chrono::NaiveDate::parse_from_str(d, "%d %b %Y").ok()
.or_else(|| chrono::NaiveDate::parse_from_str(d, "%d %b, %Y").ok())
.or_else(|| chrono::NaiveDate::parse_from_str(d, "%Y-%m-%d").ok()))
.map(|date| date.and_hms_opt(0, 0, 0).unwrap())
.unwrap_or(now);
for bio in &merged.biomarkers {
// Skip if no numeric value
let Some(value) = bio.value else { continue };
// Look up biomarker ID by name
let biomarker_entity = biomarker::Entity::find()
.filter(biomarker::Column::Name.eq(&bio.name))
.one(&db)
.await
.map_err(|e| format!("Biomarker lookup error: {}", e))?;
let Some(biomarker_entity) = biomarker_entity else { continue };
// Create entry
let entry = biomarker_entry::ActiveModel {
biomarker_id: Set(biomarker_entity.id),
user_id: Set(user_id),
measured_at: Set(measured_at),
value: Set(value),
notes: Set(bio.unit.clone()),
source_id: Set(Some(source_id)),
created_at: Set(now),
};
// Insert (ignore if duplicate composite key)
if entry.insert(&db).await.is_ok() {
entries_created += 1;
}
}
tracing::info!(
"Successfully parsed {} biomarkers, created {} entries for source {}",
merged.biomarkers.len(),
entries_created,
source_id
);
Ok(())
}

View File

@@ -0,0 +1,51 @@
//! Schema handling utilities.
use serde_json::Value;
use std::collections::HashMap;
/// Extract valid biomarker names from the ocr_schema.json enum.
/// Returns a HashMap where keys are UPPERCASE names (for matching) and values are original case names.
pub fn extract_valid_biomarker_names() -> Result<HashMap<String, String>, String> {
    let raw = std::fs::read_to_string("ocr_schema.json")
        .map_err(|e| format!("Failed to read ocr_schema.json: {}", e))?;
    let schema: Value = serde_json::from_str(&raw)
        .map_err(|e| format!("Failed to parse ocr_schema.json: {}", e))?;
    // Walk down to: properties.biomarkers.items.properties.name.enum
    let mut node = &schema;
    for key in ["properties", "biomarkers", "items", "properties", "name", "enum"] {
        node = node
            .get(key)
            .ok_or_else(|| "Could not find biomarker name enum in schema".to_string())?;
    }
    let entries = node
        .as_array()
        .ok_or_else(|| "Could not find biomarker name enum in schema".to_string())?;
    // Key = uppercase (for matching), Value = original case (for DB lookup);
    // non-string entries are silently skipped.
    let mut valid_names = HashMap::new();
    for entry in entries {
        if let Some(name) = entry.as_str() {
            valid_names.insert(name.to_uppercase(), name.to_string());
        }
    }
    Ok(valid_names)
}
/// Recursively remove "description" fields from a JSON value.
pub fn strip_descriptions(value: &mut Value) {
    if let Value::Object(fields) = value {
        // Drop this object's own description, then recurse into children.
        fields.remove("description");
        for (_, child) in fields.iter_mut() {
            strip_descriptions(child);
        }
    } else if let Value::Array(items) = value {
        for child in items.iter_mut() {
            strip_descriptions(child);
        }
    }
    // Scalars (null/bool/number/string) carry no descriptions: nothing to do.
}

View File

@@ -0,0 +1,77 @@
//! Type definitions for OCR module.
use sea_orm::DatabaseConnection;
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
use crate::config::MistralConfig;
/// State for OCR handlers.
#[derive(Clone)]
pub struct OcrState {
    /// Shared database connection.
    pub db: DatabaseConnection,
    /// Directory where uploaded source files are stored.
    pub uploads_path: PathBuf,
    /// Mistral API configuration (API key, page-chunk size, retry count).
    pub mistral: MistralConfig,
}
/// Response for parse endpoint.
#[derive(Serialize)]
pub struct ParseResponse {
    /// True when the request was accepted.
    pub success: bool,
    /// Biomarkers extracted so far (0 when parsing runs in the background).
    pub biomarkers_count: usize,
    /// Human-readable status message (e.g. "Processing started").
    pub message: String,
}
/// Error response.
#[derive(Serialize)]
pub struct ErrorResponse {
    /// Description of what went wrong.
    pub error: String,
}
/// Mistral file upload response.
#[derive(Deserialize)]
pub struct MistralFileResponse {
    /// File ID assigned by Mistral, used in subsequent OCR calls.
    pub id: String,
    /// Uploaded size in bytes (currently unused).
    #[allow(dead_code)]
    pub bytes: i64,
    /// Page count reported by Mistral, when available.
    pub num_pages: Option<usize>,
}
/// Mistral OCR response.
#[derive(Deserialize)]
pub struct MistralOcrResponse {
    /// Document-level annotation payload; presumably a JSON-encoded string
    /// matching `DocumentAnnotation` — confirm against the Mistral OCR API.
    pub document_annotation: Option<String>,
    /// Raw per-page data (currently unused).
    #[allow(dead_code)]
    pub pages: Option<Vec<serde_json::Value>>,
}
/// Extracted biomarker from OCR.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Biomarker {
    /// Name of the biomarker/test.
    pub name: String,
    /// Numeric result value, when one was recognized.
    pub value: Option<f64>,
    /// Textual form of the value; presumably used when the result is not
    /// numeric — confirm against the OCR schema.
    pub value_string: Option<String>,
    /// Measurement unit as printed on the report, if any.
    pub unit: Option<String>,
}
/// Merged OCR result.
#[derive(Debug, Serialize, Deserialize)]
pub struct OcrResult {
    pub patient_name: Option<String>,
    pub patient_age: Option<i32>,
    pub patient_gender: Option<String>,
    pub lab_name: Option<String>,
    /// Sample collection date as printed on the report (free-form string).
    pub test_date: Option<String>,
    /// Biomarkers collected across every OCR chunk after merging.
    pub biomarkers: Vec<Biomarker>,
}
/// Document annotation from Mistral.
#[derive(Debug, Deserialize)]
pub struct DocumentAnnotation {
    pub patient_name: Option<String>,
    pub patient_age: Option<i32>,
    pub patient_gender: Option<String>,
    pub lab_name: Option<String>,
    pub test_date: Option<String>,
    /// Biomarkers found in this chunk of pages, if any.
    pub biomarkers: Option<Vec<Biomarker>>,
}

View File

@@ -7,13 +7,13 @@ use axum::{
};
use axum_extra::extract::Multipart;
use chrono::Utc;
use sea_orm::{ActiveModelTrait, DatabaseConnection, EntityTrait, Set};
use sea_orm::{ActiveModelTrait, ColumnTrait, DatabaseConnection, EntityTrait, QueryFilter, Set};
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
use tokio::fs;
use tokio::io::AsyncWriteExt;
use crate::models::bio::source;
use crate::models::bio::{biomarker_entry, source};
/// Response for a source.
#[derive(Serialize)]
@@ -24,6 +24,8 @@ pub struct SourceResponse {
pub file_path: String,
pub file_type: String,
pub file_size: i64,
pub status: String,
pub biomarker_count: Option<i32>,
pub ocr_data: Option<String>,
pub description: Option<String>,
pub uploaded_at: String,
@@ -55,6 +57,8 @@ pub async fn list_sources(
file_path: s.file_path,
file_type: s.file_type,
file_size: s.file_size,
status: s.status,
biomarker_count: s.biomarker_count,
ocr_data: s.ocr_data,
description: s.description,
uploaded_at: s.uploaded_at.to_string(),
@@ -82,6 +86,8 @@ pub async fn get_source(
file_path: s.file_path,
file_type: s.file_type,
file_size: s.file_size,
status: s.status,
biomarker_count: s.biomarker_count,
ocr_data: s.ocr_data,
description: s.description,
uploaded_at: s.uploaded_at.to_string(),
@@ -166,6 +172,8 @@ pub async fn upload_source(
file_path: Set(file_path.to_string_lossy().to_string()),
file_type: Set(content_type.clone()),
file_size: Set(file_size),
status: Set("pending".to_string()),
biomarker_count: Set(None),
ocr_data: Set(None),
description: Set(description.clone()),
uploaded_at: Set(now),
@@ -187,6 +195,8 @@ pub async fn upload_source(
file_path: inserted.file_path,
file_type: inserted.file_type,
file_size: inserted.file_size,
status: inserted.status,
biomarker_count: inserted.biomarker_count,
ocr_data: inserted.ocr_data,
description: inserted.description,
uploaded_at: inserted.uploaded_at.to_string(),
@@ -205,6 +215,13 @@ pub async fn delete_source(
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
.ok_or(StatusCode::NOT_FOUND)?;
// Delete related biomarker entries first (cascade delete)
biomarker_entry::Entity::delete_many()
.filter(biomarker_entry::Column::SourceId.eq(id))
.exec(&state.db)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
// Delete file from disk
if let Err(e) = fs::remove_file(&s.file_path).await {
tracing::warn!("Failed to delete file {}: {:?}", s.file_path, e);
@@ -256,6 +273,8 @@ pub async fn update_ocr(
file_path: updated.file_path,
file_type: updated.file_type,
file_size: updated.file_size,
status: updated.status,
biomarker_count: updated.biomarker_count,
ocr_data: updated.ocr_data,
description: updated.description,
uploaded_at: updated.uploaded_at.to_string(),

View File

@@ -36,6 +36,7 @@ pub struct UpdateUserRequest {
pub alcohol: Option<bool>,
pub diet_id: Option<i32>,
pub avatar_url: Option<String>,
pub mistral_api_key: Option<String>,
}
/// Response for a user.
@@ -52,6 +53,7 @@ pub struct UserResponse {
pub alcohol: Option<bool>,
pub diet: Option<String>,
pub avatar_url: Option<String>,
pub has_mistral_key: bool,
pub created_at: String,
}
@@ -88,16 +90,17 @@ pub async fn list_users(
.into_iter()
.map(|u| UserResponse {
id: u.id,
username: u.username,
name: u.name,
username: u.username.clone(),
name: u.name.clone(),
role: role_map.get(&u.role_id).cloned().unwrap_or_default(),
height_cm: u.height_cm,
blood_type: u.blood_type,
blood_type: u.blood_type.clone(),
birthdate: u.birthdate.map(|d| d.to_string()),
smoking: u.smoking,
alcohol: u.alcohol,
diet: u.diet_id.and_then(|id| diet_map.get(&id).cloned()),
avatar_url: u.avatar_url,
avatar_url: u.avatar_url.clone(),
has_mistral_key: u.mistral_api_key.is_some(),
created_at: u.created_at.to_string(),
})
.collect();
@@ -145,6 +148,7 @@ pub async fn get_user(
alcohol: u.alcohol,
diet: diet_name,
avatar_url: u.avatar_url,
has_mistral_key: u.mistral_api_key.is_some(),
created_at: u.created_at.to_string(),
}))
}
@@ -229,6 +233,7 @@ pub async fn create_user(
alcohol: inserted.alcohol,
diet: diet_name,
avatar_url: inserted.avatar_url,
has_mistral_key: inserted.mistral_api_key.is_some(),
created_at: inserted.created_at.to_string(),
}))
}
@@ -279,6 +284,9 @@ pub async fn update_user(
if req.avatar_url.is_some() {
active.avatar_url = Set(req.avatar_url);
}
if req.mistral_api_key.is_some() {
active.mistral_api_key = Set(req.mistral_api_key);
}
active.updated_at = Set(now);
let updated = active
@@ -317,6 +325,7 @@ pub async fn update_user(
alcohol: updated.alcohol,
diet: diet_name,
avatar_url: updated.avatar_url,
has_mistral_key: updated.mistral_api_key.is_some(),
created_at: updated.created_at.to_string(),
}))
}

View File

@@ -143,6 +143,7 @@ fn create_router(db: DatabaseConnection, config: &config::Config) -> Router {
// Entries API
.route("/api/entries", post(handlers::entries::create_entry))
.route("/api/users/{user_id}/entries", get(handlers::entries::list_user_entries))
.route("/api/users/{user_id}/results", get(handlers::entries::get_user_results))
.route_layer(middleware::from_fn(require_auth));
// Sources routes (need separate state for uploads path)
@@ -156,13 +157,26 @@ fn create_router(db: DatabaseConnection, config: &config::Config) -> Router {
.route("/api/sources/{id}", get(handlers::sources::get_source)
.delete(handlers::sources::delete_source))
.route("/api/sources/{id}/ocr", put(handlers::sources::update_ocr))
.layer(axum::extract::DefaultBodyLimit::max(config.paths.max_upload_mb as usize * 1024 * 1024))
.route_layer(middleware::from_fn(require_auth))
.with_state(sources_state);
// OCR routes (need Mistral config)
let ocr_state = handlers::ocr::OcrState {
db: db.clone(),
uploads_path: PathBuf::from(&config.paths.uploads),
mistral: config.mistral.clone(),
};
let ocr_routes = Router::new()
.route("/api/sources/{id}/parse", post(handlers::ocr::parse_source))
.route_layer(middleware::from_fn(require_auth))
.with_state(ocr_state);
Router::new()
.merge(public_routes)
.merge(protected_routes)
.merge(sources_routes)
.merge(ocr_routes)
.layer(auth_layer)
.with_state(db)
}
@@ -185,10 +199,18 @@ async fn require_auth(
}
fn init_logging(config: &config::Config) {
let log_level = config.logging.level.parse().unwrap_or(tracing::Level::INFO);
// Build filter: use configured level for our code, but restrict sqlx/sea_orm
let filter_str = format!(
"{},sqlx=warn,sea_orm=warn",
config.logging.level
);
let filter = tracing_subscriber::filter::EnvFilter::try_new(&filter_str)
.unwrap_or_else(|_| tracing_subscriber::filter::EnvFilter::new("info,sqlx=warn,sea_orm=warn"));
tracing_subscriber::registry()
.with(tracing_subscriber::fmt::layer())
.with(tracing_subscriber::filter::LevelFilter::from_level(log_level))
.with(filter)
.init();
}

View File

@@ -27,6 +27,14 @@ pub struct Model {
/// File size in bytes
pub file_size: i64,
/// Parsing status: "pending", "processing", "parsed", "failed"
#[sea_orm(column_type = "Text")]
pub status: String,
/// Number of biomarkers extracted (populated after parsing)
#[sea_orm(nullable)]
pub biomarker_count: Option<i32>,
/// OCR parsed data as JSON
#[sea_orm(column_type = "Text", nullable)]
pub ocr_data: Option<String>,

View File

@@ -44,6 +44,9 @@ pub struct Model {
/// URL to profile avatar icon
pub avatar_url: Option<String>,
/// User's own Mistral API key (BYOK - Bring Your Own Key)
pub mistral_api_key: Option<String>,
pub created_at: DateTime,
pub updated_at: DateTime,
}

View File

@@ -650,8 +650,8 @@ select.input {
.biomarker-row {
display: flex;
align-items: center;
gap: var(--space-md);
padding: var(--space-xs) var(--space-sm);
gap: var(--space-sm);
padding: var(--space-sm);
border-radius: var(--radius-sm);
transition: background-color 0.15s;
}
@@ -661,11 +661,12 @@ select.input {
}
.biomarker-dot {
width: 10px;
height: 10px;
width: 12px;
height: 12px;
border-radius: 50%;
background: var(--text-secondary);
flex-shrink: 0;
box-shadow: 0 0 3px rgba(0, 0, 0, 0.2);
}
.biomarker-dot.status-low {
@@ -681,36 +682,49 @@ select.input {
}
.biomarker-info {
flex: 0 0 320px;
flex: 0 0 280px;
min-width: 0;
display: flex;
flex-direction: row;
align-items: baseline;
gap: var(--space-xs);
flex-direction: column;
gap: 2px;
}
.biomarker-info .biomarker-name {
font-size: 14px;
.biomarker-name {
font-size: 0.875rem;
font-weight: 500;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}
.biomarker-info .biomarker-unit {
font-size: 11px;
flex-shrink: 0;
.biomarker-unit,
.biomarker-value {
font-size: 0.75rem;
color: var(--text-secondary);
}
.biomarker-value {
font-weight: 600;
color: var(--text-primary);
}
/* Biomarker Scale Bar */
.biomarker-scale {
flex: 1;
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
position: relative;
height: 16px;
gap: 4px;
}
.scale-bar {
width: 120px;
height: 6px;
border-radius: 3px;
width: 220px;
height: 8px;
border-radius: 4px;
background: var(--border);
position: relative;
overflow: visible;
}
.scale-bar.placeholder {
@@ -723,13 +737,26 @@ select.input {
.scale-marker {
position: absolute;
top: 0;
bottom: 0;
width: 3px;
background: var(--text-primary);
border-radius: 2px;
transform: translateX(-50%);
box-shadow: 0 0 2px rgba(0, 0, 0, 0.3);
top: 50%;
width: 12px;
height: 12px;
border-radius: 50%;
transform: translate(-50%, -50%);
box-shadow: 0 0 4px rgba(0, 0, 0, 0.4);
border: 2px solid var(--bg-secondary);
background: var(--accent);
}
.scale-labels {
display: flex;
justify-content: space-between;
width: 140px;
font-size: 0.65rem;
color: var(--text-secondary);
}
.text-muted {
color: var(--text-secondary);
}
/* App Layout with Sidebar */

View File

@@ -6,30 +6,61 @@ interface Category {
description: string | null
}
interface Biomarker {
id: number
category_id: number
interface BiomarkerResult {
biomarker_id: number
name: string
test_category: string
category_id: number
unit: string
methodology: string | null
value: number | null
measured_at: string | null
ref_min: number | null
ref_max: number | null
label: string
severity: number
}
// Severity to color mapping
const severityColors: Record<number, string> = {
0: 'var(--indicator-normal)', // Normal - green
1: 'var(--indicator-warning)', // Mild - yellow/orange
2: '#ff8c00', // Moderate - dark orange
3: 'var(--indicator-critical)', // Severe - red
4: '#8b0000', // Critical - dark red
}
export function DashboardPage() {
const [categories, setCategories] = useState<Category[]>([])
const [biomarkers, setBiomarkers] = useState<Biomarker[]>([])
const [results, setResults] = useState<BiomarkerResult[]>([])
const [expandedCategories, setExpandedCategories] = useState<Set<number>>(new Set())
const [loading, setLoading] = useState(true)
useEffect(() => {
Promise.all([
fetch('/api/categories', { credentials: 'include' }).then(r => r.json()),
fetch('/api/biomarkers', { credentials: 'include' }).then(r => r.json()),
]).then(([cats, bms]) => {
setCategories(cats)
setBiomarkers(bms)
const fetchData = async () => {
try {
// Get current user
const authRes = await fetch('/api/auth/me', { credentials: 'include' })
if (!authRes.ok) return
const authData = await authRes.json()
const user = authData.user
if (!user) return // Not authenticated
// Fetch categories and results in parallel
const [catsRes, resultsRes] = await Promise.all([
fetch('/api/categories', { credentials: 'include' }),
fetch(`/api/users/${user.id}/results`, { credentials: 'include' }),
])
if (catsRes.ok && resultsRes.ok) {
setCategories(await catsRes.json())
setResults(await resultsRes.json())
}
} catch (error) {
console.error('Failed to load dashboard data:', error)
} finally {
setLoading(false)
})
}
}
fetchData()
}, [])
const toggleCategory = (categoryId: number) => {
@@ -44,8 +75,20 @@ export function DashboardPage() {
})
}
const getBiomarkersForCategory = (categoryId: number) => {
return biomarkers.filter(b => b.category_id === categoryId)
const getResultsForCategory = (categoryId: number) => {
return results.filter(r => r.category_id === categoryId)
}
// Calculate scale bar position (0-100%)
const getScalePosition = (result: BiomarkerResult): number | null => {
if (result.value === null || result.ref_min === null || result.ref_max === null) {
return null
}
const range = result.ref_max - result.ref_min
if (range <= 0) return 50
// Clamp to 5-95% for visual bounds
const pos = ((result.value - result.ref_min) / range) * 100
return Math.max(5, Math.min(95, pos))
}
if (loading) {
@@ -56,15 +99,17 @@ export function DashboardPage() {
<div className="page">
<header className="page-header">
<h1>Dashboard</h1>
<p className="text-secondary">View all biomarker categories and their reference markers</p>
<p className="text-secondary">Your latest biomarker results</p>
</header>
<section>
<h2 className="mb-md">Biomarker Categories</h2>
<div className="flex-col gap-sm">
{categories.map(category => {
const categoryBiomarkers = getBiomarkersForCategory(category.id)
const categoryResults = getResultsForCategory(category.id)
const isExpanded = expandedCategories.has(category.id)
// Count how many have data
const withData = categoryResults.filter(r => r.value !== null).length
return (
<div key={category.id} className="card category-card">
@@ -75,7 +120,7 @@ export function DashboardPage() {
<div>
<span className="category-name">{category.name}</span>
<span className="text-secondary text-sm ml-sm">
({categoryBiomarkers.length} biomarkers)
({withData}/{categoryResults.length} biomarkers)
</span>
</div>
<img
@@ -90,24 +135,59 @@ export function DashboardPage() {
{isExpanded && (
<div className="category-content border-t p-sm">
{categoryBiomarkers.length === 0 ? (
{categoryResults.length === 0 ? (
<p className="text-secondary text-sm p-sm">
No biomarkers in this category
</p>
) : (
<div className="biomarker-list">
{categoryBiomarkers.map(biomarker => (
<div key={biomarker.id} className="biomarker-row">
<div className="biomarker-dot" title="No data"></div>
{categoryResults.map(result => {
const scalePos = getScalePosition(result)
const dotColor = result.value !== null
? severityColors[result.severity] || severityColors[0]
: 'var(--text-secondary)'
return (
<div key={result.biomarker_id} className="biomarker-row">
<div
className="biomarker-dot"
title={result.label}
style={{ backgroundColor: dotColor }}
/>
<div className="biomarker-info">
<span className="biomarker-name">{biomarker.name}</span>
<span className="biomarker-unit">{biomarker.unit}</span>
<span className="biomarker-name">{result.name}</span>
{result.value !== null ? (
<span className="biomarker-value">
{result.value.toFixed(2)} {result.unit}
</span>
) : (
<span className="biomarker-unit text-muted">
No data
</span>
)}
</div>
<div className="biomarker-scale">
<div className="scale-bar placeholder"></div>
<div className="scale-bar">
{scalePos !== null && (
<div
className="scale-marker"
style={{
left: `${scalePos}%`,
backgroundColor: dotColor
}}
/>
)}
</div>
{result.ref_min !== null && result.ref_max !== null && (
<div className="scale-labels">
<span>{result.ref_min}</span>
<span>{result.ref_max}</span>
</div>
)}
</div>
</div>
))}
)
})}
</div>
)}
</div>

View File

@@ -18,6 +18,7 @@ interface UserProfile {
alcohol: boolean | null
diet: string | null
avatar_url: string | null
has_mistral_key: boolean
}
export function ProfilePage() {
@@ -37,6 +38,8 @@ export function ProfilePage() {
const [alcohol, setAlcohol] = useState<boolean | null>(null)
const [dietId, setDietId] = useState<number | null>(null)
const [avatarUrl, setAvatarUrl] = useState<string | null>(null)
const [mistralApiKey, setMistralApiKey] = useState('')
const [hasMistralKey, setHasMistralKey] = useState(false)
const avatarOptions = [
...[1, 2, 3, 4, 5, 6, 7].map(i => `/icons/user/icons8-male-user-50${i === 1 ? '' : `-${i}`}.png`),
@@ -69,6 +72,7 @@ export function ProfilePage() {
const diet = dietsData.find((d: Diet) => d.name === profile.diet)
setDietId(diet?.id || null)
setAvatarUrl(profile.avatar_url)
setHasMistralKey(profile.has_mistral_key)
})
})
.finally(() => {
@@ -102,6 +106,7 @@ export function ProfilePage() {
alcohol,
diet_id: dietId,
avatar_url: avatarUrl,
mistral_api_key: mistralApiKey || null,
}),
})
@@ -277,6 +282,27 @@ export function ProfilePage() {
</div>
</div>
{/* API Keys */}
<div className="card mb-lg">
<h3 className="mb-md">API Keys</h3>
<p className="text-secondary text-sm mb-md">Use your own Mistral API key for document parsing (optional)</p>
<div className="form-group">
<label htmlFor="mistralKey">Mistral API Key</label>
<input
id="mistralKey"
type="password"
className="input"
value={mistralApiKey}
onChange={(e) => setMistralApiKey(e.target.value)}
placeholder={hasMistralKey ? '••••••••••••••••' : 'Enter your API key'}
/>
{hasMistralKey && (
<span className="text-xs text-secondary mt-xs">You have an API key configured. Enter a new one to update.</span>
)}
</div>
</div>
{message && (
<div className={message.type === 'success' ? 'success-message' : 'error-message'}>
{message.text}

View File

@@ -7,6 +7,8 @@ interface Source {
file_path: string
file_type: string
file_size: number
status: string
biomarker_count: number | null
ocr_data: string | null
description: string | null
uploaded_at: string
@@ -19,6 +21,7 @@ export function SourcesPage() {
const [error, setError] = useState<string | null>(null)
const [dragOver, setDragOver] = useState(false)
const [deleteConfirmId, setDeleteConfirmId] = useState<number | null>(null)
const [parsingId, setParsingId] = useState<number | null>(null)
const fileInputRef = useRef<HTMLInputElement>(null)
// Fetch sources on mount
@@ -98,6 +101,31 @@ export function SourcesPage() {
}
}
const handleParse = async (id: number) => {
setParsingId(id)
setError(null)
try {
const res = await fetch(`/api/sources/${id}/parse`, {
method: 'POST',
credentials: 'include',
})
if (res.ok) {
const data = await res.json()
// Refresh sources to show updated status
fetchSources()
console.log('Parsed:', data)
} else {
const err = await res.json()
setError(err.error || 'Parse failed')
}
} catch (e) {
console.error('Failed to parse:', e)
setError('Failed to parse document')
} finally {
setParsingId(null)
}
}
const formatFileSize = (bytes: number) => {
if (bytes < 1024) return `${bytes} B`
if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`
@@ -191,12 +219,31 @@ export function SourcesPage() {
</div>
</div>
<div className="flex gap-sm items-center">
{source.ocr_data ? (
{source.status === 'parsed' ? (
<span className="status-parsed flex items-center gap-xs text-xs">
<img src="/icons/general/icons8-checkmark-50.png" alt="Parsed" className="icon-sm" /> Parsed
<img src="/icons/general/icons8-checkmark-50.png" alt="Parsed" className="icon-sm" />
{source.biomarker_count ? `${source.biomarker_count} biomarkers` : 'Parsed'}
</span>
) : source.status === 'processing' ? (
<span className="status-processing text-xs text-secondary">
Processing...
</span>
) : source.status === 'failed' ? (
<button
className="btn btn-primary btn-sm"
onClick={() => handleParse(source.id)}
disabled={parsingId === source.id}
>
Retry
</button>
) : (
<span className="text-secondary text-xs">Pending</span>
<button
className="btn btn-primary btn-sm"
onClick={() => handleParse(source.id)}
disabled={parsingId === source.id}
>
Parse
</button>
)}
<button
className="btn btn-danger btn-sm"