Compare commits
4 Commits
c8b4beafff
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 923311e650 | |||
| 8919942322 | |||
| 0f277d6b3d | |||
| fc6376abec |
4
Makefile
4
Makefile
@@ -54,7 +54,7 @@ frontend-install:
|
|||||||
cd frontend && npm install
|
cd frontend && npm install
|
||||||
|
|
||||||
frontend-dev:
|
frontend-dev:
|
||||||
cd frontend && npm run dev
|
cd frontend && npm run dev -- --host 0.0.0.0
|
||||||
|
|
||||||
frontend-build:
|
frontend-build:
|
||||||
cd frontend && npm run build
|
cd frontend && npm run build
|
||||||
@@ -82,7 +82,7 @@ test: backend-test frontend-test
|
|||||||
|
|
||||||
serve:
|
serve:
|
||||||
@echo "Starting backend (port 3000) and frontend (port 5173)..."
|
@echo "Starting backend (port 3000) and frontend (port 5173)..."
|
||||||
@cd backend && cargo run -- serve & cd frontend && npm run dev
|
@cd backend && cargo run -- serve & cd frontend && npm run dev -- --host 0.0.0.0
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
cd backend && cargo clean
|
cd backend && cargo clean
|
||||||
|
|||||||
@@ -47,3 +47,9 @@ regex = "1"
|
|||||||
|
|
||||||
# CLI
|
# CLI
|
||||||
argh = "0.1"
|
argh = "0.1"
|
||||||
|
reqwest = { version = "0.12.26", features = ["multipart", "json"] }
|
||||||
|
serde_json = "1.0.145"
|
||||||
|
|
||||||
|
# PDF parsing for page count
|
||||||
|
lopdf = "0.36"
|
||||||
|
strsim = "0.11"
|
||||||
|
|||||||
245
backend/ocr_schema.json
Normal file
245
backend/ocr_schema.json
Normal file
@@ -0,0 +1,245 @@
|
|||||||
|
{
|
||||||
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||||
|
"name": "LabReport",
|
||||||
|
"description": "Extract biomarker data from a medical lab report",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"patient_name": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Full name of the patient"
|
||||||
|
},
|
||||||
|
"patient_age": {
|
||||||
|
"type": "integer",
|
||||||
|
"description": "Age of the patient in years"
|
||||||
|
},
|
||||||
|
"patient_gender": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"male",
|
||||||
|
"female",
|
||||||
|
"other"
|
||||||
|
],
|
||||||
|
"description": "Gender of the patient"
|
||||||
|
},
|
||||||
|
"lab_name": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Name of the laboratory"
|
||||||
|
},
|
||||||
|
"test_date": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Date when the sample was collected (YYYY-MM-DD format if possible)"
|
||||||
|
},
|
||||||
|
"report_id": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Report ID, barcode, or reference number"
|
||||||
|
},
|
||||||
|
"biomarkers": {
|
||||||
|
"type": "array",
|
||||||
|
"description": "List of biomarker test results",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"name": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Name of the biomarker/test",
|
||||||
|
"enum": [
|
||||||
|
"ARSENIC",
|
||||||
|
"CADMIUM",
|
||||||
|
"MERCURY",
|
||||||
|
"LEAD",
|
||||||
|
"CHROMIUM",
|
||||||
|
"BARIUM",
|
||||||
|
"COBALT",
|
||||||
|
"CAESIUM",
|
||||||
|
"THALLIUM",
|
||||||
|
"URANIUM",
|
||||||
|
"STRONTIUM",
|
||||||
|
"ANTIMONY",
|
||||||
|
"TIN",
|
||||||
|
"MOLYBDENUM",
|
||||||
|
"SILVER",
|
||||||
|
"VANADIUM",
|
||||||
|
"BERYLLIUM",
|
||||||
|
"BISMUTH",
|
||||||
|
"SELENIUM",
|
||||||
|
"ALUMINIUM",
|
||||||
|
"NICKEL",
|
||||||
|
"MANGANESE",
|
||||||
|
"GLYCOSYLATED HEMOGLOBIN (HbA1c)",
|
||||||
|
"AVERAGE BLOOD GLUCOSE (ABG)",
|
||||||
|
"FASTING BLOOD SUGAR (GLUCOSE)",
|
||||||
|
"INSULIN FASTING",
|
||||||
|
"FRUCTOSAMINE",
|
||||||
|
"BLOOD KETONE (D3HB)",
|
||||||
|
"ERYTHROCYTE SEDIMENTATION RATE (ESR)",
|
||||||
|
"ANTI CCP (ACCP)",
|
||||||
|
"ANTI NUCLEAR ANTIBODIES (ANA)",
|
||||||
|
"HEMOGLOBIN",
|
||||||
|
"HEMATOCRIT (PCV)",
|
||||||
|
"TOTAL RED BLOOD CELL COUNT (RBC)",
|
||||||
|
"MEAN CORPUSCULAR VOLUME (MCV)",
|
||||||
|
"MEAN CORPUSCULAR HEMOGLOBIN (MCH)",
|
||||||
|
"MEAN CORP. HEMO. CONC (MCHC)",
|
||||||
|
"RED CELL DISTRIBUTION WIDTH - SD (RDW-SD)",
|
||||||
|
"RED CELL DISTRIBUTION WIDTH (RDW-CV)",
|
||||||
|
"TOTAL LEUCOCYTE COUNT (WBC)",
|
||||||
|
"NEUTROPHILS PERCENTAGE",
|
||||||
|
"LYMPHOCYTES PERCENTAGE",
|
||||||
|
"MONOCYTES PERCENTAGE",
|
||||||
|
"EOSINOPHILS PERCENTAGE",
|
||||||
|
"BASOPHILS PERCENTAGE",
|
||||||
|
"IMMATURE GRANULOCYTE PERCENTAGE (IG%)",
|
||||||
|
"NUCLEATED RED BLOOD CELLS %",
|
||||||
|
"NEUTROPHILS ABSOLUTE COUNT",
|
||||||
|
"LYMPHOCYTES ABSOLUTE COUNT",
|
||||||
|
"MONOCYTES - ABSOLUTE COUNT",
|
||||||
|
"BASOPHILS ABSOLUTE COUNT",
|
||||||
|
"EOSINOPHILS ABSOLUTE COUNT",
|
||||||
|
"IMMATURE GRANULOCYTES (IG)",
|
||||||
|
"NUCLEATED RED BLOOD CELLS",
|
||||||
|
"PLATELET COUNT",
|
||||||
|
"MEAN PLATELET VOLUME (MPV)",
|
||||||
|
"PLATELET DISTRIBUTION WIDTH (PDW)",
|
||||||
|
"PLATELET TO LARGE CELL RATIO (PLCR)",
|
||||||
|
"PLATELETCRIT (PCT)",
|
||||||
|
"VITAMIN A",
|
||||||
|
"VITAMIN E",
|
||||||
|
"VITAMIN K",
|
||||||
|
"VITAMIN B1 (THIAMIN)",
|
||||||
|
"VITAMIN B2 (RIBOFLAVIN)",
|
||||||
|
"VITAMIN B3 (NIACIN/NICOTINIC ACID)",
|
||||||
|
"VITAMIN B5 (PANTOTHENIC ACID)",
|
||||||
|
"VITAMIN B6 (PYRIDOXAL-5-PHOSPHATE)",
|
||||||
|
"VITAMIN B7 (BIOTIN)",
|
||||||
|
"VITAMIN B9 (FOLIC ACID)",
|
||||||
|
"VITAMIN B12 (COBALAMIN)",
|
||||||
|
"VITAMIN D TOTAL",
|
||||||
|
"VITAMIN D2",
|
||||||
|
"VITAMIN D3",
|
||||||
|
"CORTISOL",
|
||||||
|
"CORTICOSTERONE",
|
||||||
|
"ANDROSTENEDIONE",
|
||||||
|
"ESTRADIOL",
|
||||||
|
"TESTOSTERONE",
|
||||||
|
"PROGESTERONE",
|
||||||
|
"17-HYDROXYPROGESTERONE",
|
||||||
|
"DEHYDROEPIANDROSTERONE (DHEA)",
|
||||||
|
"DHEA - SULPHATE (DHEAS)",
|
||||||
|
"DEOXYCORTISOL",
|
||||||
|
"ALPHA-1-ANTITRYPSIN (AAT)",
|
||||||
|
"HOMOCYSTEINE",
|
||||||
|
"TROPONIN I",
|
||||||
|
"HIGH SENSITIVITY C-REACTIVE PROTEIN (HS-CRP)",
|
||||||
|
"LIPOPROTEIN (A) [Lp(a)]",
|
||||||
|
"LIPOPROTEIN-ASSOCIATED PHOSPHOLIPASE A2 (LP-PLA2)",
|
||||||
|
"CYSTATIN C",
|
||||||
|
"BLOOD UREA NITROGEN (BUN)",
|
||||||
|
"UREA (CALCULATED)",
|
||||||
|
"CREATININE - SERUM",
|
||||||
|
"UREA / SR.CREATININE RATIO",
|
||||||
|
"BUN / SR.CREATININE RATIO",
|
||||||
|
"CALCIUM",
|
||||||
|
"URIC ACID",
|
||||||
|
"ESTIMATED GLOMERULAR FILTRATION RATE (eGFR)",
|
||||||
|
"TOTAL CHOLESTEROL",
|
||||||
|
"HDL CHOLESTEROL - DIRECT",
|
||||||
|
"LDL CHOLESTEROL - DIRECT",
|
||||||
|
"TRIGLYCERIDES",
|
||||||
|
"VLDL CHOLESTEROL",
|
||||||
|
"NON-HDL CHOLESTEROL",
|
||||||
|
"TC / HDL CHOLESTEROL RATIO",
|
||||||
|
"LDL / HDL RATIO",
|
||||||
|
"HDL / LDL RATIO",
|
||||||
|
"TRIG / HDL RATIO",
|
||||||
|
"APOLIPOPROTEIN - A1 (APO-A1)",
|
||||||
|
"APOLIPOPROTEIN - B (APO-B)",
|
||||||
|
"APO B / APO A1 RATIO",
|
||||||
|
"IRON",
|
||||||
|
"TOTAL IRON BINDING CAPACITY (TIBC)",
|
||||||
|
"% TRANSFERRIN SATURATION",
|
||||||
|
"FERRITIN",
|
||||||
|
"UNSAT. IRON-BINDING CAPACITY (UIBC)",
|
||||||
|
"ALKALINE PHOSPHATASE",
|
||||||
|
"BILIRUBIN - TOTAL",
|
||||||
|
"BILIRUBIN - DIRECT",
|
||||||
|
"BILIRUBIN (INDIRECT)",
|
||||||
|
"GAMMA GLUTAMYL TRANSFERASE (GGT)",
|
||||||
|
"ASPARTATE AMINOTRANSFERASE (SGOT)",
|
||||||
|
"ALANINE TRANSAMINASE (SGPT)",
|
||||||
|
"SGOT / SGPT RATIO",
|
||||||
|
"PROTEIN - TOTAL",
|
||||||
|
"ALBUMIN - SERUM",
|
||||||
|
"SERUM GLOBULIN",
|
||||||
|
"SERUM ALB/GLOBULIN RATIO",
|
||||||
|
"SODIUM",
|
||||||
|
"POTASSIUM",
|
||||||
|
"CHLORIDE",
|
||||||
|
"MAGNESIUM",
|
||||||
|
"TOTAL TRIIODOTHYRONINE (T3)",
|
||||||
|
"TOTAL THYROXINE (T4)",
|
||||||
|
"TSH ULTRASENSITIVE",
|
||||||
|
"SERUM COPPER",
|
||||||
|
"SERUM ZINC",
|
||||||
|
"AMYLASE",
|
||||||
|
"LIPASE",
|
||||||
|
"URINARY MICROALBUMIN",
|
||||||
|
"CREATININE - URINE",
|
||||||
|
"URI. ALBUMIN/CREATININE RATIO",
|
||||||
|
"URINE COLOUR",
|
||||||
|
"URINE APPEARANCE",
|
||||||
|
"URINE SPECIFIC GRAVITY",
|
||||||
|
"URINE PH",
|
||||||
|
"URINARY PROTEIN",
|
||||||
|
"URINARY GLUCOSE",
|
||||||
|
"URINE KETONE",
|
||||||
|
"URINARY BILIRUBIN",
|
||||||
|
"UROBILINOGEN",
|
||||||
|
"BILE SALT",
|
||||||
|
"BILE PIGMENT",
|
||||||
|
"URINE BLOOD",
|
||||||
|
"NITRITE",
|
||||||
|
"LEUCOCYTE ESTERASE",
|
||||||
|
"MUCUS",
|
||||||
|
"URINE RBC",
|
||||||
|
"URINARY LEUCOCYTES (PUS CELLS)",
|
||||||
|
"EPITHELIAL CELLS",
|
||||||
|
"CASTS",
|
||||||
|
"CRYSTALS",
|
||||||
|
"BACTERIA",
|
||||||
|
"YEAST",
|
||||||
|
"PARASITE",
|
||||||
|
"WEIGHT",
|
||||||
|
"HEIGHT",
|
||||||
|
"BODY MASS INDEX (BMI)",
|
||||||
|
"HEART RATE",
|
||||||
|
"BLOOD PRESSURE SYSTOLIC",
|
||||||
|
"BLOOD PRESSURE DIASTOLIC",
|
||||||
|
"OXYGEN SATURATION (SpO2)",
|
||||||
|
"BODY TEMPERATURE",
|
||||||
|
"STEPS",
|
||||||
|
"CALORIES BURNED"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"value": {
|
||||||
|
"type": "number",
|
||||||
|
"description": "Observed/measured value"
|
||||||
|
},
|
||||||
|
"value_string": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Value as string if non-numeric (e.g., 'Negative', 'Trace', '> 65')"
|
||||||
|
},
|
||||||
|
"unit": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Unit of measurement"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": [
|
||||||
|
"name"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": [
|
||||||
|
"biomarkers"
|
||||||
|
]
|
||||||
|
}
|
||||||
@@ -10,6 +10,7 @@ paths:
|
|||||||
database: "./data/zhealth.db"
|
database: "./data/zhealth.db"
|
||||||
logs: "./logs"
|
logs: "./logs"
|
||||||
uploads: "./data/uploads"
|
uploads: "./data/uploads"
|
||||||
|
max_upload_mb: 50 # Maximum file upload size in MB
|
||||||
|
|
||||||
logging:
|
logging:
|
||||||
level: "info" # Options: trace | debug | info | warn | error
|
level: "info" # Options: trace | debug | info | warn | error
|
||||||
@@ -29,3 +30,11 @@ ai:
|
|||||||
provider: "gemini" # Options: gemini | openai | anthropic
|
provider: "gemini" # Options: gemini | openai | anthropic
|
||||||
model: "gemini-3-flash-preview"
|
model: "gemini-3-flash-preview"
|
||||||
api_key: "${AI_API_KEY}"
|
api_key: "${AI_API_KEY}"
|
||||||
|
|
||||||
|
# Mistral OCR for document parsing
|
||||||
|
# Note: API key is set per-user in Profile settings (BYOK)
|
||||||
|
mistral:
|
||||||
|
ocr_model: "mistral-ocr-latest"
|
||||||
|
max_pages_per_request: 8
|
||||||
|
max_retries: 2 # Max retry attempts per chunk
|
||||||
|
timeout_secs: 120 # Request timeout in seconds
|
||||||
|
|||||||
@@ -214,7 +214,7 @@ biomarkers:
|
|||||||
# ============================================================================
|
# ============================================================================
|
||||||
# DIABETES / METABOLIC - Scale-based interpretations
|
# DIABETES / METABOLIC - Scale-based interpretations
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
- name: "HbA1c"
|
- name: "GLYCOSYLATED HEMOGLOBIN (HbA1c)"
|
||||||
test_category: DIABETES
|
test_category: DIABETES
|
||||||
category: metabolic
|
category: metabolic
|
||||||
unit: "%"
|
unit: "%"
|
||||||
@@ -339,7 +339,7 @@ biomarkers:
|
|||||||
min: 36.0
|
min: 36.0
|
||||||
max: 44.0
|
max: 44.0
|
||||||
|
|
||||||
- name: "TOTAL RBC"
|
- name: "TOTAL RED BLOOD CELL COUNT (RBC)"
|
||||||
test_category: HEMOGRAM
|
test_category: HEMOGRAM
|
||||||
category: blood
|
category: blood
|
||||||
unit: "10^6/µL"
|
unit: "10^6/µL"
|
||||||
@@ -614,7 +614,7 @@ biomarkers:
|
|||||||
min: 0.13
|
min: 0.13
|
||||||
max: 1.19
|
max: 1.19
|
||||||
|
|
||||||
- name: "VITAMIN B1/THIAMIN"
|
- name: "VITAMIN B1 (THIAMIN)"
|
||||||
test_category: VITAMIN
|
test_category: VITAMIN
|
||||||
category: vitamins
|
category: vitamins
|
||||||
unit: "ng/mL"
|
unit: "ng/mL"
|
||||||
@@ -623,7 +623,7 @@ biomarkers:
|
|||||||
min: 0.5
|
min: 0.5
|
||||||
max: 4.0
|
max: 4.0
|
||||||
|
|
||||||
- name: "VITAMIN B2/RIBOFLAVIN"
|
- name: "VITAMIN B2 (RIBOFLAVIN)"
|
||||||
test_category: VITAMIN
|
test_category: VITAMIN
|
||||||
category: vitamins
|
category: vitamins
|
||||||
unit: "ng/mL"
|
unit: "ng/mL"
|
||||||
@@ -632,7 +632,7 @@ biomarkers:
|
|||||||
min: 1.6
|
min: 1.6
|
||||||
max: 68.2
|
max: 68.2
|
||||||
|
|
||||||
- name: "VITAMIN B3/NICOTINIC ACID"
|
- name: "VITAMIN B3 (NIACIN/NICOTINIC ACID)"
|
||||||
test_category: VITAMIN
|
test_category: VITAMIN
|
||||||
category: vitamins
|
category: vitamins
|
||||||
unit: "ng/mL"
|
unit: "ng/mL"
|
||||||
@@ -640,7 +640,7 @@ biomarkers:
|
|||||||
reference:
|
reference:
|
||||||
max: 5.0
|
max: 5.0
|
||||||
|
|
||||||
- name: "VITAMIN B5/PANTOTHENIC"
|
- name: "VITAMIN B5 (PANTOTHENIC ACID)"
|
||||||
test_category: VITAMIN
|
test_category: VITAMIN
|
||||||
category: vitamins
|
category: vitamins
|
||||||
unit: "ng/mL"
|
unit: "ng/mL"
|
||||||
@@ -649,7 +649,7 @@ biomarkers:
|
|||||||
min: 11.0
|
min: 11.0
|
||||||
max: 150.0
|
max: 150.0
|
||||||
|
|
||||||
- name: "VITAMIN B6/P5P"
|
- name: "VITAMIN B6 (PYRIDOXAL-5-PHOSPHATE)"
|
||||||
test_category: VITAMIN
|
test_category: VITAMIN
|
||||||
category: vitamins
|
category: vitamins
|
||||||
unit: "ng/mL"
|
unit: "ng/mL"
|
||||||
@@ -658,7 +658,7 @@ biomarkers:
|
|||||||
min: 5.0
|
min: 5.0
|
||||||
max: 50.0
|
max: 50.0
|
||||||
|
|
||||||
- name: "VITAMIN B7/BIOTIN"
|
- name: "VITAMIN B7 (BIOTIN)"
|
||||||
test_category: VITAMIN
|
test_category: VITAMIN
|
||||||
category: vitamins
|
category: vitamins
|
||||||
unit: "ng/mL"
|
unit: "ng/mL"
|
||||||
@@ -667,7 +667,7 @@ biomarkers:
|
|||||||
min: 0.2
|
min: 0.2
|
||||||
max: 3.0
|
max: 3.0
|
||||||
|
|
||||||
- name: "VITAMIN B9/FOLIC ACID"
|
- name: "VITAMIN B9 (FOLIC ACID)"
|
||||||
test_category: VITAMIN
|
test_category: VITAMIN
|
||||||
category: vitamins
|
category: vitamins
|
||||||
unit: "ng/mL"
|
unit: "ng/mL"
|
||||||
@@ -676,7 +676,7 @@ biomarkers:
|
|||||||
min: 0.2
|
min: 0.2
|
||||||
max: 20.0
|
max: 20.0
|
||||||
|
|
||||||
- name: "VITAMIN B-12"
|
- name: "VITAMIN B12 (COBALAMIN)"
|
||||||
test_category: VITAMIN
|
test_category: VITAMIN
|
||||||
category: vitamins
|
category: vitamins
|
||||||
unit: "pg/mL"
|
unit: "pg/mL"
|
||||||
@@ -951,7 +951,7 @@ biomarkers:
|
|||||||
- { min: 4, max: 10, label: "Moderate risk of future heart attack" }
|
- { min: 4, max: 10, label: "Moderate risk of future heart attack" }
|
||||||
- { min: 10, label: "Elevated risk of future heart attack" }
|
- { min: 10, label: "Elevated risk of future heart attack" }
|
||||||
|
|
||||||
- name: "HS-CRP"
|
- name: "HIGH SENSITIVITY C-REACTIVE PROTEIN (HS-CRP)"
|
||||||
test_category: CARDIAC
|
test_category: CARDIAC
|
||||||
category: cardiac
|
category: cardiac
|
||||||
unit: "mg/L"
|
unit: "mg/L"
|
||||||
@@ -970,7 +970,7 @@ biomarkers:
|
|||||||
reference:
|
reference:
|
||||||
max: 30.0
|
max: 30.0
|
||||||
|
|
||||||
- name: "LP-PLA2"
|
- name: "LIPOPROTEIN-ASSOCIATED PHOSPHOLIPASE A2 (LP-PLA2)"
|
||||||
test_category: CARDIAC
|
test_category: CARDIAC
|
||||||
category: cardiac
|
category: cardiac
|
||||||
unit: "nmol/min/mL"
|
unit: "nmol/min/mL"
|
||||||
@@ -1062,7 +1062,7 @@ biomarkers:
|
|||||||
min: 2.6
|
min: 2.6
|
||||||
max: 6.0
|
max: 6.0
|
||||||
|
|
||||||
- name: "eGFR"
|
- name: "ESTIMATED GLOMERULAR FILTRATION RATE (eGFR)"
|
||||||
test_category: RENAL
|
test_category: RENAL
|
||||||
category: renal
|
category: renal
|
||||||
unit: "mL/min/1.73m²"
|
unit: "mL/min/1.73m²"
|
||||||
@@ -1733,7 +1733,7 @@ biomarkers:
|
|||||||
category: body
|
category: body
|
||||||
unit: "cm"
|
unit: "cm"
|
||||||
|
|
||||||
- name: "BMI"
|
- name: "BODY MASS INDEX (BMI)"
|
||||||
test_category: BODY
|
test_category: BODY
|
||||||
category: body
|
category: body
|
||||||
unit: "kg/m²"
|
unit: "kg/m²"
|
||||||
@@ -1773,7 +1773,7 @@ biomarkers:
|
|||||||
- { min: 80, max: 89, label: "High Blood Pressure Stage 1" }
|
- { min: 80, max: 89, label: "High Blood Pressure Stage 1" }
|
||||||
- { min: 90, label: "High Blood Pressure Stage 2" }
|
- { min: 90, label: "High Blood Pressure Stage 2" }
|
||||||
|
|
||||||
- name: "SPO2"
|
- name: "OXYGEN SATURATION (SpO2)"
|
||||||
test_category: VITALS
|
test_category: VITALS
|
||||||
category: vitals
|
category: vitals
|
||||||
unit: "%"
|
unit: "%"
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ pub struct Config {
|
|||||||
pub auth: AuthConfig,
|
pub auth: AuthConfig,
|
||||||
pub admin: AdminConfig,
|
pub admin: AdminConfig,
|
||||||
pub ai: AiConfig,
|
pub ai: AiConfig,
|
||||||
|
pub mistral: MistralConfig,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Deserialize)]
|
#[derive(Debug, Deserialize)]
|
||||||
@@ -20,11 +21,12 @@ pub struct ServerConfig {
|
|||||||
pub port: u16,
|
pub port: u16,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Deserialize)]
|
#[derive(Debug, Deserialize, Clone)]
|
||||||
pub struct PathsConfig {
|
pub struct PathsConfig {
|
||||||
pub database: String,
|
pub database: String,
|
||||||
pub logs: String,
|
pub logs: String,
|
||||||
pub uploads: String,
|
pub uploads: String,
|
||||||
|
pub max_upload_mb: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Deserialize)]
|
#[derive(Debug, Deserialize)]
|
||||||
@@ -53,6 +55,17 @@ pub struct AiConfig {
|
|||||||
pub api_key: String,
|
pub api_key: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize, Clone)]
|
||||||
|
pub struct MistralConfig {
|
||||||
|
/// API key - NOT loaded from config, set at runtime from user's profile
|
||||||
|
#[serde(skip, default)]
|
||||||
|
pub api_key: String,
|
||||||
|
pub ocr_model: String,
|
||||||
|
pub max_pages_per_request: u32,
|
||||||
|
pub max_retries: u32,
|
||||||
|
pub timeout_secs: u64,
|
||||||
|
}
|
||||||
|
|
||||||
impl Config {
|
impl Config {
|
||||||
/// Load configuration from a YAML file.
|
/// Load configuration from a YAML file.
|
||||||
pub fn load<P: AsRef<Path>>(path: P) -> anyhow::Result<Self> {
|
pub fn load<P: AsRef<Path>>(path: P) -> anyhow::Result<Self> {
|
||||||
|
|||||||
@@ -6,8 +6,10 @@ use axum::{
|
|||||||
use chrono::Utc;
|
use chrono::Utc;
|
||||||
use sea_orm::{ActiveModelTrait, ColumnTrait, DatabaseConnection, EntityTrait, QueryFilter, QueryOrder, Set};
|
use sea_orm::{ActiveModelTrait, ColumnTrait, DatabaseConnection, EntityTrait, QueryFilter, QueryOrder, Set};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use crate::models::bio::{biomarker, biomarker_entry};
|
use crate::models::bio::{biomarker, biomarker_entry, biomarker_reference_rule};
|
||||||
|
use crate::models::user::user;
|
||||||
|
|
||||||
/// Request to create a new biomarker entry.
|
/// Request to create a new biomarker entry.
|
||||||
#[derive(Deserialize)]
|
#[derive(Deserialize)]
|
||||||
@@ -30,6 +32,23 @@ pub struct EntryResponse {
|
|||||||
pub notes: Option<String>,
|
pub notes: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Response for biomarker result with reference info.
|
||||||
|
#[derive(Serialize)]
|
||||||
|
pub struct BiomarkerResult {
|
||||||
|
pub biomarker_id: i32,
|
||||||
|
pub name: String,
|
||||||
|
pub category_id: i32,
|
||||||
|
pub unit: String,
|
||||||
|
// Latest entry
|
||||||
|
pub value: Option<f64>,
|
||||||
|
pub measured_at: Option<String>,
|
||||||
|
// Reference info
|
||||||
|
pub ref_min: Option<f64>,
|
||||||
|
pub ref_max: Option<f64>,
|
||||||
|
pub label: String,
|
||||||
|
pub severity: i32,
|
||||||
|
}
|
||||||
|
|
||||||
/// POST /api/entries - Create a new biomarker entry.
|
/// POST /api/entries - Create a new biomarker entry.
|
||||||
pub async fn create_entry(
|
pub async fn create_entry(
|
||||||
State(db): State<DatabaseConnection>,
|
State(db): State<DatabaseConnection>,
|
||||||
@@ -103,7 +122,7 @@ pub async fn list_user_entries(
|
|||||||
.await
|
.await
|
||||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
let bm_map: std::collections::HashMap<i32, String> = biomarkers
|
let bm_map: HashMap<i32, String> = biomarkers
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|b| (b.id, b.name))
|
.map(|b| (b.id, b.name))
|
||||||
.collect();
|
.collect();
|
||||||
@@ -122,3 +141,143 @@ pub async fn list_user_entries(
|
|||||||
|
|
||||||
Ok(Json(items))
|
Ok(Json(items))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// GET /api/users/:user_id/results - Get latest biomarker results with reference rules.
|
||||||
|
pub async fn get_user_results(
|
||||||
|
State(db): State<DatabaseConnection>,
|
||||||
|
Path(user_id): Path<i32>,
|
||||||
|
) -> Result<Json<Vec<BiomarkerResult>>, StatusCode> {
|
||||||
|
// Get user profile for sex/age matching
|
||||||
|
let user_profile = user::Entity::find_by_id(user_id)
|
||||||
|
.one(&db)
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
|
||||||
|
.ok_or(StatusCode::NOT_FOUND)?;
|
||||||
|
|
||||||
|
// Calculate age from birthdate
|
||||||
|
let user_age = user_profile.birthdate.map(|bd| {
|
||||||
|
let today = chrono::Utc::now().date_naive();
|
||||||
|
let years = today.years_since(bd).unwrap_or(0) as i32;
|
||||||
|
years
|
||||||
|
});
|
||||||
|
|
||||||
|
// Fetch all biomarkers
|
||||||
|
let biomarkers = biomarker::Entity::find()
|
||||||
|
.all(&db)
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
|
// Fetch all entries for this user, ordered by measured_at desc
|
||||||
|
let entries = biomarker_entry::Entity::find()
|
||||||
|
.filter(biomarker_entry::Column::UserId.eq(user_id))
|
||||||
|
.order_by_desc(biomarker_entry::Column::MeasuredAt)
|
||||||
|
.all(&db)
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
|
// Build map of biomarker_id -> latest entry
|
||||||
|
let mut latest_entries: HashMap<i32, &biomarker_entry::Model> = HashMap::new();
|
||||||
|
for entry in &entries {
|
||||||
|
latest_entries.entry(entry.biomarker_id).or_insert(entry);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fetch all reference rules
|
||||||
|
let rules = biomarker_reference_rule::Entity::find()
|
||||||
|
.order_by_asc(biomarker_reference_rule::Column::SortOrder)
|
||||||
|
.all(&db)
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
|
// Group rules by biomarker_id
|
||||||
|
let mut rules_map: HashMap<i32, Vec<&biomarker_reference_rule::Model>> = HashMap::new();
|
||||||
|
for rule in &rules {
|
||||||
|
rules_map.entry(rule.biomarker_id).or_default().push(rule);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build results
|
||||||
|
let mut results: Vec<BiomarkerResult> = Vec::new();
|
||||||
|
|
||||||
|
for bm in &biomarkers {
|
||||||
|
let entry = latest_entries.get(&bm.id);
|
||||||
|
let value = entry.map(|e| e.value);
|
||||||
|
let measured_at = entry.map(|e| e.measured_at.to_string());
|
||||||
|
|
||||||
|
// Find matching reference rule
|
||||||
|
let bm_rules = rules_map.get(&bm.id).map(|v| v.as_slice()).unwrap_or(&[]);
|
||||||
|
let (ref_min, ref_max, label, severity) = find_matching_rule(bm_rules, value, user_age, None);
|
||||||
|
|
||||||
|
results.push(BiomarkerResult {
|
||||||
|
biomarker_id: bm.id,
|
||||||
|
name: bm.name.clone(),
|
||||||
|
category_id: bm.category_id,
|
||||||
|
unit: bm.unit.clone(),
|
||||||
|
value,
|
||||||
|
measured_at,
|
||||||
|
ref_min,
|
||||||
|
ref_max,
|
||||||
|
label,
|
||||||
|
severity,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Json(results))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Find the best matching reference rule for a value.
|
||||||
|
fn find_matching_rule(
|
||||||
|
rules: &[&biomarker_reference_rule::Model],
|
||||||
|
value: Option<f64>,
|
||||||
|
user_age: Option<i32>,
|
||||||
|
_user_sex: Option<&str>,
|
||||||
|
) -> (Option<f64>, Option<f64>, String, i32) {
|
||||||
|
// Default: no data
|
||||||
|
if rules.is_empty() {
|
||||||
|
return (None, None, "No reference".to_string(), 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find the "range" type rule first (defines normal range)
|
||||||
|
let range_rule = rules.iter().find(|r| r.rule_type == "range");
|
||||||
|
let (ref_min, ref_max) = range_rule
|
||||||
|
.map(|r| (r.value_min, r.value_max))
|
||||||
|
.unwrap_or((None, None));
|
||||||
|
|
||||||
|
// If no value, return range with "No data" label
|
||||||
|
let Some(val) = value else {
|
||||||
|
return (ref_min, ref_max, "No data".to_string(), 0);
|
||||||
|
};
|
||||||
|
|
||||||
|
// Find matching scale rule based on value
|
||||||
|
for rule in rules {
|
||||||
|
// Check age bounds
|
||||||
|
if let Some(min_age) = rule.age_min {
|
||||||
|
if user_age.map(|a| a < min_age).unwrap_or(false) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if let Some(max_age) = rule.age_max {
|
||||||
|
if user_age.map(|a| a > max_age).unwrap_or(false) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check value bounds
|
||||||
|
let min_ok = rule.value_min.map(|min| val >= min).unwrap_or(true);
|
||||||
|
let max_ok = rule.value_max.map(|max| val <= max).unwrap_or(true);
|
||||||
|
|
||||||
|
if min_ok && max_ok {
|
||||||
|
return (ref_min, ref_max, rule.label.clone(), rule.severity);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// No matching rule found, determine based on range
|
||||||
|
if let (Some(min), Some(max)) = (ref_min, ref_max) {
|
||||||
|
if val < min {
|
||||||
|
return (ref_min, ref_max, "Low".to_string(), 1);
|
||||||
|
} else if val > max {
|
||||||
|
return (ref_min, ref_max, "High".to_string(), 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
(ref_min, ref_max, "Normal".to_string(), 0)
|
||||||
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -5,5 +5,6 @@ pub mod biomarkers;
|
|||||||
pub mod categories;
|
pub mod categories;
|
||||||
pub mod diets;
|
pub mod diets;
|
||||||
pub mod entries;
|
pub mod entries;
|
||||||
|
pub mod ocr;
|
||||||
pub mod sources;
|
pub mod sources;
|
||||||
pub mod users;
|
pub mod users;
|
||||||
|
|||||||
183
backend/src/handlers/ocr/matching.rs
Normal file
183
backend/src/handlers/ocr/matching.rs
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
//! Biomarker matching and merging logic.
|
||||||
|
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use strsim::jaro_winkler;
|
||||||
|
|
||||||
|
use super::types::{Biomarker, DocumentAnnotation, OcrResult};
|
||||||
|
|
||||||
|
/// Fuzzy matching threshold (0.0 - 1.0).
|
||||||
|
/// Names with Jaro-Winkler similarity >= this value are considered a match.
|
||||||
|
const FUZZY_THRESHOLD: f64 = 0.90;
|
||||||
|
|
||||||
|
/// Find a matching biomarker name from the valid set.
|
||||||
|
/// Returns the canonical name (original case) if found (exact, alias, or fuzzy match).
|
||||||
|
///
|
||||||
|
/// Matching order:
|
||||||
|
/// 1. Exact match on full name (case-insensitive)
|
||||||
|
/// 2. Extract parenthetical alias from INPUT (e.g., `(HS-CRP)` from `HIGH SENSITIVITY C-REACTIVE PROTEIN (HS-CRP)`)
|
||||||
|
/// 3. Extract parenthetical alias from SCHEMA (e.g., `HS-CRP` matches `HIGH SENSITIVITY C-REACTIVE PROTEIN (HS-CRP)`)
|
||||||
|
/// 4. Fuzzy match with Jaro-Winkler (threshold 0.90)
|
||||||
|
///
|
||||||
|
/// valid_biomarkers: HashMap<uppercase_name, original_case_name>
|
||||||
|
fn find_matching_biomarker(name: &str, valid_biomarkers: &HashMap<String, String>) -> Option<String> {
|
||||||
|
let name_upper = name.to_uppercase();
|
||||||
|
|
||||||
|
// 1. Exact match first (fast path) - lookup by uppercase key, return original case value
|
||||||
|
if let Some(canonical) = valid_biomarkers.get(&name_upper) {
|
||||||
|
return Some(canonical.clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2. Try extracting parenthetical alias from INPUT
|
||||||
|
if let Some(alias) = extract_parenthetical_alias(&name_upper) {
|
||||||
|
if let Some(canonical) = valid_biomarkers.get(&alias) {
|
||||||
|
tracing::debug!(
|
||||||
|
"Alias matched '{}' -> '{}' (extracted from parentheses in input)",
|
||||||
|
name, canonical
|
||||||
|
);
|
||||||
|
return Some(canonical.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3. Try matching input against aliases in SCHEMA
|
||||||
|
// This handles input "HS-CRP" matching schema "HIGH SENSITIVITY C-REACTIVE PROTEIN (HS-CRP)"
|
||||||
|
for (upper_key, canonical) in valid_biomarkers {
|
||||||
|
if let Some(alias) = extract_parenthetical_alias(upper_key) {
|
||||||
|
if alias == name_upper {
|
||||||
|
tracing::debug!(
|
||||||
|
"Reverse alias matched '{}' -> '{}' (input is alias in schema)",
|
||||||
|
name, canonical
|
||||||
|
);
|
||||||
|
return Some(canonical.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4. Fuzzy match with threshold - compare against uppercase keys
|
||||||
|
valid_biomarkers.iter()
|
||||||
|
.map(|(upper_key, canonical)| (canonical, jaro_winkler(&name_upper, upper_key)))
|
||||||
|
.filter(|(_, score)| *score >= FUZZY_THRESHOLD)
|
||||||
|
.max_by(|a, b| a.1.partial_cmp(&b.1).unwrap())
|
||||||
|
.map(|(matched_name, score)| {
|
||||||
|
tracing::debug!(
|
||||||
|
"Fuzzy matched '{}' -> '{}' (score: {:.3})",
|
||||||
|
name, matched_name, score
|
||||||
|
);
|
||||||
|
matched_name.clone()
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Extract alias from parentheses or brackets at the end of a name.
|
||||||
|
/// Examples:
|
||||||
|
/// - "HIGH SENSITIVITY C-REACTIVE PROTEIN (HS-CRP)" -> "HS-CRP"
|
||||||
|
/// - "EST. GLOMERULAR FILTRATION RATE (eGFR)" -> "EGFR"
|
||||||
|
/// - "LIPOPROTEIN (A) [LP(A)]" -> None (nested parens too complex)
|
||||||
|
fn extract_parenthetical_alias(name: &str) -> Option<String> {
|
||||||
|
let name = name.trim();
|
||||||
|
|
||||||
|
// Look for trailing (ALIAS) pattern
|
||||||
|
if let Some(start) = name.rfind('(') {
|
||||||
|
if name.ends_with(')') {
|
||||||
|
let alias = &name[start + 1..name.len() - 1];
|
||||||
|
// Only use if it looks like an abbreviation (mostly uppercase, short)
|
||||||
|
if alias.len() >= 2 && alias.len() <= 15 && !alias.contains(' ') {
|
||||||
|
return Some(alias.to_uppercase());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Merge multiple OCR results into one, filtering to only known biomarkers.
|
||||||
|
/// Uses fuzzy matching to handle name variations.
|
||||||
|
/// valid_biomarkers: HashMap<uppercase_name, original_case_name>
|
||||||
|
pub fn merge_results(results: Vec<DocumentAnnotation>, valid_biomarkers: &HashMap<String, String>) -> OcrResult {
|
||||||
|
let mut merged = OcrResult {
|
||||||
|
patient_name: None,
|
||||||
|
patient_age: None,
|
||||||
|
patient_gender: None,
|
||||||
|
lab_name: None,
|
||||||
|
test_date: None,
|
||||||
|
biomarkers: Vec::new(),
|
||||||
|
};
|
||||||
|
|
||||||
|
// Track biomarkers by canonical name, prefer ones with actual values
|
||||||
|
let mut biomarker_map: HashMap<String, Biomarker> = HashMap::new();
|
||||||
|
let mut skipped_count = 0;
|
||||||
|
let mut fuzzy_matched_count = 0;
|
||||||
|
|
||||||
|
for result in results {
|
||||||
|
// Take first non-null metadata
|
||||||
|
if merged.patient_name.is_none() && result.patient_name.is_some() {
|
||||||
|
merged.patient_name = result.patient_name;
|
||||||
|
}
|
||||||
|
if merged.patient_age.is_none() && result.patient_age.is_some() {
|
||||||
|
merged.patient_age = result.patient_age;
|
||||||
|
}
|
||||||
|
if merged.patient_gender.is_none() && result.patient_gender.is_some() {
|
||||||
|
merged.patient_gender = result.patient_gender;
|
||||||
|
}
|
||||||
|
if merged.lab_name.is_none() && result.lab_name.is_some() {
|
||||||
|
merged.lab_name = result.lab_name;
|
||||||
|
}
|
||||||
|
if merged.test_date.is_none() && result.test_date.is_some() {
|
||||||
|
merged.test_date = result.test_date;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Merge biomarkers with fuzzy matching
|
||||||
|
if let Some(biomarkers) = result.biomarkers {
|
||||||
|
for mut bm in biomarkers {
|
||||||
|
let original_name = bm.name.clone();
|
||||||
|
|
||||||
|
// Try to find a matching canonical name
|
||||||
|
let canonical_name = match find_matching_biomarker(&bm.name, valid_biomarkers) {
|
||||||
|
Some(matched) => {
|
||||||
|
if matched != bm.name.to_uppercase() {
|
||||||
|
fuzzy_matched_count += 1;
|
||||||
|
}
|
||||||
|
// Update the biomarker name to canonical form
|
||||||
|
bm.name = matched.clone();
|
||||||
|
matched
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
tracing::debug!("Skipping unknown biomarker: {}", original_name);
|
||||||
|
skipped_count += 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let has_real_value = bm.value.is_some() ||
|
||||||
|
bm.value_string.as_ref().map(|s| !s.eq_ignore_ascii_case("not provided")).unwrap_or(false);
|
||||||
|
|
||||||
|
if let Some(existing) = biomarker_map.get(&canonical_name) {
|
||||||
|
let existing_has_real_value = existing.value.is_some() ||
|
||||||
|
existing.value_string.as_ref().map(|s| !s.eq_ignore_ascii_case("not provided")).unwrap_or(false);
|
||||||
|
|
||||||
|
// Replace only if current has real value and existing doesn't
|
||||||
|
if has_real_value && !existing_has_real_value {
|
||||||
|
biomarker_map.insert(canonical_name, bm);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
biomarker_map.insert(canonical_name, bm);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if skipped_count > 0 {
|
||||||
|
tracing::info!("Skipped {} unknown biomarkers not in schema", skipped_count);
|
||||||
|
}
|
||||||
|
if fuzzy_matched_count > 0 {
|
||||||
|
tracing::info!("Fuzzy matched {} biomarkers to canonical names", fuzzy_matched_count);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Collect biomarkers from map, filtering out "Not Provided" only entries
|
||||||
|
merged.biomarkers = biomarker_map.into_values()
|
||||||
|
.filter(|bm| {
|
||||||
|
bm.value.is_some() ||
|
||||||
|
bm.value_string.as_ref().map(|s| !s.eq_ignore_ascii_case("not provided")).unwrap_or(false)
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
merged
|
||||||
|
}
|
||||||
211
backend/src/handlers/ocr/mistral.rs
Normal file
211
backend/src/handlers/ocr/mistral.rs
Normal file
@@ -0,0 +1,211 @@
|
|||||||
|
//! Mistral API integration for OCR.
|
||||||
|
|
||||||
|
use reqwest::multipart::{Form, Part};
|
||||||
|
use serde_json::{json, Value};
|
||||||
|
use std::path::PathBuf;
|
||||||
|
use std::time::Duration;
|
||||||
|
use tokio::fs;
|
||||||
|
|
||||||
|
use crate::config::MistralConfig;
|
||||||
|
use super::types::{Biomarker, DocumentAnnotation, MistralFileResponse, MistralOcrResponse};
|
||||||
|
use super::schema::strip_descriptions;
|
||||||
|
|
||||||
|
/// Upload a file to Mistral and return the file ID.
|
||||||
|
pub async fn upload_to_mistral(config: &MistralConfig, file_path: &PathBuf) -> Result<String, String> {
|
||||||
|
let client = reqwest::Client::builder()
|
||||||
|
.timeout(Duration::from_secs(config.timeout_secs))
|
||||||
|
.build()
|
||||||
|
.map_err(|e| format!("Failed to create HTTP client: {}", e))?;
|
||||||
|
|
||||||
|
let file_bytes = fs::read(file_path)
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Failed to read file: {}", e))?;
|
||||||
|
|
||||||
|
let file_name = file_path
|
||||||
|
.file_name()
|
||||||
|
.and_then(|n| n.to_str())
|
||||||
|
.unwrap_or("document.pdf")
|
||||||
|
.to_string();
|
||||||
|
|
||||||
|
let part = Part::bytes(file_bytes)
|
||||||
|
.file_name(file_name)
|
||||||
|
.mime_str("application/pdf")
|
||||||
|
.map_err(|e| format!("MIME error: {}", e))?;
|
||||||
|
|
||||||
|
let form = Form::new()
|
||||||
|
.text("purpose", "ocr")
|
||||||
|
.part("file", part);
|
||||||
|
|
||||||
|
let response = client
|
||||||
|
.post("https://api.mistral.ai/v1/files")
|
||||||
|
.header("Authorization", format!("Bearer {}", config.api_key))
|
||||||
|
.multipart(form)
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("HTTP request failed: {}", e))?;
|
||||||
|
|
||||||
|
if !response.status().is_success() {
|
||||||
|
let error_text = response.text().await.unwrap_or_default();
|
||||||
|
return Err(format!("Mistral upload failed: {}", error_text));
|
||||||
|
}
|
||||||
|
|
||||||
|
let response_text = response.text().await
|
||||||
|
.map_err(|e| format!("Failed to read response: {}", e))?;
|
||||||
|
|
||||||
|
tracing::info!("Mistral file upload response: {}", response_text);
|
||||||
|
|
||||||
|
let result: MistralFileResponse = serde_json::from_str(&response_text)
|
||||||
|
.map_err(|e| format!("Failed to parse response: {} - raw: {}", e, response_text))?;
|
||||||
|
|
||||||
|
tracing::info!("Parsed file upload: id={}, num_pages={:?}", result.id, result.num_pages);
|
||||||
|
|
||||||
|
Ok(result.id)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Process OCR for specific pages of an uploaded document.
|
||||||
|
pub async fn ocr_pages(
|
||||||
|
config: &MistralConfig,
|
||||||
|
file_id: &str,
|
||||||
|
pages: &[usize],
|
||||||
|
) -> Result<DocumentAnnotation, String> {
|
||||||
|
let client = reqwest::Client::builder()
|
||||||
|
.timeout(Duration::from_secs(config.timeout_secs))
|
||||||
|
.build()
|
||||||
|
.map_err(|e| format!("Failed to create HTTP client: {}", e))?;
|
||||||
|
|
||||||
|
// Load the complete schema from file
|
||||||
|
let schema_content = std::fs::read_to_string("ocr_schema.json")
|
||||||
|
.map_err(|e| format!("Failed to read ocr_schema.json: {}", e))?;
|
||||||
|
let mut schema: Value = serde_json::from_str(&schema_content)
|
||||||
|
.map_err(|e| format!("Failed to parse ocr_schema.json: {}", e))?;
|
||||||
|
|
||||||
|
// Clean the schema - remove meta-fields that Mistral echoes back
|
||||||
|
if let Some(obj) = schema.as_object_mut() {
|
||||||
|
obj.remove("$schema");
|
||||||
|
obj.remove("name");
|
||||||
|
obj.remove("description");
|
||||||
|
}
|
||||||
|
strip_descriptions(&mut schema);
|
||||||
|
|
||||||
|
let body = json!({
|
||||||
|
"model": config.ocr_model,
|
||||||
|
"document": {
|
||||||
|
"type": "file",
|
||||||
|
"file_id": file_id
|
||||||
|
},
|
||||||
|
"pages": pages,
|
||||||
|
"document_annotation_format": {
|
||||||
|
"type": "json_schema",
|
||||||
|
"json_schema": {
|
||||||
|
"name": "LabReport",
|
||||||
|
"schema": schema
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
let response = client
|
||||||
|
.post("https://api.mistral.ai/v1/ocr")
|
||||||
|
.header("Authorization", format!("Bearer {}", config.api_key))
|
||||||
|
.header("Content-Type", "application/json")
|
||||||
|
.json(&body)
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("OCR request failed: {}", e))?;
|
||||||
|
|
||||||
|
if !response.status().is_success() {
|
||||||
|
let error_text = response.text().await.unwrap_or_default();
|
||||||
|
return Err(format!("OCR failed: {}", error_text));
|
||||||
|
}
|
||||||
|
|
||||||
|
let result: MistralOcrResponse = response
|
||||||
|
.json()
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Failed to parse OCR response: {}", e))?;
|
||||||
|
|
||||||
|
let annotation_str = result
|
||||||
|
.document_annotation
|
||||||
|
.ok_or_else(|| "No document annotation in response".to_string())?;
|
||||||
|
|
||||||
|
tracing::debug!("Raw annotation from Mistral: {}", &annotation_str);
|
||||||
|
|
||||||
|
// Mistral returns data wrapped in "properties" - extract it
|
||||||
|
let raw_json: Value = serde_json::from_str(&annotation_str)
|
||||||
|
.map_err(|e| format!("Failed to parse raw JSON: {}", e))?;
|
||||||
|
|
||||||
|
let data_json = if let Some(props) = raw_json.get("properties") {
|
||||||
|
props.clone()
|
||||||
|
} else {
|
||||||
|
raw_json
|
||||||
|
};
|
||||||
|
|
||||||
|
// Check if this is a schema-only response (no actual data)
|
||||||
|
if let Some(biomarkers) = data_json.get("biomarkers") {
|
||||||
|
if biomarkers.get("type").is_some() && biomarkers.get("items").is_some() {
|
||||||
|
tracing::warn!("Skipping schema-only response (no data for these pages)");
|
||||||
|
return Ok(DocumentAnnotation {
|
||||||
|
patient_name: None,
|
||||||
|
patient_age: None,
|
||||||
|
patient_gender: None,
|
||||||
|
lab_name: None,
|
||||||
|
test_date: None,
|
||||||
|
biomarkers: Some(vec![]),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let annotation = parse_annotation(&data_json)?;
|
||||||
|
|
||||||
|
tracing::info!("Parsed annotation: patient={:?}, biomarkers={}",
|
||||||
|
annotation.patient_name,
|
||||||
|
annotation.biomarkers.as_ref().map(|b| b.len()).unwrap_or(0));
|
||||||
|
|
||||||
|
Ok(annotation)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse annotation handling various Mistral response formats.
|
||||||
|
fn parse_annotation(data: &Value) -> Result<DocumentAnnotation, String> {
|
||||||
|
let patient_name = data.get("patient_name").and_then(|v| v.as_str()).map(|s| s.to_string());
|
||||||
|
let patient_age = data.get("patient_age").and_then(|v| v.as_i64()).map(|n| n as i32);
|
||||||
|
let patient_gender = data.get("patient_gender").and_then(|v| v.as_str()).map(|s| s.to_string());
|
||||||
|
let lab_name = data.get("lab_name").and_then(|v| v.as_str()).map(|s| s.to_string());
|
||||||
|
let test_date = data.get("test_date").and_then(|v| v.as_str()).map(|s| s.to_string());
|
||||||
|
|
||||||
|
// Parse biomarkers - handle nested "properties" format
|
||||||
|
let biomarkers = if let Some(bm_array) = data.get("biomarkers").and_then(|v| v.as_array()) {
|
||||||
|
let mut parsed: Vec<Biomarker> = vec![];
|
||||||
|
for item in bm_array {
|
||||||
|
// Try direct format first
|
||||||
|
if let Some(name) = item.get("name").and_then(|v| v.as_str()) {
|
||||||
|
parsed.push(Biomarker {
|
||||||
|
name: name.to_string(),
|
||||||
|
value: item.get("value").and_then(|v| v.as_f64()),
|
||||||
|
value_string: item.get("value_string").and_then(|v| v.as_str()).map(|s| s.to_string()),
|
||||||
|
unit: item.get("unit").and_then(|v| v.as_str()).map(|s| s.to_string()),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
// Try nested "properties" format
|
||||||
|
else if let Some(props) = item.get("properties") {
|
||||||
|
if let Some(name) = props.get("name").and_then(|v| v.as_str()) {
|
||||||
|
parsed.push(Biomarker {
|
||||||
|
name: name.to_string(),
|
||||||
|
value: props.get("value").and_then(|v| v.as_f64()),
|
||||||
|
value_string: props.get("value_string").and_then(|v| v.as_str()).map(|s| s.to_string()),
|
||||||
|
unit: props.get("unit").and_then(|v| v.as_str()).map(|s| s.to_string()),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Some(parsed)
|
||||||
|
} else {
|
||||||
|
Some(vec![])
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(DocumentAnnotation {
|
||||||
|
patient_name,
|
||||||
|
patient_age,
|
||||||
|
patient_gender,
|
||||||
|
lab_name,
|
||||||
|
test_date,
|
||||||
|
biomarkers,
|
||||||
|
})
|
||||||
|
}
|
||||||
322
backend/src/handlers/ocr/mod.rs
Normal file
322
backend/src/handlers/ocr/mod.rs
Normal file
@@ -0,0 +1,322 @@
|
|||||||
|
//! OCR API handlers - Mistral OCR integration for document parsing.
|
||||||
|
|
||||||
|
mod matching;
|
||||||
|
mod mistral;
|
||||||
|
mod schema;
|
||||||
|
mod types;
|
||||||
|
|
||||||
|
use std::path::PathBuf;
|
||||||
|
|
||||||
|
use axum::{
|
||||||
|
extract::{Path, State},
|
||||||
|
http::StatusCode,
|
||||||
|
Json,
|
||||||
|
};
|
||||||
|
use sea_orm::{ActiveModelTrait, ColumnTrait, EntityTrait, QueryFilter, Set};
|
||||||
|
|
||||||
|
use crate::models::bio::{biomarker, biomarker_entry, source};
|
||||||
|
|
||||||
|
// Re-export public types
|
||||||
|
pub use types::{ErrorResponse, OcrState, ParseResponse};
|
||||||
|
|
||||||
|
/// Get page count from a local file.
|
||||||
|
/// For PDFs, uses lopdf to read the actual page count.
|
||||||
|
/// For other file types (images, etc.), returns 1.
|
||||||
|
fn get_page_count(file_path: &PathBuf) -> usize {
|
||||||
|
let extension = file_path.extension()
|
||||||
|
.and_then(|e| e.to_str())
|
||||||
|
.unwrap_or("")
|
||||||
|
.to_lowercase();
|
||||||
|
|
||||||
|
if extension == "pdf" {
|
||||||
|
match lopdf::Document::load(file_path) {
|
||||||
|
Ok(doc) => {
|
||||||
|
let count = doc.get_pages().len();
|
||||||
|
tracing::info!("PDF page count (local): {}", count);
|
||||||
|
count
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
tracing::warn!("Failed to read PDF page count: {}, defaulting to 1", e);
|
||||||
|
1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
tracing::info!("Non-PDF file, treating as 1 page");
|
||||||
|
1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// POST /api/sources/:id/parse - Parse a source document using Mistral OCR.
|
||||||
|
/// Returns immediately with "processing" status; OCR runs in background.
|
||||||
|
pub async fn parse_source(
|
||||||
|
State(state): State<OcrState>,
|
||||||
|
Path(id): Path<i32>,
|
||||||
|
) -> Result<Json<ParseResponse>, (StatusCode, Json<ErrorResponse>)> {
|
||||||
|
use crate::models::user::user;
|
||||||
|
|
||||||
|
// 1. Get source from database
|
||||||
|
let source_entity = source::Entity::find_by_id(id)
|
||||||
|
.one(&state.db)
|
||||||
|
.await
|
||||||
|
.map_err(|e| {
|
||||||
|
(
|
||||||
|
StatusCode::INTERNAL_SERVER_ERROR,
|
||||||
|
Json(ErrorResponse {
|
||||||
|
error: format!("Database error: {}", e),
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
})?
|
||||||
|
.ok_or_else(|| {
|
||||||
|
(
|
||||||
|
StatusCode::NOT_FOUND,
|
||||||
|
Json(ErrorResponse {
|
||||||
|
error: "Source not found".to_string(),
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
|
||||||
|
// Check if already being processed
|
||||||
|
if source_entity.status == "processing" {
|
||||||
|
return Ok(Json(ParseResponse {
|
||||||
|
success: true,
|
||||||
|
biomarkers_count: 0,
|
||||||
|
message: "Already processing".to_string(),
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
let file_path = PathBuf::from(&source_entity.file_path);
|
||||||
|
let user_id = source_entity.user_id;
|
||||||
|
|
||||||
|
// 2. Set status to "processing" immediately
|
||||||
|
let mut active_model: source::ActiveModel = source_entity.into();
|
||||||
|
active_model.status = Set("processing".to_string());
|
||||||
|
active_model.update(&state.db).await.map_err(|e| {
|
||||||
|
(
|
||||||
|
StatusCode::INTERNAL_SERVER_ERROR,
|
||||||
|
Json(ErrorResponse {
|
||||||
|
error: format!("Database update failed: {}", e),
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
// 3. User must have their own Mistral API key configured
|
||||||
|
let user_api_key = if let Ok(Some(user_entity)) = user::Entity::find_by_id(user_id).one(&state.db).await {
|
||||||
|
user_entity.mistral_api_key
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
let api_key = match user_api_key {
|
||||||
|
Some(key) if !key.is_empty() => {
|
||||||
|
tracing::info!("Using user's Mistral API key for source {}", id);
|
||||||
|
key
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
// Revert status back to pending since we can't process
|
||||||
|
if let Ok(Some(entity)) = source::Entity::find_by_id(id).one(&state.db).await {
|
||||||
|
let mut revert_model: source::ActiveModel = entity.into();
|
||||||
|
revert_model.status = Set("pending".to_string());
|
||||||
|
let _ = revert_model.update(&state.db).await;
|
||||||
|
}
|
||||||
|
return Err((
|
||||||
|
StatusCode::BAD_REQUEST,
|
||||||
|
Json(ErrorResponse {
|
||||||
|
error: "Please configure your Mistral API key in Profile settings".to_string(),
|
||||||
|
}),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut mistral_config = state.mistral.clone();
|
||||||
|
mistral_config.api_key = api_key;
|
||||||
|
|
||||||
|
// 4. Spawn background task for OCR processing
|
||||||
|
let db = state.db.clone();
|
||||||
|
|
||||||
|
tokio::spawn(async move {
|
||||||
|
if let Err(e) = process_ocr_background(db, mistral_config, id, file_path).await {
|
||||||
|
tracing::error!("Background OCR failed for source {}: {}", id, e);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// 5. Return immediately
|
||||||
|
Ok(Json(ParseResponse {
|
||||||
|
success: true,
|
||||||
|
biomarkers_count: 0,
|
||||||
|
message: "Processing started".to_string(),
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Background OCR processing task
|
||||||
|
async fn process_ocr_background(
|
||||||
|
db: sea_orm::DatabaseConnection,
|
||||||
|
mistral_config: crate::config::MistralConfig,
|
||||||
|
source_id: i32,
|
||||||
|
file_path: PathBuf,
|
||||||
|
) -> Result<(), String> {
|
||||||
|
// Upload file to Mistral
|
||||||
|
let file_id = mistral::upload_to_mistral(&mistral_config, &file_path)
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Mistral upload failed: {}", e))?;
|
||||||
|
|
||||||
|
// Get page count locally from PDF
|
||||||
|
let max_pages = get_page_count(&file_path);
|
||||||
|
let chunk_size = mistral_config.max_pages_per_request as usize;
|
||||||
|
let max_retries = mistral_config.max_retries;
|
||||||
|
let mut all_results: Vec<types::DocumentAnnotation> = Vec::new();
|
||||||
|
let mut failed_chunk: Option<String> = None;
|
||||||
|
|
||||||
|
for start_page in (0..max_pages).step_by(chunk_size) {
|
||||||
|
// Check if source still exists before processing next chunk
|
||||||
|
let source_exists = source::Entity::find_by_id(source_id)
|
||||||
|
.one(&db)
|
||||||
|
.await
|
||||||
|
.map(|opt| opt.is_some())
|
||||||
|
.unwrap_or(false);
|
||||||
|
|
||||||
|
if !source_exists {
|
||||||
|
tracing::warn!("Source {} was deleted mid-parse, aborting OCR", source_id);
|
||||||
|
return Err("Source was deleted during parsing".to_string());
|
||||||
|
}
|
||||||
|
|
||||||
|
let pages: Vec<usize> = (start_page..std::cmp::min(start_page + chunk_size, max_pages)).collect();
|
||||||
|
|
||||||
|
tracing::info!("Processing OCR for pages {:?}", pages);
|
||||||
|
|
||||||
|
// Retry loop for this chunk
|
||||||
|
let mut attempts = 0;
|
||||||
|
let mut chunk_result = None;
|
||||||
|
|
||||||
|
while attempts <= max_retries {
|
||||||
|
match mistral::ocr_pages(&mistral_config, &file_id, &pages).await {
|
||||||
|
Ok(annotation) => {
|
||||||
|
chunk_result = Some(annotation);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
if e.contains("out of range") || e.contains("no pages") || e.contains("Invalid page") {
|
||||||
|
tracing::info!("Reached end of document at pages {:?}", pages);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
attempts += 1;
|
||||||
|
if attempts <= max_retries {
|
||||||
|
tracing::warn!("OCR chunk error (pages {:?}), attempt {}/{}: {}", pages, attempts, max_retries + 1, e);
|
||||||
|
} else {
|
||||||
|
tracing::error!("OCR chunk failed after {} attempts (pages {:?}): {}", max_retries + 1, pages, e);
|
||||||
|
failed_chunk = Some(format!("Pages {:?}: {}", pages, e));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(annotation) = chunk_result {
|
||||||
|
all_results.push(annotation);
|
||||||
|
} else if failed_chunk.is_some() {
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle failure
|
||||||
|
if let Some(error_msg) = failed_chunk {
|
||||||
|
// Update status to failed
|
||||||
|
if let Ok(Some(entity)) = source::Entity::find_by_id(source_id).one(&db).await {
|
||||||
|
let mut active_model: source::ActiveModel = entity.into();
|
||||||
|
active_model.status = Set("failed".to_string());
|
||||||
|
let _ = active_model.update(&db).await;
|
||||||
|
}
|
||||||
|
return Err(format!("OCR parsing failed: {}", error_msg));
|
||||||
|
}
|
||||||
|
|
||||||
|
if all_results.is_empty() {
|
||||||
|
// Update status to failed
|
||||||
|
if let Ok(Some(entity)) = source::Entity::find_by_id(source_id).one(&db).await {
|
||||||
|
let mut active_model: source::ActiveModel = entity.into();
|
||||||
|
active_model.status = Set("failed".to_string());
|
||||||
|
let _ = active_model.update(&db).await;
|
||||||
|
}
|
||||||
|
return Err("No OCR results obtained".to_string());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get valid biomarker names from schema
|
||||||
|
let valid_biomarkers = schema::extract_valid_biomarker_names()
|
||||||
|
.map_err(|e| format!("Failed to read schema: {}", e))?;
|
||||||
|
|
||||||
|
tracing::info!("Loaded {} valid biomarker names from schema", valid_biomarkers.len());
|
||||||
|
|
||||||
|
// Merge results with fuzzy matching
|
||||||
|
let merged = matching::merge_results(all_results, &valid_biomarkers);
|
||||||
|
|
||||||
|
// Save to database
|
||||||
|
let ocr_json = serde_json::to_string(&merged)
|
||||||
|
.map_err(|e| format!("JSON serialization failed: {}", e))?;
|
||||||
|
|
||||||
|
let source_entity = source::Entity::find_by_id(source_id)
|
||||||
|
.one(&db)
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Database error: {}", e))?
|
||||||
|
.ok_or_else(|| "Source not found".to_string())?;
|
||||||
|
|
||||||
|
let user_id = source_entity.user_id;
|
||||||
|
|
||||||
|
let mut active_model: source::ActiveModel = source_entity.into();
|
||||||
|
active_model.ocr_data = Set(Some(ocr_json));
|
||||||
|
active_model.status = Set("parsed".to_string());
|
||||||
|
active_model.biomarker_count = Set(Some(merged.biomarkers.len() as i32));
|
||||||
|
|
||||||
|
active_model.update(&db).await
|
||||||
|
.map_err(|e| format!("Database update failed: {}", e))?;
|
||||||
|
|
||||||
|
// Create biomarker entries from parsed data
|
||||||
|
let mut entries_created = 0;
|
||||||
|
let now = chrono::Utc::now().naive_utc();
|
||||||
|
|
||||||
|
// Parse test_date or use current time
|
||||||
|
let measured_at = merged.test_date
|
||||||
|
.as_ref()
|
||||||
|
.and_then(|d| chrono::NaiveDate::parse_from_str(d, "%d %b %Y").ok()
|
||||||
|
.or_else(|| chrono::NaiveDate::parse_from_str(d, "%d %b, %Y").ok())
|
||||||
|
.or_else(|| chrono::NaiveDate::parse_from_str(d, "%Y-%m-%d").ok()))
|
||||||
|
.map(|date| date.and_hms_opt(0, 0, 0).unwrap())
|
||||||
|
.unwrap_or(now);
|
||||||
|
|
||||||
|
for bio in &merged.biomarkers {
|
||||||
|
// Skip if no numeric value
|
||||||
|
let Some(value) = bio.value else { continue };
|
||||||
|
|
||||||
|
// Look up biomarker ID by name
|
||||||
|
let biomarker_entity = biomarker::Entity::find()
|
||||||
|
.filter(biomarker::Column::Name.eq(&bio.name))
|
||||||
|
.one(&db)
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Biomarker lookup error: {}", e))?;
|
||||||
|
|
||||||
|
let Some(biomarker_entity) = biomarker_entity else { continue };
|
||||||
|
|
||||||
|
// Create entry
|
||||||
|
let entry = biomarker_entry::ActiveModel {
|
||||||
|
biomarker_id: Set(biomarker_entity.id),
|
||||||
|
user_id: Set(user_id),
|
||||||
|
measured_at: Set(measured_at),
|
||||||
|
value: Set(value),
|
||||||
|
notes: Set(bio.unit.clone()),
|
||||||
|
source_id: Set(Some(source_id)),
|
||||||
|
created_at: Set(now),
|
||||||
|
};
|
||||||
|
|
||||||
|
// Insert (ignore if duplicate composite key)
|
||||||
|
if entry.insert(&db).await.is_ok() {
|
||||||
|
entries_created += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
tracing::info!(
|
||||||
|
"Successfully parsed {} biomarkers, created {} entries for source {}",
|
||||||
|
merged.biomarkers.len(),
|
||||||
|
entries_created,
|
||||||
|
source_id
|
||||||
|
);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
51
backend/src/handlers/ocr/schema.rs
Normal file
51
backend/src/handlers/ocr/schema.rs
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
//! Schema handling utilities.
|
||||||
|
|
||||||
|
use serde_json::Value;
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
/// Extract valid biomarker names from the ocr_schema.json enum.
|
||||||
|
/// Returns a HashMap where keys are UPPERCASE names (for matching) and values are original case names.
|
||||||
|
pub fn extract_valid_biomarker_names() -> Result<HashMap<String, String>, String> {
|
||||||
|
let schema_content = std::fs::read_to_string("ocr_schema.json")
|
||||||
|
.map_err(|e| format!("Failed to read ocr_schema.json: {}", e))?;
|
||||||
|
let schema: Value = serde_json::from_str(&schema_content)
|
||||||
|
.map_err(|e| format!("Failed to parse ocr_schema.json: {}", e))?;
|
||||||
|
|
||||||
|
// Navigate to: properties.biomarkers.items.properties.name.enum
|
||||||
|
let names = schema
|
||||||
|
.get("properties")
|
||||||
|
.and_then(|p| p.get("biomarkers"))
|
||||||
|
.and_then(|b| b.get("items"))
|
||||||
|
.and_then(|i| i.get("properties"))
|
||||||
|
.and_then(|p| p.get("name"))
|
||||||
|
.and_then(|n| n.get("enum"))
|
||||||
|
.and_then(|e| e.as_array())
|
||||||
|
.ok_or_else(|| "Could not find biomarker name enum in schema".to_string())?;
|
||||||
|
|
||||||
|
// Key = uppercase (for matching), Value = original case (for DB lookup)
|
||||||
|
let valid_names: HashMap<String, String> = names
|
||||||
|
.iter()
|
||||||
|
.filter_map(|v| v.as_str())
|
||||||
|
.map(|s| (s.to_uppercase(), s.to_string()))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
Ok(valid_names)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Recursively remove "description" fields from a JSON value.
|
||||||
|
pub fn strip_descriptions(value: &mut Value) {
|
||||||
|
match value {
|
||||||
|
Value::Object(map) => {
|
||||||
|
map.remove("description");
|
||||||
|
for (_, v) in map.iter_mut() {
|
||||||
|
strip_descriptions(v);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Value::Array(arr) => {
|
||||||
|
for v in arr.iter_mut() {
|
||||||
|
strip_descriptions(v);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
77
backend/src/handlers/ocr/types.rs
Normal file
77
backend/src/handlers/ocr/types.rs
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
//! Type definitions for OCR module.
|
||||||
|
|
||||||
|
use sea_orm::DatabaseConnection;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use std::path::PathBuf;
|
||||||
|
|
||||||
|
use crate::config::MistralConfig;
|
||||||
|
|
||||||
|
/// State for OCR handlers.
#[derive(Clone)]
pub struct OcrState {
    // Shared database handle used for all source/biomarker queries.
    pub db: DatabaseConnection,
    // NOTE(review): presumably the directory uploaded files live in — not
    // referenced by the handlers visible in this module; confirm at call sites.
    pub uploads_path: PathBuf,
    // Base Mistral configuration; the per-user API key is substituted in
    // before each OCR run.
    pub mistral: MistralConfig,
}
|
||||||
|
|
||||||
|
/// Response for parse endpoint.
#[derive(Serialize)]
pub struct ParseResponse {
    // True when the request was accepted; processing may still fail later.
    pub success: bool,
    // Always 0 at accept time — entries are created by the background task.
    pub biomarkers_count: usize,
    // Human-readable status, e.g. "Processing started" / "Already processing".
    pub message: String,
}
|
||||||
|
|
||||||
|
/// Error response.
#[derive(Serialize)]
pub struct ErrorResponse {
    // Human-readable description of what went wrong.
    pub error: String,
}
|
||||||
|
|
||||||
|
/// Mistral file upload response.
#[derive(Deserialize)]
pub struct MistralFileResponse {
    // Server-assigned file id, referenced by subsequent OCR requests.
    pub id: String,
    // File size reported by Mistral; deserialized but not read anywhere.
    #[allow(dead_code)]
    pub bytes: i64,
    // Page count reported by Mistral, if any. Only logged — the page count
    // actually used is computed locally from the PDF.
    pub num_pages: Option<usize>,
}
|
||||||
|
|
||||||
|
/// Mistral OCR response.
#[derive(Deserialize)]
pub struct MistralOcrResponse {
    // JSON-encoded document annotation; absent when Mistral returned no data.
    pub document_annotation: Option<String>,
    // Raw per-page payloads; deserialized but not read anywhere.
    #[allow(dead_code)]
    pub pages: Option<Vec<serde_json::Value>>,
}
|
||||||
|
|
||||||
|
/// Extracted biomarker from OCR.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Biomarker {
    // Biomarker name; normalized to the schema's canonical form during merge.
    pub name: String,
    // Numeric result value, when the report provided one.
    pub value: Option<f64>,
    // Textual result value; may hold the "Not Provided" placeholder, which
    // merging treats as no value.
    pub value_string: Option<String>,
    // Unit of measure as printed in the report, if any.
    pub unit: Option<String>,
}
|
||||||
|
|
||||||
|
/// Merged OCR result.
///
/// Produced by merging per-chunk `DocumentAnnotation`s: metadata takes the
/// first non-null value seen; biomarkers are deduplicated by canonical name.
#[derive(Debug, Serialize, Deserialize)]
pub struct OcrResult {
    pub patient_name: Option<String>,
    pub patient_age: Option<i32>,
    pub patient_gender: Option<String>,
    pub lab_name: Option<String>,
    // Test date as extracted text; parsed later with several date formats.
    pub test_date: Option<String>,
    // Deduplicated biomarkers restricted to names known to the schema.
    pub biomarkers: Vec<Biomarker>,
}
|
||||||
|
|
||||||
|
/// Document annotation from Mistral.
///
/// One instance per OCR page chunk; every field is optional because any
/// given page range may lack some or all of the report's metadata.
#[derive(Debug, Deserialize)]
pub struct DocumentAnnotation {
    pub patient_name: Option<String>,
    pub patient_age: Option<i32>,
    pub patient_gender: Option<String>,
    pub lab_name: Option<String>,
    pub test_date: Option<String>,
    // Raw biomarkers for this chunk, before canonical-name filtering.
    pub biomarkers: Option<Vec<Biomarker>>,
}
|
||||||
@@ -7,13 +7,13 @@ use axum::{
|
|||||||
};
|
};
|
||||||
use axum_extra::extract::Multipart;
|
use axum_extra::extract::Multipart;
|
||||||
use chrono::Utc;
|
use chrono::Utc;
|
||||||
use sea_orm::{ActiveModelTrait, DatabaseConnection, EntityTrait, Set};
|
use sea_orm::{ActiveModelTrait, ColumnTrait, DatabaseConnection, EntityTrait, QueryFilter, Set};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use tokio::fs;
|
use tokio::fs;
|
||||||
use tokio::io::AsyncWriteExt;
|
use tokio::io::AsyncWriteExt;
|
||||||
|
|
||||||
use crate::models::bio::source;
|
use crate::models::bio::{biomarker_entry, source};
|
||||||
|
|
||||||
/// Response for a source.
|
/// Response for a source.
|
||||||
#[derive(Serialize)]
|
#[derive(Serialize)]
|
||||||
@@ -24,6 +24,8 @@ pub struct SourceResponse {
|
|||||||
pub file_path: String,
|
pub file_path: String,
|
||||||
pub file_type: String,
|
pub file_type: String,
|
||||||
pub file_size: i64,
|
pub file_size: i64,
|
||||||
|
pub status: String,
|
||||||
|
pub biomarker_count: Option<i32>,
|
||||||
pub ocr_data: Option<String>,
|
pub ocr_data: Option<String>,
|
||||||
pub description: Option<String>,
|
pub description: Option<String>,
|
||||||
pub uploaded_at: String,
|
pub uploaded_at: String,
|
||||||
@@ -55,6 +57,8 @@ pub async fn list_sources(
|
|||||||
file_path: s.file_path,
|
file_path: s.file_path,
|
||||||
file_type: s.file_type,
|
file_type: s.file_type,
|
||||||
file_size: s.file_size,
|
file_size: s.file_size,
|
||||||
|
status: s.status,
|
||||||
|
biomarker_count: s.biomarker_count,
|
||||||
ocr_data: s.ocr_data,
|
ocr_data: s.ocr_data,
|
||||||
description: s.description,
|
description: s.description,
|
||||||
uploaded_at: s.uploaded_at.to_string(),
|
uploaded_at: s.uploaded_at.to_string(),
|
||||||
@@ -82,6 +86,8 @@ pub async fn get_source(
|
|||||||
file_path: s.file_path,
|
file_path: s.file_path,
|
||||||
file_type: s.file_type,
|
file_type: s.file_type,
|
||||||
file_size: s.file_size,
|
file_size: s.file_size,
|
||||||
|
status: s.status,
|
||||||
|
biomarker_count: s.biomarker_count,
|
||||||
ocr_data: s.ocr_data,
|
ocr_data: s.ocr_data,
|
||||||
description: s.description,
|
description: s.description,
|
||||||
uploaded_at: s.uploaded_at.to_string(),
|
uploaded_at: s.uploaded_at.to_string(),
|
||||||
@@ -166,6 +172,8 @@ pub async fn upload_source(
|
|||||||
file_path: Set(file_path.to_string_lossy().to_string()),
|
file_path: Set(file_path.to_string_lossy().to_string()),
|
||||||
file_type: Set(content_type.clone()),
|
file_type: Set(content_type.clone()),
|
||||||
file_size: Set(file_size),
|
file_size: Set(file_size),
|
||||||
|
status: Set("pending".to_string()),
|
||||||
|
biomarker_count: Set(None),
|
||||||
ocr_data: Set(None),
|
ocr_data: Set(None),
|
||||||
description: Set(description.clone()),
|
description: Set(description.clone()),
|
||||||
uploaded_at: Set(now),
|
uploaded_at: Set(now),
|
||||||
@@ -187,6 +195,8 @@ pub async fn upload_source(
|
|||||||
file_path: inserted.file_path,
|
file_path: inserted.file_path,
|
||||||
file_type: inserted.file_type,
|
file_type: inserted.file_type,
|
||||||
file_size: inserted.file_size,
|
file_size: inserted.file_size,
|
||||||
|
status: inserted.status,
|
||||||
|
biomarker_count: inserted.biomarker_count,
|
||||||
ocr_data: inserted.ocr_data,
|
ocr_data: inserted.ocr_data,
|
||||||
description: inserted.description,
|
description: inserted.description,
|
||||||
uploaded_at: inserted.uploaded_at.to_string(),
|
uploaded_at: inserted.uploaded_at.to_string(),
|
||||||
@@ -205,6 +215,13 @@ pub async fn delete_source(
|
|||||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
|
||||||
.ok_or(StatusCode::NOT_FOUND)?;
|
.ok_or(StatusCode::NOT_FOUND)?;
|
||||||
|
|
||||||
|
// Delete related biomarker entries first (cascade delete)
|
||||||
|
biomarker_entry::Entity::delete_many()
|
||||||
|
.filter(biomarker_entry::Column::SourceId.eq(id))
|
||||||
|
.exec(&state.db)
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
// Delete file from disk
|
// Delete file from disk
|
||||||
if let Err(e) = fs::remove_file(&s.file_path).await {
|
if let Err(e) = fs::remove_file(&s.file_path).await {
|
||||||
tracing::warn!("Failed to delete file {}: {:?}", s.file_path, e);
|
tracing::warn!("Failed to delete file {}: {:?}", s.file_path, e);
|
||||||
@@ -256,6 +273,8 @@ pub async fn update_ocr(
|
|||||||
file_path: updated.file_path,
|
file_path: updated.file_path,
|
||||||
file_type: updated.file_type,
|
file_type: updated.file_type,
|
||||||
file_size: updated.file_size,
|
file_size: updated.file_size,
|
||||||
|
status: updated.status,
|
||||||
|
biomarker_count: updated.biomarker_count,
|
||||||
ocr_data: updated.ocr_data,
|
ocr_data: updated.ocr_data,
|
||||||
description: updated.description,
|
description: updated.description,
|
||||||
uploaded_at: updated.uploaded_at.to_string(),
|
uploaded_at: updated.uploaded_at.to_string(),
|
||||||
|
|||||||
@@ -36,6 +36,7 @@ pub struct UpdateUserRequest {
|
|||||||
pub alcohol: Option<bool>,
|
pub alcohol: Option<bool>,
|
||||||
pub diet_id: Option<i32>,
|
pub diet_id: Option<i32>,
|
||||||
pub avatar_url: Option<String>,
|
pub avatar_url: Option<String>,
|
||||||
|
pub mistral_api_key: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Response for a user.
|
/// Response for a user.
|
||||||
@@ -52,6 +53,7 @@ pub struct UserResponse {
|
|||||||
pub alcohol: Option<bool>,
|
pub alcohol: Option<bool>,
|
||||||
pub diet: Option<String>,
|
pub diet: Option<String>,
|
||||||
pub avatar_url: Option<String>,
|
pub avatar_url: Option<String>,
|
||||||
|
pub has_mistral_key: bool,
|
||||||
pub created_at: String,
|
pub created_at: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -88,16 +90,17 @@ pub async fn list_users(
|
|||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|u| UserResponse {
|
.map(|u| UserResponse {
|
||||||
id: u.id,
|
id: u.id,
|
||||||
username: u.username,
|
username: u.username.clone(),
|
||||||
name: u.name,
|
name: u.name.clone(),
|
||||||
role: role_map.get(&u.role_id).cloned().unwrap_or_default(),
|
role: role_map.get(&u.role_id).cloned().unwrap_or_default(),
|
||||||
height_cm: u.height_cm,
|
height_cm: u.height_cm,
|
||||||
blood_type: u.blood_type,
|
blood_type: u.blood_type.clone(),
|
||||||
birthdate: u.birthdate.map(|d| d.to_string()),
|
birthdate: u.birthdate.map(|d| d.to_string()),
|
||||||
smoking: u.smoking,
|
smoking: u.smoking,
|
||||||
alcohol: u.alcohol,
|
alcohol: u.alcohol,
|
||||||
diet: u.diet_id.and_then(|id| diet_map.get(&id).cloned()),
|
diet: u.diet_id.and_then(|id| diet_map.get(&id).cloned()),
|
||||||
avatar_url: u.avatar_url,
|
avatar_url: u.avatar_url.clone(),
|
||||||
|
has_mistral_key: u.mistral_api_key.is_some(),
|
||||||
created_at: u.created_at.to_string(),
|
created_at: u.created_at.to_string(),
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
@@ -145,6 +148,7 @@ pub async fn get_user(
|
|||||||
alcohol: u.alcohol,
|
alcohol: u.alcohol,
|
||||||
diet: diet_name,
|
diet: diet_name,
|
||||||
avatar_url: u.avatar_url,
|
avatar_url: u.avatar_url,
|
||||||
|
has_mistral_key: u.mistral_api_key.is_some(),
|
||||||
created_at: u.created_at.to_string(),
|
created_at: u.created_at.to_string(),
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
@@ -229,6 +233,7 @@ pub async fn create_user(
|
|||||||
alcohol: inserted.alcohol,
|
alcohol: inserted.alcohol,
|
||||||
diet: diet_name,
|
diet: diet_name,
|
||||||
avatar_url: inserted.avatar_url,
|
avatar_url: inserted.avatar_url,
|
||||||
|
has_mistral_key: inserted.mistral_api_key.is_some(),
|
||||||
created_at: inserted.created_at.to_string(),
|
created_at: inserted.created_at.to_string(),
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
@@ -279,6 +284,9 @@ pub async fn update_user(
|
|||||||
if req.avatar_url.is_some() {
|
if req.avatar_url.is_some() {
|
||||||
active.avatar_url = Set(req.avatar_url);
|
active.avatar_url = Set(req.avatar_url);
|
||||||
}
|
}
|
||||||
|
if req.mistral_api_key.is_some() {
|
||||||
|
active.mistral_api_key = Set(req.mistral_api_key);
|
||||||
|
}
|
||||||
active.updated_at = Set(now);
|
active.updated_at = Set(now);
|
||||||
|
|
||||||
let updated = active
|
let updated = active
|
||||||
@@ -317,6 +325,7 @@ pub async fn update_user(
|
|||||||
alcohol: updated.alcohol,
|
alcohol: updated.alcohol,
|
||||||
diet: diet_name,
|
diet: diet_name,
|
||||||
avatar_url: updated.avatar_url,
|
avatar_url: updated.avatar_url,
|
||||||
|
has_mistral_key: updated.mistral_api_key.is_some(),
|
||||||
created_at: updated.created_at.to_string(),
|
created_at: updated.created_at.to_string(),
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -143,6 +143,7 @@ fn create_router(db: DatabaseConnection, config: &config::Config) -> Router {
|
|||||||
// Entries API
|
// Entries API
|
||||||
.route("/api/entries", post(handlers::entries::create_entry))
|
.route("/api/entries", post(handlers::entries::create_entry))
|
||||||
.route("/api/users/{user_id}/entries", get(handlers::entries::list_user_entries))
|
.route("/api/users/{user_id}/entries", get(handlers::entries::list_user_entries))
|
||||||
|
.route("/api/users/{user_id}/results", get(handlers::entries::get_user_results))
|
||||||
.route_layer(middleware::from_fn(require_auth));
|
.route_layer(middleware::from_fn(require_auth));
|
||||||
|
|
||||||
// Sources routes (need separate state for uploads path)
|
// Sources routes (need separate state for uploads path)
|
||||||
@@ -156,13 +157,26 @@ fn create_router(db: DatabaseConnection, config: &config::Config) -> Router {
|
|||||||
.route("/api/sources/{id}", get(handlers::sources::get_source)
|
.route("/api/sources/{id}", get(handlers::sources::get_source)
|
||||||
.delete(handlers::sources::delete_source))
|
.delete(handlers::sources::delete_source))
|
||||||
.route("/api/sources/{id}/ocr", put(handlers::sources::update_ocr))
|
.route("/api/sources/{id}/ocr", put(handlers::sources::update_ocr))
|
||||||
|
.layer(axum::extract::DefaultBodyLimit::max(config.paths.max_upload_mb as usize * 1024 * 1024))
|
||||||
.route_layer(middleware::from_fn(require_auth))
|
.route_layer(middleware::from_fn(require_auth))
|
||||||
.with_state(sources_state);
|
.with_state(sources_state);
|
||||||
|
|
||||||
|
// OCR routes (need Mistral config)
|
||||||
|
let ocr_state = handlers::ocr::OcrState {
|
||||||
|
db: db.clone(),
|
||||||
|
uploads_path: PathBuf::from(&config.paths.uploads),
|
||||||
|
mistral: config.mistral.clone(),
|
||||||
|
};
|
||||||
|
let ocr_routes = Router::new()
|
||||||
|
.route("/api/sources/{id}/parse", post(handlers::ocr::parse_source))
|
||||||
|
.route_layer(middleware::from_fn(require_auth))
|
||||||
|
.with_state(ocr_state);
|
||||||
|
|
||||||
Router::new()
|
Router::new()
|
||||||
.merge(public_routes)
|
.merge(public_routes)
|
||||||
.merge(protected_routes)
|
.merge(protected_routes)
|
||||||
.merge(sources_routes)
|
.merge(sources_routes)
|
||||||
|
.merge(ocr_routes)
|
||||||
.layer(auth_layer)
|
.layer(auth_layer)
|
||||||
.with_state(db)
|
.with_state(db)
|
||||||
}
|
}
|
||||||
@@ -185,10 +199,18 @@ async fn require_auth(
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn init_logging(config: &config::Config) {
|
fn init_logging(config: &config::Config) {
|
||||||
let log_level = config.logging.level.parse().unwrap_or(tracing::Level::INFO);
|
// Build filter: use configured level for our code, but restrict sqlx/sea_orm
|
||||||
|
let filter_str = format!(
|
||||||
|
"{},sqlx=warn,sea_orm=warn",
|
||||||
|
config.logging.level
|
||||||
|
);
|
||||||
|
|
||||||
|
let filter = tracing_subscriber::filter::EnvFilter::try_new(&filter_str)
|
||||||
|
.unwrap_or_else(|_| tracing_subscriber::filter::EnvFilter::new("info,sqlx=warn,sea_orm=warn"));
|
||||||
|
|
||||||
tracing_subscriber::registry()
|
tracing_subscriber::registry()
|
||||||
.with(tracing_subscriber::fmt::layer())
|
.with(tracing_subscriber::fmt::layer())
|
||||||
.with(tracing_subscriber::filter::LevelFilter::from_level(log_level))
|
.with(filter)
|
||||||
.init();
|
.init();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -27,6 +27,14 @@ pub struct Model {
|
|||||||
/// File size in bytes
|
/// File size in bytes
|
||||||
pub file_size: i64,
|
pub file_size: i64,
|
||||||
|
|
||||||
|
/// Parsing status: "pending", "processing", "parsed", "failed"
|
||||||
|
#[sea_orm(column_type = "Text")]
|
||||||
|
pub status: String,
|
||||||
|
|
||||||
|
/// Number of biomarkers extracted (populated after parsing)
|
||||||
|
#[sea_orm(nullable)]
|
||||||
|
pub biomarker_count: Option<i32>,
|
||||||
|
|
||||||
/// OCR parsed data as JSON
|
/// OCR parsed data as JSON
|
||||||
#[sea_orm(column_type = "Text", nullable)]
|
#[sea_orm(column_type = "Text", nullable)]
|
||||||
pub ocr_data: Option<String>,
|
pub ocr_data: Option<String>,
|
||||||
|
|||||||
@@ -44,6 +44,9 @@ pub struct Model {
|
|||||||
/// URL to profile avatar icon
|
/// URL to profile avatar icon
|
||||||
pub avatar_url: Option<String>,
|
pub avatar_url: Option<String>,
|
||||||
|
|
||||||
|
/// User's own Mistral API key (BYOK - Bring Your Own Key)
|
||||||
|
pub mistral_api_key: Option<String>,
|
||||||
|
|
||||||
pub created_at: DateTime,
|
pub created_at: DateTime,
|
||||||
pub updated_at: DateTime,
|
pub updated_at: DateTime,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -650,8 +650,8 @@ select.input {
|
|||||||
.biomarker-row {
|
.biomarker-row {
|
||||||
display: flex;
|
display: flex;
|
||||||
align-items: center;
|
align-items: center;
|
||||||
gap: var(--space-md);
|
gap: var(--space-sm);
|
||||||
padding: var(--space-xs) var(--space-sm);
|
padding: var(--space-sm);
|
||||||
border-radius: var(--radius-sm);
|
border-radius: var(--radius-sm);
|
||||||
transition: background-color 0.15s;
|
transition: background-color 0.15s;
|
||||||
}
|
}
|
||||||
@@ -661,11 +661,12 @@ select.input {
|
|||||||
}
|
}
|
||||||
|
|
||||||
.biomarker-dot {
|
.biomarker-dot {
|
||||||
width: 10px;
|
width: 12px;
|
||||||
height: 10px;
|
height: 12px;
|
||||||
border-radius: 50%;
|
border-radius: 50%;
|
||||||
background: var(--text-secondary);
|
background: var(--text-secondary);
|
||||||
flex-shrink: 0;
|
flex-shrink: 0;
|
||||||
|
box-shadow: 0 0 3px rgba(0, 0, 0, 0.2);
|
||||||
}
|
}
|
||||||
|
|
||||||
.biomarker-dot.status-low {
|
.biomarker-dot.status-low {
|
||||||
@@ -681,36 +682,49 @@ select.input {
|
|||||||
}
|
}
|
||||||
|
|
||||||
.biomarker-info {
|
.biomarker-info {
|
||||||
flex: 0 0 320px;
|
flex: 0 0 280px;
|
||||||
min-width: 0;
|
min-width: 0;
|
||||||
display: flex;
|
display: flex;
|
||||||
flex-direction: row;
|
flex-direction: column;
|
||||||
align-items: baseline;
|
gap: 2px;
|
||||||
gap: var(--space-xs);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
.biomarker-info .biomarker-name {
|
.biomarker-name {
|
||||||
font-size: 14px;
|
font-size: 0.875rem;
|
||||||
|
font-weight: 500;
|
||||||
|
white-space: nowrap;
|
||||||
|
overflow: hidden;
|
||||||
|
text-overflow: ellipsis;
|
||||||
}
|
}
|
||||||
|
|
||||||
.biomarker-info .biomarker-unit {
|
.biomarker-unit,
|
||||||
font-size: 11px;
|
.biomarker-value {
|
||||||
flex-shrink: 0;
|
font-size: 0.75rem;
|
||||||
|
color: var(--text-secondary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.biomarker-value {
|
||||||
|
font-weight: 600;
|
||||||
|
color: var(--text-primary);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Biomarker Scale Bar */
|
/* Biomarker Scale Bar */
|
||||||
.biomarker-scale {
|
.biomarker-scale {
|
||||||
flex: 1;
|
flex: 1;
|
||||||
display: flex;
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
align-items: center;
|
||||||
justify-content: center;
|
justify-content: center;
|
||||||
position: relative;
|
gap: 4px;
|
||||||
height: 16px;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
.scale-bar {
|
.scale-bar {
|
||||||
width: 120px;
|
width: 220px;
|
||||||
height: 6px;
|
height: 8px;
|
||||||
border-radius: 3px;
|
border-radius: 4px;
|
||||||
|
background: var(--border);
|
||||||
|
position: relative;
|
||||||
|
overflow: visible;
|
||||||
}
|
}
|
||||||
|
|
||||||
.scale-bar.placeholder {
|
.scale-bar.placeholder {
|
||||||
@@ -723,13 +737,26 @@ select.input {
|
|||||||
|
|
||||||
.scale-marker {
|
.scale-marker {
|
||||||
position: absolute;
|
position: absolute;
|
||||||
top: 0;
|
top: 50%;
|
||||||
bottom: 0;
|
width: 12px;
|
||||||
width: 3px;
|
height: 12px;
|
||||||
background: var(--text-primary);
|
border-radius: 50%;
|
||||||
border-radius: 2px;
|
transform: translate(-50%, -50%);
|
||||||
transform: translateX(-50%);
|
box-shadow: 0 0 4px rgba(0, 0, 0, 0.4);
|
||||||
box-shadow: 0 0 2px rgba(0, 0, 0, 0.3);
|
border: 2px solid var(--bg-secondary);
|
||||||
|
background: var(--accent);
|
||||||
|
}
|
||||||
|
|
||||||
|
.scale-labels {
|
||||||
|
display: flex;
|
||||||
|
justify-content: space-between;
|
||||||
|
width: 140px;
|
||||||
|
font-size: 0.65rem;
|
||||||
|
color: var(--text-secondary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.text-muted {
|
||||||
|
color: var(--text-secondary);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* App Layout with Sidebar */
|
/* App Layout with Sidebar */
|
||||||
|
|||||||
@@ -6,30 +6,61 @@ interface Category {
|
|||||||
description: string | null
|
description: string | null
|
||||||
}
|
}
|
||||||
|
|
||||||
interface Biomarker {
|
interface BiomarkerResult {
|
||||||
id: number
|
biomarker_id: number
|
||||||
category_id: number
|
|
||||||
name: string
|
name: string
|
||||||
test_category: string
|
category_id: number
|
||||||
unit: string
|
unit: string
|
||||||
methodology: string | null
|
value: number | null
|
||||||
|
measured_at: string | null
|
||||||
|
ref_min: number | null
|
||||||
|
ref_max: number | null
|
||||||
|
label: string
|
||||||
|
severity: number
|
||||||
|
}
|
||||||
|
|
||||||
|
// Severity to color mapping
|
||||||
|
const severityColors: Record<number, string> = {
|
||||||
|
0: 'var(--indicator-normal)', // Normal - green
|
||||||
|
1: 'var(--indicator-warning)', // Mild - yellow/orange
|
||||||
|
2: '#ff8c00', // Moderate - dark orange
|
||||||
|
3: 'var(--indicator-critical)', // Severe - red
|
||||||
|
4: '#8b0000', // Critical - dark red
|
||||||
}
|
}
|
||||||
|
|
||||||
export function DashboardPage() {
|
export function DashboardPage() {
|
||||||
const [categories, setCategories] = useState<Category[]>([])
|
const [categories, setCategories] = useState<Category[]>([])
|
||||||
const [biomarkers, setBiomarkers] = useState<Biomarker[]>([])
|
const [results, setResults] = useState<BiomarkerResult[]>([])
|
||||||
const [expandedCategories, setExpandedCategories] = useState<Set<number>>(new Set())
|
const [expandedCategories, setExpandedCategories] = useState<Set<number>>(new Set())
|
||||||
const [loading, setLoading] = useState(true)
|
const [loading, setLoading] = useState(true)
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
Promise.all([
|
const fetchData = async () => {
|
||||||
fetch('/api/categories', { credentials: 'include' }).then(r => r.json()),
|
try {
|
||||||
fetch('/api/biomarkers', { credentials: 'include' }).then(r => r.json()),
|
// Get current user
|
||||||
]).then(([cats, bms]) => {
|
const authRes = await fetch('/api/auth/me', { credentials: 'include' })
|
||||||
setCategories(cats)
|
if (!authRes.ok) return
|
||||||
setBiomarkers(bms)
|
const authData = await authRes.json()
|
||||||
|
const user = authData.user
|
||||||
|
if (!user) return // Not authenticated
|
||||||
|
|
||||||
|
// Fetch categories and results in parallel
|
||||||
|
const [catsRes, resultsRes] = await Promise.all([
|
||||||
|
fetch('/api/categories', { credentials: 'include' }),
|
||||||
|
fetch(`/api/users/${user.id}/results`, { credentials: 'include' }),
|
||||||
|
])
|
||||||
|
|
||||||
|
if (catsRes.ok && resultsRes.ok) {
|
||||||
|
setCategories(await catsRes.json())
|
||||||
|
setResults(await resultsRes.json())
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Failed to load dashboard data:', error)
|
||||||
|
} finally {
|
||||||
setLoading(false)
|
setLoading(false)
|
||||||
})
|
}
|
||||||
|
}
|
||||||
|
fetchData()
|
||||||
}, [])
|
}, [])
|
||||||
|
|
||||||
const toggleCategory = (categoryId: number) => {
|
const toggleCategory = (categoryId: number) => {
|
||||||
@@ -44,8 +75,20 @@ export function DashboardPage() {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
const getBiomarkersForCategory = (categoryId: number) => {
|
const getResultsForCategory = (categoryId: number) => {
|
||||||
return biomarkers.filter(b => b.category_id === categoryId)
|
return results.filter(r => r.category_id === categoryId)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate scale bar position (0-100%)
|
||||||
|
const getScalePosition = (result: BiomarkerResult): number | null => {
|
||||||
|
if (result.value === null || result.ref_min === null || result.ref_max === null) {
|
||||||
|
return null
|
||||||
|
}
|
||||||
|
const range = result.ref_max - result.ref_min
|
||||||
|
if (range <= 0) return 50
|
||||||
|
// Clamp to 5-95% for visual bounds
|
||||||
|
const pos = ((result.value - result.ref_min) / range) * 100
|
||||||
|
return Math.max(5, Math.min(95, pos))
|
||||||
}
|
}
|
||||||
|
|
||||||
if (loading) {
|
if (loading) {
|
||||||
@@ -56,15 +99,17 @@ export function DashboardPage() {
|
|||||||
<div className="page">
|
<div className="page">
|
||||||
<header className="page-header">
|
<header className="page-header">
|
||||||
<h1>Dashboard</h1>
|
<h1>Dashboard</h1>
|
||||||
<p className="text-secondary">View all biomarker categories and their reference markers</p>
|
<p className="text-secondary">Your latest biomarker results</p>
|
||||||
</header>
|
</header>
|
||||||
|
|
||||||
<section>
|
<section>
|
||||||
<h2 className="mb-md">Biomarker Categories</h2>
|
<h2 className="mb-md">Biomarker Categories</h2>
|
||||||
<div className="flex-col gap-sm">
|
<div className="flex-col gap-sm">
|
||||||
{categories.map(category => {
|
{categories.map(category => {
|
||||||
const categoryBiomarkers = getBiomarkersForCategory(category.id)
|
const categoryResults = getResultsForCategory(category.id)
|
||||||
const isExpanded = expandedCategories.has(category.id)
|
const isExpanded = expandedCategories.has(category.id)
|
||||||
|
// Count how many have data
|
||||||
|
const withData = categoryResults.filter(r => r.value !== null).length
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div key={category.id} className="card category-card">
|
<div key={category.id} className="card category-card">
|
||||||
@@ -75,7 +120,7 @@ export function DashboardPage() {
|
|||||||
<div>
|
<div>
|
||||||
<span className="category-name">{category.name}</span>
|
<span className="category-name">{category.name}</span>
|
||||||
<span className="text-secondary text-sm ml-sm">
|
<span className="text-secondary text-sm ml-sm">
|
||||||
({categoryBiomarkers.length} biomarkers)
|
({withData}/{categoryResults.length} biomarkers)
|
||||||
</span>
|
</span>
|
||||||
</div>
|
</div>
|
||||||
<img
|
<img
|
||||||
@@ -90,24 +135,59 @@ export function DashboardPage() {
|
|||||||
|
|
||||||
{isExpanded && (
|
{isExpanded && (
|
||||||
<div className="category-content border-t p-sm">
|
<div className="category-content border-t p-sm">
|
||||||
{categoryBiomarkers.length === 0 ? (
|
{categoryResults.length === 0 ? (
|
||||||
<p className="text-secondary text-sm p-sm">
|
<p className="text-secondary text-sm p-sm">
|
||||||
No biomarkers in this category
|
No biomarkers in this category
|
||||||
</p>
|
</p>
|
||||||
) : (
|
) : (
|
||||||
<div className="biomarker-list">
|
<div className="biomarker-list">
|
||||||
{categoryBiomarkers.map(biomarker => (
|
{categoryResults.map(result => {
|
||||||
<div key={biomarker.id} className="biomarker-row">
|
const scalePos = getScalePosition(result)
|
||||||
<div className="biomarker-dot" title="No data"></div>
|
const dotColor = result.value !== null
|
||||||
|
? severityColors[result.severity] || severityColors[0]
|
||||||
|
: 'var(--text-secondary)'
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div key={result.biomarker_id} className="biomarker-row">
|
||||||
|
<div
|
||||||
|
className="biomarker-dot"
|
||||||
|
title={result.label}
|
||||||
|
style={{ backgroundColor: dotColor }}
|
||||||
|
/>
|
||||||
<div className="biomarker-info">
|
<div className="biomarker-info">
|
||||||
<span className="biomarker-name">{biomarker.name}</span>
|
<span className="biomarker-name">{result.name}</span>
|
||||||
<span className="biomarker-unit">{biomarker.unit}</span>
|
{result.value !== null ? (
|
||||||
|
<span className="biomarker-value">
|
||||||
|
{result.value.toFixed(2)} {result.unit}
|
||||||
|
</span>
|
||||||
|
) : (
|
||||||
|
<span className="biomarker-unit text-muted">
|
||||||
|
No data
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
<div className="biomarker-scale">
|
<div className="biomarker-scale">
|
||||||
<div className="scale-bar placeholder"></div>
|
<div className="scale-bar">
|
||||||
|
{scalePos !== null && (
|
||||||
|
<div
|
||||||
|
className="scale-marker"
|
||||||
|
style={{
|
||||||
|
left: `${scalePos}%`,
|
||||||
|
backgroundColor: dotColor
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
{result.ref_min !== null && result.ref_max !== null && (
|
||||||
|
<div className="scale-labels">
|
||||||
|
<span>{result.ref_min}</span>
|
||||||
|
<span>{result.ref_max}</span>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
))}
|
)
|
||||||
|
})}
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ interface UserProfile {
|
|||||||
alcohol: boolean | null
|
alcohol: boolean | null
|
||||||
diet: string | null
|
diet: string | null
|
||||||
avatar_url: string | null
|
avatar_url: string | null
|
||||||
|
has_mistral_key: boolean
|
||||||
}
|
}
|
||||||
|
|
||||||
export function ProfilePage() {
|
export function ProfilePage() {
|
||||||
@@ -37,6 +38,8 @@ export function ProfilePage() {
|
|||||||
const [alcohol, setAlcohol] = useState<boolean | null>(null)
|
const [alcohol, setAlcohol] = useState<boolean | null>(null)
|
||||||
const [dietId, setDietId] = useState<number | null>(null)
|
const [dietId, setDietId] = useState<number | null>(null)
|
||||||
const [avatarUrl, setAvatarUrl] = useState<string | null>(null)
|
const [avatarUrl, setAvatarUrl] = useState<string | null>(null)
|
||||||
|
const [mistralApiKey, setMistralApiKey] = useState('')
|
||||||
|
const [hasMistralKey, setHasMistralKey] = useState(false)
|
||||||
|
|
||||||
const avatarOptions = [
|
const avatarOptions = [
|
||||||
...[1, 2, 3, 4, 5, 6, 7].map(i => `/icons/user/icons8-male-user-50${i === 1 ? '' : `-${i}`}.png`),
|
...[1, 2, 3, 4, 5, 6, 7].map(i => `/icons/user/icons8-male-user-50${i === 1 ? '' : `-${i}`}.png`),
|
||||||
@@ -69,6 +72,7 @@ export function ProfilePage() {
|
|||||||
const diet = dietsData.find((d: Diet) => d.name === profile.diet)
|
const diet = dietsData.find((d: Diet) => d.name === profile.diet)
|
||||||
setDietId(diet?.id || null)
|
setDietId(diet?.id || null)
|
||||||
setAvatarUrl(profile.avatar_url)
|
setAvatarUrl(profile.avatar_url)
|
||||||
|
setHasMistralKey(profile.has_mistral_key)
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
.finally(() => {
|
.finally(() => {
|
||||||
@@ -102,6 +106,7 @@ export function ProfilePage() {
|
|||||||
alcohol,
|
alcohol,
|
||||||
diet_id: dietId,
|
diet_id: dietId,
|
||||||
avatar_url: avatarUrl,
|
avatar_url: avatarUrl,
|
||||||
|
mistral_api_key: mistralApiKey || null,
|
||||||
}),
|
}),
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -277,6 +282,27 @@ export function ProfilePage() {
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
{/* API Keys */}
|
||||||
|
<div className="card mb-lg">
|
||||||
|
<h3 className="mb-md">API Keys</h3>
|
||||||
|
<p className="text-secondary text-sm mb-md">Use your own Mistral API key for document parsing (optional)</p>
|
||||||
|
|
||||||
|
<div className="form-group">
|
||||||
|
<label htmlFor="mistralKey">Mistral API Key</label>
|
||||||
|
<input
|
||||||
|
id="mistralKey"
|
||||||
|
type="password"
|
||||||
|
className="input"
|
||||||
|
value={mistralApiKey}
|
||||||
|
onChange={(e) => setMistralApiKey(e.target.value)}
|
||||||
|
placeholder={hasMistralKey ? '••••••••••••••••' : 'Enter your API key'}
|
||||||
|
/>
|
||||||
|
{hasMistralKey && (
|
||||||
|
<span className="text-xs text-secondary mt-xs">You have an API key configured. Enter a new one to update.</span>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
{message && (
|
{message && (
|
||||||
<div className={message.type === 'success' ? 'success-message' : 'error-message'}>
|
<div className={message.type === 'success' ? 'success-message' : 'error-message'}>
|
||||||
{message.text}
|
{message.text}
|
||||||
|
|||||||
@@ -7,6 +7,8 @@ interface Source {
|
|||||||
file_path: string
|
file_path: string
|
||||||
file_type: string
|
file_type: string
|
||||||
file_size: number
|
file_size: number
|
||||||
|
status: string
|
||||||
|
biomarker_count: number | null
|
||||||
ocr_data: string | null
|
ocr_data: string | null
|
||||||
description: string | null
|
description: string | null
|
||||||
uploaded_at: string
|
uploaded_at: string
|
||||||
@@ -19,6 +21,7 @@ export function SourcesPage() {
|
|||||||
const [error, setError] = useState<string | null>(null)
|
const [error, setError] = useState<string | null>(null)
|
||||||
const [dragOver, setDragOver] = useState(false)
|
const [dragOver, setDragOver] = useState(false)
|
||||||
const [deleteConfirmId, setDeleteConfirmId] = useState<number | null>(null)
|
const [deleteConfirmId, setDeleteConfirmId] = useState<number | null>(null)
|
||||||
|
const [parsingId, setParsingId] = useState<number | null>(null)
|
||||||
const fileInputRef = useRef<HTMLInputElement>(null)
|
const fileInputRef = useRef<HTMLInputElement>(null)
|
||||||
|
|
||||||
// Fetch sources on mount
|
// Fetch sources on mount
|
||||||
@@ -98,6 +101,31 @@ export function SourcesPage() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const handleParse = async (id: number) => {
|
||||||
|
setParsingId(id)
|
||||||
|
setError(null)
|
||||||
|
try {
|
||||||
|
const res = await fetch(`/api/sources/${id}/parse`, {
|
||||||
|
method: 'POST',
|
||||||
|
credentials: 'include',
|
||||||
|
})
|
||||||
|
if (res.ok) {
|
||||||
|
const data = await res.json()
|
||||||
|
// Refresh sources to show updated status
|
||||||
|
fetchSources()
|
||||||
|
console.log('Parsed:', data)
|
||||||
|
} else {
|
||||||
|
const err = await res.json()
|
||||||
|
setError(err.error || 'Parse failed')
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
console.error('Failed to parse:', e)
|
||||||
|
setError('Failed to parse document')
|
||||||
|
} finally {
|
||||||
|
setParsingId(null)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const formatFileSize = (bytes: number) => {
|
const formatFileSize = (bytes: number) => {
|
||||||
if (bytes < 1024) return `${bytes} B`
|
if (bytes < 1024) return `${bytes} B`
|
||||||
if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`
|
if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`
|
||||||
@@ -191,12 +219,31 @@ export function SourcesPage() {
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div className="flex gap-sm items-center">
|
<div className="flex gap-sm items-center">
|
||||||
{source.ocr_data ? (
|
{source.status === 'parsed' ? (
|
||||||
<span className="status-parsed flex items-center gap-xs text-xs">
|
<span className="status-parsed flex items-center gap-xs text-xs">
|
||||||
<img src="/icons/general/icons8-checkmark-50.png" alt="Parsed" className="icon-sm" /> Parsed
|
<img src="/icons/general/icons8-checkmark-50.png" alt="Parsed" className="icon-sm" />
|
||||||
|
{source.biomarker_count ? `${source.biomarker_count} biomarkers` : 'Parsed'}
|
||||||
</span>
|
</span>
|
||||||
|
) : source.status === 'processing' ? (
|
||||||
|
<span className="status-processing text-xs text-secondary">
|
||||||
|
Processing...
|
||||||
|
</span>
|
||||||
|
) : source.status === 'failed' ? (
|
||||||
|
<button
|
||||||
|
className="btn btn-primary btn-sm"
|
||||||
|
onClick={() => handleParse(source.id)}
|
||||||
|
disabled={parsingId === source.id}
|
||||||
|
>
|
||||||
|
Retry
|
||||||
|
</button>
|
||||||
) : (
|
) : (
|
||||||
<span className="text-secondary text-xs">Pending</span>
|
<button
|
||||||
|
className="btn btn-primary btn-sm"
|
||||||
|
onClick={() => handleParse(source.id)}
|
||||||
|
disabled={parsingId === source.id}
|
||||||
|
>
|
||||||
|
Parse
|
||||||
|
</button>
|
||||||
)}
|
)}
|
||||||
<button
|
<button
|
||||||
className="btn btn-danger btn-sm"
|
className="btn btn-danger btn-sm"
|
||||||
|
|||||||
Reference in New Issue
Block a user